warp-beacon 1.2.6__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (32)
  1. etc/warp_beacon/warp_beacon.conf +4 -2
  2. warp_beacon/__version__.py +1 -1
  3. warp_beacon/jobs/__init__.py +2 -0
  4. warp_beacon/jobs/abstract.py +21 -4
  5. warp_beacon/jobs/download_job.py +6 -3
  6. warp_beacon/jobs/types.py +9 -0
  7. warp_beacon/jobs/upload_job.py +1 -0
  8. warp_beacon/mediainfo/abstract.py +11 -1
  9. warp_beacon/mediainfo/silencer.py +46 -0
  10. warp_beacon/mediainfo/video.py +13 -1
  11. warp_beacon/scraper/__init__.py +38 -23
  12. warp_beacon/scraper/abstract.py +26 -0
  13. warp_beacon/scraper/instagram.py +35 -24
  14. warp_beacon/scraper/youtube/abstract.py +105 -0
  15. warp_beacon/scraper/youtube/music.py +12 -108
  16. warp_beacon/scraper/youtube/shorts.py +20 -73
  17. warp_beacon/scraper/youtube/youtube.py +41 -0
  18. warp_beacon/storage/__init__.py +27 -6
  19. warp_beacon/telegram/__init__.py +0 -0
  20. warp_beacon/telegram/bot.py +348 -0
  21. warp_beacon/telegram/handlers.py +163 -0
  22. warp_beacon/telegram/placeholder_message.py +191 -0
  23. warp_beacon/telegram/utils.py +73 -0
  24. warp_beacon/uploader/__init__.py +9 -9
  25. warp_beacon/warp_beacon.py +8 -594
  26. {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/METADATA +4 -2
  27. warp_beacon-2.0.1.dist-info/RECORD +40 -0
  28. {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/WHEEL +1 -1
  29. {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/top_level.txt +9 -0
  30. warp_beacon-1.2.6.dist-info/RECORD +0 -31
  31. {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/LICENSE +0 -0
  32. {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/entry_points.txt +0 -0
@@ -1,111 +1,15 @@
1
- import os
2
- import io
3
- import pathlib
4
- import time
5
-
6
- import socket
7
- import ssl
8
-
9
- from typing import Callable, Union
10
-
11
- import requests
12
- import urllib
13
- import http.client
14
-
15
- from PIL import Image
1
+ from warp_beacon.jobs.types import JobType
2
+ from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
16
3
 
17
4
  from pytubefix import YouTube
18
- from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
19
-
20
- from warp_beacon.mediainfo.abstract import MediaInfoAbstract
21
- from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, extract_exception_message
22
- from warp_beacon.scraper.abstract import ScraperAbstract
23
5
 
24
6
  import logging
25
7
 
26
- DOWNLOAD_DIR = "/tmp"
27
-
28
- class YoutubeMusicScraper(ScraperAbstract):
29
-
30
- def __init__(self) -> None:
31
- pass
32
-
33
- def __del__(self) -> None:
34
- pass
35
-
36
- def remove_tmp_files(self) -> None:
37
- for i in os.listdir(DOWNLOAD_DIR):
38
- if "yt_download_" in i:
39
- os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
40
-
41
- def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
42
- ret_val = ''
43
- max_retries = int(os.environ.get("YT_MUSIC_MAX_RETRIES", default=6))
44
- pause_secs = int(os.environ.get("YT_MUSIC_PAUSE_BEFORE_RETRY", default=3))
45
- timeout = int(os.environ.get("YT_MUSIC_TIMEOUT", default=60))
46
- timeout_increment = int(os.environ.get("YT_MUSIC_TIMEOUT_INCREMENT", default=60))
47
- retries = 0
48
- while max_retries >= retries:
49
- try:
50
- kwargs["timeout"] = timeout
51
- ret_val = func(*args, **kwargs)
52
- break
53
- except MaxRetriesExceeded:
54
- # do noting, not interested
55
- pass
56
- #except http.client.IncompleteRead as e:
57
- except (socket.timeout,
58
- ssl.SSLError,
59
- http.client.IncompleteRead,
60
- http.client.HTTPException,
61
- requests.RequestException,
62
- urllib.error.URLError,
63
- urllib.error.HTTPError) as e:
64
- if hasattr(e, "code") and int(e.code) == 403:
65
- raise Unavailable(extract_exception_message(e))
66
- logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
67
- logging.info("Your `YT_MUSIC_MAX_RETRIES` values is %d", max_retries)
68
- logging.exception(extract_exception_message(e))
69
- if max_retries <= retries:
70
- self.remove_tmp_files()
71
- raise TimeOut(extract_exception_message(e))
72
- retries += 1
73
- timeout += timeout_increment
74
- time.sleep(pause_secs)
75
- except (VideoUnavailable, VideoPrivate) as e:
76
- raise Unavailable(extract_exception_message(e))
77
-
78
- return ret_val
79
-
80
- def rename_local_file(self, filename: str) -> str:
81
- if not os.path.exists(filename):
82
- raise NameError("No file provided")
83
- path_info = pathlib.Path(filename)
84
- ext = path_info.suffix
85
- old_filename = path_info.stem
86
- time_name = str(time.time()).replace('.', '_')
87
- new_filename = "%s%s" % (time_name, ext)
88
- new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
89
-
90
- os.rename(filename, new_filepath)
91
-
92
- return new_filepath
93
-
94
- def download_thumbnail(self, url: str) -> Union[io.BytesIO, None]:
95
- try:
96
- reply = requests.get(url, stream=True)
97
- if reply.ok and reply.status_code == 200:
98
- image = Image.open(io.BytesIO(reply.content))
99
- image = MediaInfoAbstract.shrink_image_to_fit(image)
100
- io_buf = io.BytesIO()
101
- image.save(io_buf, format='JPEG')
102
- io_buf.seek(0)
103
- return io_buf
104
- except Exception as e:
105
- logging.error("Failed to download download thumbnail!")
106
- logging.exception(e)
107
-
108
- return None
8
+ class YoutubeMusicScraper(YoutubeAbstract):
9
+ YT_MAX_RETRIES_DEFAULT = 6
10
+ YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
11
+ YT_TIMEOUT_DEFAULT = 2
12
+ YT_TIMEOUT_INCREMENT_DEFAULT = 60
109
13
 
110
14
  def _download(self, url: str, timeout: int = 0) -> list:
111
15
  res = []
@@ -116,12 +20,12 @@ class YoutubeMusicScraper(ScraperAbstract):
116
20
  stream = yt.streams.get_audio_only()
117
21
  if stream:
118
22
  logging.info("Announced audio file size: '%d'", stream.filesize)
119
- if stream.filesize > 5e+7:
120
- logging.warning("Downloading size reported by YouTube is over than 50 mb!")
121
- raise FileTooBig("YouTube file is larger than 50 mb")
23
+ if stream.filesize > 2e+9:
24
+ logging.warning("Downloading size reported by YouTube is over than 2 GB!")
25
+ raise FileTooBig("YouTube file is larger than 2 GB")
122
26
  logging.info("Operation timeout is '%d'", timeout)
123
27
  local_file = stream.download(
124
- output_path=DOWNLOAD_DIR,
28
+ output_path=self.DOWNLOAD_DIR,
125
29
  max_retries=0,
126
30
  timeout=timeout,
127
31
  skip_existing=False,
@@ -134,7 +38,7 @@ class YoutubeMusicScraper(ScraperAbstract):
134
38
  "performer": yt.author,
135
39
  "thumb": thumbnail,
136
40
  "canonical_name": stream.title,
137
- "media_type": "audio"
41
+ "media_type": JobType.AUDIO
138
42
  })
139
43
 
140
44
  return res
@@ -1,93 +1,40 @@
1
- import os
2
- import pathlib
3
- import time
4
-
5
- import socket
6
- import ssl
7
-
8
- from typing import Callable, Union
9
-
10
- import requests
11
- import urllib
12
- import http.client
1
+ from warp_beacon.jobs.types import JobType
2
+ from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
13
3
 
14
4
  from pytubefix import YouTube
15
- from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
16
-
17
- from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, extract_exception_message
18
- from warp_beacon.scraper.abstract import ScraperAbstract
19
5
 
20
6
  import logging
21
7
 
22
- class YoutubeShortsScraper(ScraperAbstract):
23
- def __init__(self) -> None:
24
- pass
25
-
26
- def __del__(self) -> None:
27
- pass
28
-
29
- def remove_tmp_files(self) -> None:
30
- for i in os.listdir(DOWNLOAD_DIR):
31
- if "yt_download_" in i:
32
- os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
33
-
34
- def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
35
- ret_val = ''
36
- max_retries = int(os.environ.get("YT_MAX_RETRIES", default=8))
37
- pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", default=3))
38
- retries = 0
39
- while max_retries >= retries:
40
- try:
41
- ret_val = func(*args, **kwargs)
42
- break
43
- except MaxRetriesExceeded:
44
- # do noting, not interested
45
- pass
46
- except (socket.timeout, ssl.SSLError, http.client.HTTPException, requests.RequestException, urllib.error.URLError) as e:
47
- if hasattr(e, "code") and int(e.code) == 403:
48
- raise Unavailable(extract_exception_message(e))
49
- logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
50
- logging.info("Your `YT_MAX_RETRIES` values is %d", max_retries)
51
- logging.exception(extract_exception_message(e))
52
- if max_retries <= retries:
53
- self.remove_tmp_files()
54
- raise TimeOut(extract_exception_message(e))
55
- retries += 1
56
- time.sleep(pause_secs)
57
- except (VideoUnavailable, VideoPrivate) as e:
58
- raise Unavailable(extract_exception_message(e))
59
-
60
- return ret_val
61
-
62
- def rename_local_file(self, filename: str) -> str:
63
- if not os.path.exists(filename):
64
- raise NameError("No file provided")
65
- path_info = pathlib.Path(filename)
66
- ext = path_info.suffix
67
- old_filename = path_info.stem
68
- time_name = str(time.time()).replace('.', '_')
69
- new_filename = "%s%s" % (time_name, ext)
70
- new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
71
-
72
- os.rename(filename, new_filepath)
73
-
74
- return new_filepath
8
+ class YoutubeShortsScraper(YoutubeAbstract):
9
+ YT_MAX_RETRIES_DEFAULT = 8
10
+ YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
11
+ YT_TIMEOUT_DEFAULT = 2
12
+ YT_TIMEOUT_INCREMENT_DEFAULT = 60
75
13
 
76
- def _download(self, url: str) -> list:
14
+ def _download(self, url: str, timeout: int = 0) -> list:
77
15
  res = []
78
- timeout = int(os.environ.get("YT_TIMEOUT", default=2))
16
+ thumbnail = None
79
17
  yt = YouTube(url)
80
18
  stream = yt.streams.get_highest_resolution()
19
+ if yt and yt.thumbnail_url:
20
+ logging.debug("Generation thumb for Shorts ...")
21
+ thumbnail = self.download_thumbnail(yt.thumbnail_url)
81
22
  if stream:
82
23
  local_file = stream.download(
83
- output_path="/tmp",
24
+ output_path=self.DOWNLOAD_DIR,
84
25
  max_retries=0,
85
26
  timeout=timeout,
86
27
  skip_existing=False,
87
28
  filename_prefix="yt_download_"
88
29
  )
89
30
  logging.debug("Temp filename: '%s'", local_file)
90
- res.append({"local_media_path": self.rename_local_file(local_file), "media_type": "video"})
31
+ res.append({
32
+ "local_media_path": self.rename_local_file(local_file),
33
+ "performer": yt.author,
34
+ "thumb": thumbnail,
35
+ "canonical_name": stream.title,
36
+ "media_type": JobType.VIDEO
37
+ })
91
38
 
92
39
  return res
93
40
 
@@ -0,0 +1,41 @@
1
+ from warp_beacon.jobs.types import JobType
2
+ from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
3
+
4
+ from pytubefix import YouTube
5
+
6
+ import logging
7
+
8
+ class YoutubeScraper(YoutubeAbstract):
9
+ YT_MAX_RETRIES_DEFAULT = 8
10
+ YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
11
+ YT_TIMEOUT_DEFAULT = 2
12
+ YT_TIMEOUT_INCREMENT_DEFAULT = 60
13
+
14
+ def _download(self, url: str, timeout: int = 0) -> list:
15
+ res = []
16
+ thumbnail = None
17
+ yt = YouTube(url)
18
+ if yt and yt.thumbnail_url:
19
+ thumbnail = self.download_thumbnail(yt.thumbnail_url)
20
+ stream = yt.streams.get_highest_resolution()
21
+ if stream:
22
+ local_file = stream.download(
23
+ output_path=self.DOWNLOAD_DIR,
24
+ max_retries=0,
25
+ timeout=timeout,
26
+ skip_existing=False,
27
+ filename_prefix="yt_download_"
28
+ )
29
+ logging.debug("Temp filename: '%s'", local_file)
30
+ res.append({
31
+ "local_media_path": self.rename_local_file(local_file),
32
+ "performer": yt.author,
33
+ "thumb": thumbnail,
34
+ "canonical_name": stream.title,
35
+ "media_type": JobType.VIDEO
36
+ })
37
+
38
+ return res
39
+
40
+ def download(self, url: str) -> list:
41
+ return self._download_hndlr(self._download, url)
@@ -1,11 +1,19 @@
1
1
  import os
2
2
  #from typing import Optional
3
- import logging
3
+ from enum import Enum
4
4
 
5
5
  from urllib.parse import urlparse, parse_qs
6
6
 
7
7
  from pymongo import MongoClient
8
8
 
9
+ import logging
10
+
11
+ class UrlParseMode(Enum):
12
+ OTHER = 0
13
+ YT_MUSIC = 1
14
+ YT_SHORTS = 2
15
+ YOUTUBE = 3
16
+
9
17
  VIDEO_STORAGE_DIR = os.environ.get("VIDEO_STORAGE_DIR", default="/var/warp_beacon/videos")
10
18
 
11
19
  class Storage(object):
@@ -28,12 +36,22 @@ class Storage(object):
28
36
 
29
37
  @staticmethod
30
38
  def compute_uniq(url: str) -> str:
31
- if "music.youtube.com" in url:
32
- qs = parse_qs(urlparse(url).query)
33
- yt_vid_id = qs.get('v', None)
39
+ parse_mode = UrlParseMode.OTHER
40
+ if "music.youtube.com/" in url:
41
+ parse_mode = UrlParseMode.YT_MUSIC
42
+ elif "youtube.com/shorts/" in url:
43
+ parse_mode = UrlParseMode.YT_SHORTS
44
+ elif "youtube.com/" in url:
45
+ parse_mode = UrlParseMode.YOUTUBE
46
+
47
+ if parse_mode is not UrlParseMode.OTHER and parse_mode is not UrlParseMode.YT_SHORTS:
48
+ purl = urlparse(url)
49
+ qs = parse_qs(purl.query)
50
+ yt_vid_id_list = qs.get('v', None)
51
+ yt_vid_id = yt_vid_id_list.pop() if yt_vid_id_list else ""
34
52
  if yt_vid_id:
35
- path = urlparse(url).path.strip('/').replace("watch", "yt_music")
36
- return "%s/%s" % (path, yt_vid_id)
53
+ path = urlparse(url).path.strip('/').replace("watch", ("yt_music" if parse_mode is UrlParseMode.YT_MUSIC else "youtube"))
54
+ return ("%s/%s" % (path, yt_vid_id)).strip('/')
37
55
  else:
38
56
  raise ValueError("Failed to generate uniq_id for url '%s'", url)
39
57
 
@@ -65,6 +83,9 @@ class Storage(object):
65
83
  uniq_id = self.compute_uniq(media_url)
66
84
  media_ids = []
67
85
  for tg_file_id in tg_file_ids:
86
+ if not tg_file_id:
87
+ logging.warning("Passed empty `tg_file_id`! Skipping.")
88
+ continue
68
89
  if self.db_lookup_id(uniq_id):
69
90
  logging.info("Detected existing uniq_id, skipping storage write operation")
70
91
  continue
File without changes