warp-beacon 1.2.6-py3-none-any.whl → 2.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etc/warp_beacon/warp_beacon.conf +4 -2
- warp_beacon/__version__.py +1 -1
- warp_beacon/jobs/__init__.py +2 -0
- warp_beacon/jobs/abstract.py +21 -4
- warp_beacon/jobs/download_job.py +6 -3
- warp_beacon/jobs/types.py +9 -0
- warp_beacon/jobs/upload_job.py +1 -0
- warp_beacon/mediainfo/abstract.py +11 -1
- warp_beacon/mediainfo/silencer.py +46 -0
- warp_beacon/mediainfo/video.py +13 -1
- warp_beacon/scraper/__init__.py +38 -23
- warp_beacon/scraper/abstract.py +26 -0
- warp_beacon/scraper/instagram.py +35 -24
- warp_beacon/scraper/youtube/abstract.py +105 -0
- warp_beacon/scraper/youtube/music.py +12 -108
- warp_beacon/scraper/youtube/shorts.py +20 -73
- warp_beacon/scraper/youtube/youtube.py +41 -0
- warp_beacon/storage/__init__.py +27 -6
- warp_beacon/telegram/__init__.py +0 -0
- warp_beacon/telegram/bot.py +348 -0
- warp_beacon/telegram/handlers.py +163 -0
- warp_beacon/telegram/placeholder_message.py +191 -0
- warp_beacon/telegram/utils.py +73 -0
- warp_beacon/uploader/__init__.py +9 -9
- warp_beacon/warp_beacon.py +8 -594
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/METADATA +4 -2
- warp_beacon-2.0.1.dist-info/RECORD +40 -0
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/WHEEL +1 -1
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/top_level.txt +9 -0
- warp_beacon-1.2.6.dist-info/RECORD +0 -31
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/LICENSE +0 -0
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/entry_points.txt +0 -0
warp_beacon/scraper/youtube/music.py
CHANGED
@@ -1,111 +1,15 @@
-import
-import
-import pathlib
-import time
-
-import socket
-import ssl
-
-from typing import Callable, Union
-
-import requests
-import urllib
-import http.client
-
-from PIL import Image
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
 
 from pytubefix import YouTube
-from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
-
-from warp_beacon.mediainfo.abstract import MediaInfoAbstract
-from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, extract_exception_message
-from warp_beacon.scraper.abstract import ScraperAbstract
 
 import logging
 
-
-
-
-
-
-        pass
-
-    def __del__(self) -> None:
-        pass
-
-    def remove_tmp_files(self) -> None:
-        for i in os.listdir(DOWNLOAD_DIR):
-            if "yt_download_" in i:
-                os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
-
-    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
-        ret_val = ''
-        max_retries = int(os.environ.get("YT_MUSIC_MAX_RETRIES", default=6))
-        pause_secs = int(os.environ.get("YT_MUSIC_PAUSE_BEFORE_RETRY", default=3))
-        timeout = int(os.environ.get("YT_MUSIC_TIMEOUT", default=60))
-        timeout_increment = int(os.environ.get("YT_MUSIC_TIMEOUT_INCREMENT", default=60))
-        retries = 0
-        while max_retries >= retries:
-            try:
-                kwargs["timeout"] = timeout
-                ret_val = func(*args, **kwargs)
-                break
-            except MaxRetriesExceeded:
-                # do noting, not interested
-                pass
-            #except http.client.IncompleteRead as e:
-            except (socket.timeout,
-                ssl.SSLError,
-                http.client.IncompleteRead,
-                http.client.HTTPException,
-                requests.RequestException,
-                urllib.error.URLError,
-                urllib.error.HTTPError) as e:
-                if hasattr(e, "code") and int(e.code) == 403:
-                    raise Unavailable(extract_exception_message(e))
-                logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
-                logging.info("Your `YT_MUSIC_MAX_RETRIES` values is %d", max_retries)
-                logging.exception(extract_exception_message(e))
-                if max_retries <= retries:
-                    self.remove_tmp_files()
-                    raise TimeOut(extract_exception_message(e))
-                retries += 1
-                timeout += timeout_increment
-                time.sleep(pause_secs)
-            except (VideoUnavailable, VideoPrivate) as e:
-                raise Unavailable(extract_exception_message(e))
-
-        return ret_val
-
-    def rename_local_file(self, filename: str) -> str:
-        if not os.path.exists(filename):
-            raise NameError("No file provided")
-        path_info = pathlib.Path(filename)
-        ext = path_info.suffix
-        old_filename = path_info.stem
-        time_name = str(time.time()).replace('.', '_')
-        new_filename = "%s%s" % (time_name, ext)
-        new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
-
-        os.rename(filename, new_filepath)
-
-        return new_filepath
-
-    def download_thumbnail(self, url: str) -> Union[io.BytesIO, None]:
-        try:
-            reply = requests.get(url, stream=True)
-            if reply.ok and reply.status_code == 200:
-                image = Image.open(io.BytesIO(reply.content))
-                image = MediaInfoAbstract.shrink_image_to_fit(image)
-                io_buf = io.BytesIO()
-                image.save(io_buf, format='JPEG')
-                io_buf.seek(0)
-                return io_buf
-        except Exception as e:
-            logging.error("Failed to download download thumbnail!")
-            logging.exception(e)
-
-        return None
+class YoutubeMusicScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 6
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
 
     def _download(self, url: str, timeout: int = 0) -> list:
         res = []
@@ -116,12 +20,12 @@ class YoutubeMusicScraper(ScraperAbstract):
         stream = yt.streams.get_audio_only()
         if stream:
             logging.info("Announced audio file size: '%d'", stream.filesize)
-            if stream.filesize >
-                logging.warning("Downloading size reported by YouTube is over than
-                raise FileTooBig("YouTube file is larger than
+            if stream.filesize > 2e+9:
+                logging.warning("Downloading size reported by YouTube is over than 2 GB!")
+                raise FileTooBig("YouTube file is larger than 2 GB")
             logging.info("Operation timeout is '%d'", timeout)
             local_file = stream.download(
-                output_path=DOWNLOAD_DIR,
+                output_path=self.DOWNLOAD_DIR,
                 max_retries=0,
                 timeout=timeout,
                 skip_existing=False,
@@ -134,7 +38,7 @@ class YoutubeMusicScraper(ScraperAbstract):
                 "performer": yt.author,
                 "thumb": thumbnail,
                 "canonical_name": stream.title,
-                "media_type":
+                "media_type": JobType.AUDIO
             })
 
         return res
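The retry, rename, and thumbnail helpers deleted above now live in the new warp_beacon/scraper/youtube/abstract.py (+105 lines), whose body is not shown in this diff. For orientation only, here is a minimal sketch of how the shared retry handler presumably consumes the per-subclass YT_*_DEFAULT constants; the environment-variable names, the DOWNLOAD_DIR placeholder, and the broad exception handling are assumptions, not the package's actual code.

import os
import time
import logging

class YoutubeAbstract:
    # Assumed defaults; the real subclasses override these (see the hunks above).
    DOWNLOAD_DIR = "/tmp"  # placeholder: the real download directory is configured elsewhere
    YT_MAX_RETRIES_DEFAULT = 8
    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
    YT_TIMEOUT_DEFAULT = 2
    YT_TIMEOUT_INCREMENT_DEFAULT = 60

    def _download_hndlr(self, func, *args, **kwargs):
        # Generic retry loop mirroring the per-scraper loops removed above:
        # grow the timeout on each attempt and sleep between retries.
        max_retries = int(os.environ.get("YT_MAX_RETRIES", self.YT_MAX_RETRIES_DEFAULT))
        pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", self.YT_PAUSE_BEFORE_RETRY_DEFAULT))
        timeout = int(os.environ.get("YT_TIMEOUT", self.YT_TIMEOUT_DEFAULT))
        increment = int(os.environ.get("YT_TIMEOUT_INCREMENT", self.YT_TIMEOUT_INCREMENT_DEFAULT))
        retries = 0
        while retries <= max_retries:
            try:
                kwargs["timeout"] = timeout
                return func(*args, **kwargs)
            except Exception as e:  # the real handler catches specific network/pytubefix errors
                logging.exception(e)
                if retries >= max_retries:
                    raise
                retries += 1
                timeout += increment
                time.sleep(pause_secs)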
warp_beacon/scraper/youtube/shorts.py
CHANGED
@@ -1,93 +1,40 @@
-import
-import
-import time
-
-import socket
-import ssl
-
-from typing import Callable, Union
-
-import requests
-import urllib
-import http.client
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
 
 from pytubefix import YouTube
-from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
-
-from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, extract_exception_message
-from warp_beacon.scraper.abstract import ScraperAbstract
 
 import logging
 
-class YoutubeShortsScraper(
-
-
-
-
-        pass
-
-    def remove_tmp_files(self) -> None:
-        for i in os.listdir(DOWNLOAD_DIR):
-            if "yt_download_" in i:
-                os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
-
-    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
-        ret_val = ''
-        max_retries = int(os.environ.get("YT_MAX_RETRIES", default=8))
-        pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", default=3))
-        retries = 0
-        while max_retries >= retries:
-            try:
-                ret_val = func(*args, **kwargs)
-                break
-            except MaxRetriesExceeded:
-                # do noting, not interested
-                pass
-            except (socket.timeout, ssl.SSLError, http.client.HTTPException, requests.RequestException, urllib.error.URLError) as e:
-                if hasattr(e, "code") and int(e.code) == 403:
-                    raise Unavailable(extract_exception_message(e))
-                logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
-                logging.info("Your `YT_MAX_RETRIES` values is %d", max_retries)
-                logging.exception(extract_exception_message(e))
-                if max_retries <= retries:
-                    self.remove_tmp_files()
-                    raise TimeOut(extract_exception_message(e))
-                retries += 1
-                time.sleep(pause_secs)
-            except (VideoUnavailable, VideoPrivate) as e:
-                raise Unavailable(extract_exception_message(e))
-
-        return ret_val
-
-    def rename_local_file(self, filename: str) -> str:
-        if not os.path.exists(filename):
-            raise NameError("No file provided")
-        path_info = pathlib.Path(filename)
-        ext = path_info.suffix
-        old_filename = path_info.stem
-        time_name = str(time.time()).replace('.', '_')
-        new_filename = "%s%s" % (time_name, ext)
-        new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
-
-        os.rename(filename, new_filepath)
-
-        return new_filepath
+class YoutubeShortsScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 8
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
 
-    def _download(self, url: str) -> list:
+    def _download(self, url: str, timeout: int = 0) -> list:
         res = []
-
+        thumbnail = None
         yt = YouTube(url)
         stream = yt.streams.get_highest_resolution()
+        if yt and yt.thumbnail_url:
+            logging.debug("Generation thumb for Shorts ...")
+            thumbnail = self.download_thumbnail(yt.thumbnail_url)
         if stream:
             local_file = stream.download(
-                output_path=
+                output_path=self.DOWNLOAD_DIR,
                 max_retries=0,
                 timeout=timeout,
                 skip_existing=False,
                 filename_prefix="yt_download_"
             )
             logging.debug("Temp filename: '%s'", local_file)
-            res.append({
+            res.append({
+                "local_media_path": self.rename_local_file(local_file),
+                "performer": yt.author,
+                "thumb": thumbnail,
+                "canonical_name": stream.title,
+                "media_type": JobType.VIDEO
+            })
 
         return res
 
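The Shorts scraper now builds a thumbnail via self.download_thumbnail(), a helper previously implemented in music.py (removed above) and presumably moved into YoutubeAbstract. A self-contained sketch based on that removed implementation; the resize step substitutes PIL's thumbnail() for the project's MediaInfoAbstract.shrink_image_to_fit(), and the 320x320 size is an illustrative assumption.

import io
import logging
from typing import Union

import requests
from PIL import Image

def download_thumbnail(url: str) -> Union[io.BytesIO, None]:
    try:
        reply = requests.get(url, stream=True)
        if reply.ok and reply.status_code == 200:
            image = Image.open(io.BytesIO(reply.content))
            # the real code calls MediaInfoAbstract.shrink_image_to_fit() here
            image = image.convert("RGB")
            image.thumbnail((320, 320))
            io_buf = io.BytesIO()
            image.save(io_buf, format="JPEG")
            io_buf.seek(0)
            return io_buf
    except Exception as e:
        logging.error("Failed to download thumbnail!")
        logging.exception(e)
    return None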
warp_beacon/scraper/youtube/youtube.py
ADDED
@@ -0,0 +1,41 @@
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
+
+from pytubefix import YouTube
+
+import logging
+
+class YoutubeScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 8
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
+
+    def _download(self, url: str, timeout: int = 0) -> list:
+        res = []
+        thumbnail = None
+        yt = YouTube(url)
+        if yt and yt.thumbnail_url:
+            thumbnail = self.download_thumbnail(yt.thumbnail_url)
+        stream = yt.streams.get_highest_resolution()
+        if stream:
+            local_file = stream.download(
+                output_path=self.DOWNLOAD_DIR,
+                max_retries=0,
+                timeout=timeout,
+                skip_existing=False,
+                filename_prefix="yt_download_"
+            )
+            logging.debug("Temp filename: '%s'", local_file)
+            res.append({
+                "local_media_path": self.rename_local_file(local_file),
+                "performer": yt.author,
+                "thumb": thumbnail,
+                "canonical_name": stream.title,
+                "media_type": JobType.VIDEO
+            })
+
+        return res
+
+    def download(self, url: str) -> list:
+        return self._download_hndlr(self._download, url)
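Each scraper exposes download(url), which wraps _download() in the shared retry handler and returns a list of media dicts. An illustrative call, assuming the package is installed and the scraper takes no constructor arguments (the URL is a placeholder):

from warp_beacon.scraper.youtube.youtube import YoutubeScraper

items = YoutubeScraper().download("https://www.youtube.com/watch?v=VIDEO_ID")
for item in items:
    # keys taken from the dict built in _download() above
    print(item["media_type"], item["local_media_path"], item["canonical_name"])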
warp_beacon/storage/__init__.py
CHANGED
@@ -1,11 +1,19 @@
 import os
 #from typing import Optional
-import
+from enum import Enum
 
 from urllib.parse import urlparse, parse_qs
 
 from pymongo import MongoClient
 
+import logging
+
+class UrlParseMode(Enum):
+    OTHER = 0
+    YT_MUSIC = 1
+    YT_SHORTS = 2
+    YOUTUBE = 3
+
 VIDEO_STORAGE_DIR = os.environ.get("VIDEO_STORAGE_DIR", default="/var/warp_beacon/videos")
 
 class Storage(object):
@@ -28,12 +36,22 @@ class Storage(object):
 
     @staticmethod
     def compute_uniq(url: str) -> str:
-
-
-
+        parse_mode = UrlParseMode.OTHER
+        if "music.youtube.com/" in url:
+            parse_mode = UrlParseMode.YT_MUSIC
+        elif "youtube.com/shorts/" in url:
+            parse_mode = UrlParseMode.YT_SHORTS
+        elif "youtube.com/" in url:
+            parse_mode = UrlParseMode.YOUTUBE
+
+        if parse_mode is not UrlParseMode.OTHER and parse_mode is not UrlParseMode.YT_SHORTS:
+            purl = urlparse(url)
+            qs = parse_qs(purl.query)
+            yt_vid_id_list = qs.get('v', None)
+            yt_vid_id = yt_vid_id_list.pop() if yt_vid_id_list else ""
             if yt_vid_id:
-                path = urlparse(url).path.strip('/').replace("watch", "yt_music")
-                return "%s/%s" % (path, yt_vid_id)
+                path = urlparse(url).path.strip('/').replace("watch", ("yt_music" if parse_mode is UrlParseMode.YT_MUSIC else "youtube"))
+                return ("%s/%s" % (path, yt_vid_id)).strip('/')
             else:
                 raise ValueError("Failed to generate uniq_id for url '%s'", url)
 
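For reference, the YouTube branch of the new compute_uniq() as a standalone function, trimmed to the lines shown in the hunk above; handling of other URL types (and of Shorts) sits outside this hunk and is stubbed here, and the sample URLs in the comments are illustrative.

from enum import Enum
from urllib.parse import urlparse, parse_qs

class UrlParseMode(Enum):
    OTHER = 0
    YT_MUSIC = 1
    YT_SHORTS = 2
    YOUTUBE = 3

def compute_uniq(url: str) -> str:
    parse_mode = UrlParseMode.OTHER
    if "music.youtube.com/" in url:
        parse_mode = UrlParseMode.YT_MUSIC
    elif "youtube.com/shorts/" in url:
        parse_mode = UrlParseMode.YT_SHORTS
    elif "youtube.com/" in url:
        parse_mode = UrlParseMode.YOUTUBE

    if parse_mode is not UrlParseMode.OTHER and parse_mode is not UrlParseMode.YT_SHORTS:
        purl = urlparse(url)
        yt_vid_id_list = parse_qs(purl.query).get('v', None)
        yt_vid_id = yt_vid_id_list.pop() if yt_vid_id_list else ""
        if yt_vid_id:
            path = purl.path.strip('/').replace("watch", "yt_music" if parse_mode is UrlParseMode.YT_MUSIC else "youtube")
            return ("%s/%s" % (path, yt_vid_id)).strip('/')
        raise ValueError("Failed to generate uniq_id for url '%s'" % url)
    raise NotImplementedError("non-YouTube and Shorts URLs are handled by code outside this hunk")

# compute_uniq("https://music.youtube.com/watch?v=abc123")  -> "yt_music/abc123"
# compute_uniq("https://www.youtube.com/watch?v=abc123")    -> "youtube/abc123"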
@@ -65,6 +83,9 @@ class Storage(object):
         uniq_id = self.compute_uniq(media_url)
         media_ids = []
         for tg_file_id in tg_file_ids:
+            if not tg_file_id:
+                logging.warning("Passed empty `tg_file_id`! Skipping.")
+                continue
             if self.db_lookup_id(uniq_id):
                 logging.info("Detected existing uniq_id, skipping storage write operation")
                 continue

File without changes