warp-beacon 1.2.6-py3-none-any.whl → 2.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- etc/warp_beacon/warp_beacon.conf +4 -2
- warp_beacon/__version__.py +1 -1
- warp_beacon/jobs/__init__.py +2 -0
- warp_beacon/jobs/abstract.py +21 -4
- warp_beacon/jobs/download_job.py +6 -3
- warp_beacon/jobs/types.py +9 -0
- warp_beacon/jobs/upload_job.py +1 -0
- warp_beacon/mediainfo/abstract.py +11 -1
- warp_beacon/mediainfo/silencer.py +46 -0
- warp_beacon/mediainfo/video.py +13 -1
- warp_beacon/scraper/__init__.py +38 -23
- warp_beacon/scraper/abstract.py +26 -0
- warp_beacon/scraper/instagram.py +35 -24
- warp_beacon/scraper/youtube/abstract.py +105 -0
- warp_beacon/scraper/youtube/music.py +12 -108
- warp_beacon/scraper/youtube/shorts.py +20 -73
- warp_beacon/scraper/youtube/youtube.py +41 -0
- warp_beacon/storage/__init__.py +27 -6
- warp_beacon/telegram/__init__.py +0 -0
- warp_beacon/telegram/bot.py +348 -0
- warp_beacon/telegram/handlers.py +163 -0
- warp_beacon/telegram/placeholder_message.py +191 -0
- warp_beacon/telegram/utils.py +73 -0
- warp_beacon/uploader/__init__.py +9 -9
- warp_beacon/warp_beacon.py +8 -594
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/METADATA +4 -2
- warp_beacon-2.0.1.dist-info/RECORD +40 -0
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/WHEEL +1 -1
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/top_level.txt +9 -0
- warp_beacon-1.2.6.dist-info/RECORD +0 -31
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/LICENSE +0 -0
- {warp_beacon-1.2.6.dist-info → warp_beacon-2.0.1.dist-info}/entry_points.txt +0 -0
warp_beacon/scraper/youtube/music.py
CHANGED
@@ -1,111 +1,15 @@
-import
-import
-import pathlib
-import time
-
-import socket
-import ssl
-
-from typing import Callable, Union
-
-import requests
-import urllib
-import http.client
-
-from PIL import Image
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
 
 from pytubefix import YouTube
-from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
-
-from warp_beacon.mediainfo.abstract import MediaInfoAbstract
-from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, extract_exception_message
-from warp_beacon.scraper.abstract import ScraperAbstract
 
 import logging
 
-
-
-
-
-
-        pass
-
-    def __del__(self) -> None:
-        pass
-
-    def remove_tmp_files(self) -> None:
-        for i in os.listdir(DOWNLOAD_DIR):
-            if "yt_download_" in i:
-                os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
-
-    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
-        ret_val = ''
-        max_retries = int(os.environ.get("YT_MUSIC_MAX_RETRIES", default=6))
-        pause_secs = int(os.environ.get("YT_MUSIC_PAUSE_BEFORE_RETRY", default=3))
-        timeout = int(os.environ.get("YT_MUSIC_TIMEOUT", default=60))
-        timeout_increment = int(os.environ.get("YT_MUSIC_TIMEOUT_INCREMENT", default=60))
-        retries = 0
-        while max_retries >= retries:
-            try:
-                kwargs["timeout"] = timeout
-                ret_val = func(*args, **kwargs)
-                break
-            except MaxRetriesExceeded:
-                # do noting, not interested
-                pass
-            #except http.client.IncompleteRead as e:
-            except (socket.timeout,
-                ssl.SSLError,
-                http.client.IncompleteRead,
-                http.client.HTTPException,
-                requests.RequestException,
-                urllib.error.URLError,
-                urllib.error.HTTPError) as e:
-                if hasattr(e, "code") and int(e.code) == 403:
-                    raise Unavailable(extract_exception_message(e))
-                logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
-                logging.info("Your `YT_MUSIC_MAX_RETRIES` values is %d", max_retries)
-                logging.exception(extract_exception_message(e))
-                if max_retries <= retries:
-                    self.remove_tmp_files()
-                    raise TimeOut(extract_exception_message(e))
-                retries += 1
-                timeout += timeout_increment
-                time.sleep(pause_secs)
-            except (VideoUnavailable, VideoPrivate) as e:
-                raise Unavailable(extract_exception_message(e))
-
-        return ret_val
-
-    def rename_local_file(self, filename: str) -> str:
-        if not os.path.exists(filename):
-            raise NameError("No file provided")
-        path_info = pathlib.Path(filename)
-        ext = path_info.suffix
-        old_filename = path_info.stem
-        time_name = str(time.time()).replace('.', '_')
-        new_filename = "%s%s" % (time_name, ext)
-        new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
-
-        os.rename(filename, new_filepath)
-
-        return new_filepath
-
-    def download_thumbnail(self, url: str) -> Union[io.BytesIO, None]:
-        try:
-            reply = requests.get(url, stream=True)
-            if reply.ok and reply.status_code == 200:
-                image = Image.open(io.BytesIO(reply.content))
-                image = MediaInfoAbstract.shrink_image_to_fit(image)
-                io_buf = io.BytesIO()
-                image.save(io_buf, format='JPEG')
-                io_buf.seek(0)
-                return io_buf
-        except Exception as e:
-            logging.error("Failed to download download thumbnail!")
-            logging.exception(e)
-
-        return None
+class YoutubeMusicScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 6
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
 
     def _download(self, url: str, timeout: int = 0) -> list:
         res = []
@@ -116,12 +20,12 @@ class YoutubeMusicScraper(ScraperAbstract):
         stream = yt.streams.get_audio_only()
         if stream:
             logging.info("Announced audio file size: '%d'", stream.filesize)
-            if stream.filesize >
-                logging.warning("Downloading size reported by YouTube is over than
-                raise FileTooBig("YouTube file is larger than
+            if stream.filesize > 2e+9:
+                logging.warning("Downloading size reported by YouTube is over than 2 GB!")
+                raise FileTooBig("YouTube file is larger than 2 GB")
             logging.info("Operation timeout is '%d'", timeout)
             local_file = stream.download(
-                output_path=DOWNLOAD_DIR,
+                output_path=self.DOWNLOAD_DIR,
                 max_retries=0,
                 timeout=timeout,
                 skip_existing=False,
@@ -134,7 +38,7 @@ class YoutubeMusicScraper(ScraperAbstract):
                 "performer": yt.author,
                 "thumb": thumbnail,
                 "canonical_name": stream.title,
-                "media_type":
+                "media_type": JobType.AUDIO
             })
 
         return res
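The retry, rename, and thumbnail helpers deleted above now live in the new warp_beacon/scraper/youtube/abstract.py (+105 lines), whose body is not shown in this diff. For orientation only, here is a minimal sketch of how the shared retry handler presumably consumes the per-subclass YT_*_DEFAULT constants; the environment-variable names, the DOWNLOAD_DIR placeholder, and the broad exception handling are assumptions, not the package's actual code.

import os
import time
import logging

class YoutubeAbstract:
    # Assumed defaults; the real subclasses override these (see the hunks above).
    DOWNLOAD_DIR = "/tmp"  # placeholder: the real download directory is configured elsewhere
    YT_MAX_RETRIES_DEFAULT = 8
    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
    YT_TIMEOUT_DEFAULT = 2
    YT_TIMEOUT_INCREMENT_DEFAULT = 60

    def _download_hndlr(self, func, *args, **kwargs):
        # Generic retry loop mirroring the per-scraper loops removed above:
        # grow the timeout on each attempt and sleep between retries.
        max_retries = int(os.environ.get("YT_MAX_RETRIES", self.YT_MAX_RETRIES_DEFAULT))
        pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", self.YT_PAUSE_BEFORE_RETRY_DEFAULT))
        timeout = int(os.environ.get("YT_TIMEOUT", self.YT_TIMEOUT_DEFAULT))
        increment = int(os.environ.get("YT_TIMEOUT_INCREMENT", self.YT_TIMEOUT_INCREMENT_DEFAULT))
        retries = 0
        while retries <= max_retries:
            try:
                kwargs["timeout"] = timeout
                return func(*args, **kwargs)
            except Exception as e:  # the real handler catches specific network/pytubefix errors
                logging.exception(e)
                if retries >= max_retries:
                    raise
                retries += 1
                timeout += increment
                time.sleep(pause_secs)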
warp_beacon/scraper/youtube/shorts.py
CHANGED
@@ -1,93 +1,40 @@
-import
-import
-import time
-
-import socket
-import ssl
-
-from typing import Callable, Union
-
-import requests
-import urllib
-import http.client
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
 
 from pytubefix import YouTube
-from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
-
-from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, extract_exception_message
-from warp_beacon.scraper.abstract import ScraperAbstract
 
 import logging
 
-class YoutubeShortsScraper(
-
-
-
-
-        pass
-
-    def remove_tmp_files(self) -> None:
-        for i in os.listdir(DOWNLOAD_DIR):
-            if "yt_download_" in i:
-                os.unlink("%s/%s" % (DOWNLOAD_DIR, i))
-
-    def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
-        ret_val = ''
-        max_retries = int(os.environ.get("YT_MAX_RETRIES", default=8))
-        pause_secs = int(os.environ.get("YT_PAUSE_BEFORE_RETRY", default=3))
-        retries = 0
-        while max_retries >= retries:
-            try:
-                ret_val = func(*args, **kwargs)
-                break
-            except MaxRetriesExceeded:
-                # do noting, not interested
-                pass
-            except (socket.timeout, ssl.SSLError, http.client.HTTPException, requests.RequestException, urllib.error.URLError) as e:
-                if hasattr(e, "code") and int(e.code) == 403:
-                    raise Unavailable(extract_exception_message(e))
-                logging.warning("Youtube read timeout! Retrying in %d seconds ...", pause_secs)
-                logging.info("Your `YT_MAX_RETRIES` values is %d", max_retries)
-                logging.exception(extract_exception_message(e))
-                if max_retries <= retries:
-                    self.remove_tmp_files()
-                    raise TimeOut(extract_exception_message(e))
-                retries += 1
-                time.sleep(pause_secs)
-            except (VideoUnavailable, VideoPrivate) as e:
-                raise Unavailable(extract_exception_message(e))
-
-        return ret_val
-
-    def rename_local_file(self, filename: str) -> str:
-        if not os.path.exists(filename):
-            raise NameError("No file provided")
-        path_info = pathlib.Path(filename)
-        ext = path_info.suffix
-        old_filename = path_info.stem
-        time_name = str(time.time()).replace('.', '_')
-        new_filename = "%s%s" % (time_name, ext)
-        new_filepath = "%s/%s" % (os.path.dirname(filename), new_filename)
-
-        os.rename(filename, new_filepath)
-
-        return new_filepath
+class YoutubeShortsScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 8
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
 
-    def _download(self, url: str) -> list:
+    def _download(self, url: str, timeout: int = 0) -> list:
         res = []
-
+        thumbnail = None
         yt = YouTube(url)
         stream = yt.streams.get_highest_resolution()
+        if yt and yt.thumbnail_url:
+            logging.debug("Generation thumb for Shorts ...")
+            thumbnail = self.download_thumbnail(yt.thumbnail_url)
         if stream:
             local_file = stream.download(
-                output_path=
+                output_path=self.DOWNLOAD_DIR,
                 max_retries=0,
                 timeout=timeout,
                 skip_existing=False,
                 filename_prefix="yt_download_"
             )
             logging.debug("Temp filename: '%s'", local_file)
-            res.append({
+            res.append({
+                "local_media_path": self.rename_local_file(local_file),
+                "performer": yt.author,
+                "thumb": thumbnail,
+                "canonical_name": stream.title,
+                "media_type": JobType.VIDEO
+            })
 
         return res
 
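The Shorts scraper now builds a thumbnail via self.download_thumbnail(), a helper previously implemented in music.py (removed above) and presumably moved into YoutubeAbstract. A self-contained sketch based on that removed implementation; the resize step substitutes PIL's thumbnail() for the project's MediaInfoAbstract.shrink_image_to_fit(), and the 320x320 size is an illustrative assumption.

import io
import logging
from typing import Union

import requests
from PIL import Image

def download_thumbnail(url: str) -> Union[io.BytesIO, None]:
    try:
        reply = requests.get(url, stream=True)
        if reply.ok and reply.status_code == 200:
            image = Image.open(io.BytesIO(reply.content))
            # the real code calls MediaInfoAbstract.shrink_image_to_fit() here
            image = image.convert("RGB")
            image.thumbnail((320, 320))
            io_buf = io.BytesIO()
            image.save(io_buf, format="JPEG")
            io_buf.seek(0)
            return io_buf
    except Exception as e:
        logging.error("Failed to download thumbnail!")
        logging.exception(e)
    return None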
warp_beacon/scraper/youtube/youtube.py
ADDED
@@ -0,0 +1,41 @@
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
+
+from pytubefix import YouTube
+
+import logging
+
+class YoutubeScraper(YoutubeAbstract):
+    YT_MAX_RETRIES_DEFAULT = 8
+    YT_PAUSE_BEFORE_RETRY_DEFAULT = 3
+    YT_TIMEOUT_DEFAULT = 2
+    YT_TIMEOUT_INCREMENT_DEFAULT = 60
+
+    def _download(self, url: str, timeout: int = 0) -> list:
+        res = []
+        thumbnail = None
+        yt = YouTube(url)
+        if yt and yt.thumbnail_url:
+            thumbnail = self.download_thumbnail(yt.thumbnail_url)
+        stream = yt.streams.get_highest_resolution()
+        if stream:
+            local_file = stream.download(
+                output_path=self.DOWNLOAD_DIR,
+                max_retries=0,
+                timeout=timeout,
+                skip_existing=False,
+                filename_prefix="yt_download_"
+            )
+            logging.debug("Temp filename: '%s'", local_file)
+            res.append({
+                "local_media_path": self.rename_local_file(local_file),
+                "performer": yt.author,
+                "thumb": thumbnail,
+                "canonical_name": stream.title,
+                "media_type": JobType.VIDEO
+            })
+
+        return res
+
+    def download(self, url: str) -> list:
+        return self._download_hndlr(self._download, url)
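Each scraper exposes download(url), which wraps _download() in the shared retry handler and returns a list of media dicts. An illustrative call, assuming the package is installed and the scraper takes no constructor arguments (the URL is a placeholder):

from warp_beacon.scraper.youtube.youtube import YoutubeScraper

items = YoutubeScraper().download("https://www.youtube.com/watch?v=VIDEO_ID")
for item in items:
    # keys taken from the dict built in _download() above
    print(item["media_type"], item["local_media_path"], item["canonical_name"])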
warp_beacon/storage/__init__.py
CHANGED
@@ -1,11 +1,19 @@
 import os
 #from typing import Optional
-import
+from enum import Enum
 
 from urllib.parse import urlparse, parse_qs
 
 from pymongo import MongoClient
 
+import logging
+
+class UrlParseMode(Enum):
+    OTHER = 0
+    YT_MUSIC = 1
+    YT_SHORTS = 2
+    YOUTUBE = 3
+
 VIDEO_STORAGE_DIR = os.environ.get("VIDEO_STORAGE_DIR", default="/var/warp_beacon/videos")
 
 class Storage(object):
@@ -28,12 +36,22 @@ class Storage(object):
 
     @staticmethod
     def compute_uniq(url: str) -> str:
-
-
-
+        parse_mode = UrlParseMode.OTHER
+        if "music.youtube.com/" in url:
+            parse_mode = UrlParseMode.YT_MUSIC
+        elif "youtube.com/shorts/" in url:
+            parse_mode = UrlParseMode.YT_SHORTS
+        elif "youtube.com/" in url:
+            parse_mode = UrlParseMode.YOUTUBE
+
+        if parse_mode is not UrlParseMode.OTHER and parse_mode is not UrlParseMode.YT_SHORTS:
+            purl = urlparse(url)
+            qs = parse_qs(purl.query)
+            yt_vid_id_list = qs.get('v', None)
+            yt_vid_id = yt_vid_id_list.pop() if yt_vid_id_list else ""
             if yt_vid_id:
-                path = urlparse(url).path.strip('/').replace("watch", "yt_music")
-                return "%s/%s" % (path, yt_vid_id)
+                path = urlparse(url).path.strip('/').replace("watch", ("yt_music" if parse_mode is UrlParseMode.YT_MUSIC else "youtube"))
+                return ("%s/%s" % (path, yt_vid_id)).strip('/')
             else:
                 raise ValueError("Failed to generate uniq_id for url '%s'", url)
 
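For reference, the YouTube branch of the new compute_uniq() as a standalone function, trimmed to the lines shown in the hunk above; handling of other URL types (and of Shorts) sits outside this hunk and is stubbed here, and the sample URLs in the comments are illustrative.

from enum import Enum
from urllib.parse import urlparse, parse_qs

class UrlParseMode(Enum):
    OTHER = 0
    YT_MUSIC = 1
    YT_SHORTS = 2
    YOUTUBE = 3

def compute_uniq(url: str) -> str:
    parse_mode = UrlParseMode.OTHER
    if "music.youtube.com/" in url:
        parse_mode = UrlParseMode.YT_MUSIC
    elif "youtube.com/shorts/" in url:
        parse_mode = UrlParseMode.YT_SHORTS
    elif "youtube.com/" in url:
        parse_mode = UrlParseMode.YOUTUBE

    if parse_mode is not UrlParseMode.OTHER and parse_mode is not UrlParseMode.YT_SHORTS:
        purl = urlparse(url)
        yt_vid_id_list = parse_qs(purl.query).get('v', None)
        yt_vid_id = yt_vid_id_list.pop() if yt_vid_id_list else ""
        if yt_vid_id:
            path = purl.path.strip('/').replace("watch", "yt_music" if parse_mode is UrlParseMode.YT_MUSIC else "youtube")
            return ("%s/%s" % (path, yt_vid_id)).strip('/')
        raise ValueError("Failed to generate uniq_id for url '%s'" % url)
    raise NotImplementedError("non-YouTube and Shorts URLs are handled by code outside this hunk")

# compute_uniq("https://music.youtube.com/watch?v=abc123")  -> "yt_music/abc123"
# compute_uniq("https://www.youtube.com/watch?v=abc123")    -> "youtube/abc123"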
@@ -65,6 +83,9 @@ class Storage(object):
         uniq_id = self.compute_uniq(media_url)
         media_ids = []
         for tg_file_id in tg_file_ids:
+            if not tg_file_id:
+                logging.warning("Passed empty `tg_file_id`! Skipping.")
+                continue
             if self.db_lookup_id(uniq_id):
                 logging.info("Detected existing uniq_id, skipping storage write operation")
                 continue

File without changes