warp-beacon 1.2.6__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-1.2.6/warp_beacon.egg-info → warp_beacon-2.0.0}/PKG-INFO +4 -2
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/warp_beacon.conf +4 -2
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/pyproject.toml +3 -1
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/setup.py +9 -0
- warp_beacon-2.0.0/warp_beacon/__version__.py +2 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/__init__.py +2 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/abstract.py +21 -4
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/download_job.py +6 -3
- warp_beacon-2.0.0/warp_beacon/jobs/types.py +9 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/abstract.py +11 -1
- warp_beacon-2.0.0/warp_beacon/mediainfo/silencer.py +46 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/video.py +13 -1
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/__init__.py +38 -23
- warp_beacon-2.0.0/warp_beacon/scraper/abstract.py +44 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/instagram.py +35 -24
- warp_beacon-1.2.6/warp_beacon/scraper/youtube/music.py → warp_beacon-2.0.0/warp_beacon/scraper/youtube/abstract.py +46 -84
- warp_beacon-2.0.0/warp_beacon/scraper/youtube/music.py +47 -0
- warp_beacon-2.0.0/warp_beacon/scraper/youtube/shorts.py +42 -0
- warp_beacon-2.0.0/warp_beacon/scraper/youtube/youtube.py +41 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/storage/__init__.py +27 -6
- warp_beacon-2.0.0/warp_beacon/telegram/__init__.py +0 -0
- warp_beacon-2.0.0/warp_beacon/telegram/bot.py +318 -0
- warp_beacon-2.0.0/warp_beacon/telegram/handlers.py +156 -0
- warp_beacon-2.0.0/warp_beacon/telegram/placeholder_message.py +191 -0
- warp_beacon-2.0.0/warp_beacon/telegram/utils.py +73 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/uploader/__init__.py +9 -9
- warp_beacon-2.0.0/warp_beacon/warp_beacon.py +14 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0/warp_beacon.egg-info}/PKG-INFO +4 -2
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/SOURCES.txt +9 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/requires.txt +3 -1
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/top_level.txt +9 -0
- warp_beacon-1.2.6/warp_beacon/__version__.py +0 -2
- warp_beacon-1.2.6/warp_beacon/scraper/abstract.py +0 -18
- warp_beacon-1.2.6/warp_beacon/scraper/youtube/shorts.py +0 -95
- warp_beacon-1.2.6/warp_beacon/warp_beacon.py +0 -600
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/LICENSE +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/MANIFEST.in +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/README.md +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/assets/placeholder.gif +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/.gitignore +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/warp_beacon.service +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/setup.cfg +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/__init__.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warp_beacon
|
3
|
-
Version:
|
3
|
+
Version: 2.0.0
|
4
4
|
Summary: Telegram bot for expanding external media links
|
5
5
|
Home-page: https://github.com/sb0y/warp_beacon
|
6
6
|
Author: Andrey Bagrintsev
|
@@ -225,7 +225,9 @@ Requires-Python: >=3.10
|
|
225
225
|
Description-Content-Type: text/markdown
|
226
226
|
License-File: LICENSE
|
227
227
|
Requires-Dist: ffmpeg-python
|
228
|
-
Requires-Dist:
|
228
|
+
Requires-Dist: uvloop
|
229
|
+
Requires-Dist: tgcrypto
|
230
|
+
Requires-Dist: pyrogram
|
229
231
|
Requires-Dist: pytubefix
|
230
232
|
Requires-Dist: av
|
231
233
|
Requires-Dist: urlextract
|
@@ -1,11 +1,13 @@
|
|
1
1
|
TG_TOKEN=""
|
2
|
+
TG_API_ID=""
|
3
|
+
TG_API_HASH=""
|
4
|
+
TG_BOT_NAME=""
|
2
5
|
INSTAGRAM_LOGIN=""
|
3
6
|
INSTAGRAM_PASSWORD=""
|
4
|
-
INSTAGRAM_VERIFICATION_CODE="
|
7
|
+
INSTAGRAM_VERIFICATION_CODE=""
|
5
8
|
MONGODB_HOST="mongodb"
|
6
9
|
MONGODB_PORT="27017"
|
7
10
|
MONGODB_USER="root"
|
8
11
|
MONGODB_PASSWORD="changeme"
|
9
12
|
VIDEO_STORAGE_DIR="/var/warp_beacon/videos"
|
10
|
-
WORKERS_POOL_SIZE=3
|
11
13
|
ENABLE_DONATES=true
|
@@ -46,6 +46,7 @@ setup(
|
|
46
46
|
url="https://github.com/sb0y/warp_beacon",
|
47
47
|
packages=[
|
48
48
|
'warp_beacon',
|
49
|
+
'warp_beacon/telegram',
|
49
50
|
'warp_beacon/uploader',
|
50
51
|
'warp_beacon/storage',
|
51
52
|
'warp_beacon/scraper',
|
@@ -57,16 +58,24 @@ setup(
|
|
57
58
|
py_modules=[
|
58
59
|
"warp_beacon/__version__",
|
59
60
|
"warp_beacon/warp_beacon",
|
61
|
+
"warp_beacon/telegram/bot",
|
62
|
+
"warp_beacon/telegram/placeholder_message",
|
63
|
+
"warp_beacon/telegram/handlers",
|
64
|
+
"warp_beacon/telegram/utils",
|
60
65
|
"warp_beacon/jobs/abstract",
|
61
66
|
"warp_beacon/jobs/download_job",
|
62
67
|
"warp_beacon/jobs/upload_job",
|
63
68
|
"warp_beacon/mediainfo/abstract",
|
64
69
|
"warp_beacon/mediainfo/video",
|
65
70
|
"warp_beacon/mediainfo/audio",
|
71
|
+
"warp_beacon/mediainfo/silencer",
|
66
72
|
"warp_beacon/compress/video",
|
67
73
|
"warp_beacon/scraper/abstract",
|
68
74
|
"warp_beacon/scraper/exceptions",
|
75
|
+
"warp_beacon/scraper/types",
|
69
76
|
"warp_beacon/scraper/instagram",
|
77
|
+
"warp_beacon/scraper/youtube/abstract",
|
78
|
+
"warp_beacon/scraper/youtube/youtube",
|
70
79
|
"warp_beacon/scraper/youtube/shorts",
|
71
80
|
"warp_beacon/scraper/youtube/music"
|
72
81
|
],
|
@@ -1,13 +1,16 @@
|
|
1
|
+
import os
|
1
2
|
from abc import ABC, abstractmethod
|
2
3
|
from typing import TypedDict
|
3
4
|
from typing_extensions import Unpack
|
4
5
|
import uuid
|
5
6
|
|
6
7
|
from warp_beacon.jobs import Origin
|
8
|
+
from warp_beacon.jobs.types import JobType
|
7
9
|
|
8
10
|
class JobSettings(TypedDict):
|
9
11
|
job_id: uuid.UUID
|
10
12
|
message_id: int
|
13
|
+
chat_id: int
|
11
14
|
placeholder_message_id: int
|
12
15
|
local_media_path: str
|
13
16
|
local_compressed_media_path: str
|
@@ -16,7 +19,7 @@ class JobSettings(TypedDict):
|
|
16
19
|
uniq_id: str
|
17
20
|
tg_file_id: str
|
18
21
|
in_process: bool
|
19
|
-
media_type:
|
22
|
+
media_type: JobType
|
20
23
|
job_failed: bool
|
21
24
|
job_failed_msg: str
|
22
25
|
job_warning: bool
|
@@ -30,6 +33,7 @@ class JobSettings(TypedDict):
|
|
30
33
|
class AbstractJob(ABC):
|
31
34
|
job_id: uuid.UUID = None
|
32
35
|
message_id: int = 0
|
36
|
+
chat_id: int = 0
|
33
37
|
placeholder_message_id: int = 0
|
34
38
|
local_media_path: str = ""
|
35
39
|
local_compressed_media_path: str = ""
|
@@ -37,7 +41,7 @@ class AbstractJob(ABC):
|
|
37
41
|
url: str = ""
|
38
42
|
uniq_id: str = ""
|
39
43
|
tg_file_id: str = ""
|
40
|
-
media_type:
|
44
|
+
media_type: JobType = JobType.VIDEO
|
41
45
|
in_process: bool = False
|
42
46
|
job_warning: bool = False
|
43
47
|
job_warning_message: str = ""
|
@@ -64,7 +68,7 @@ class AbstractJob(ABC):
|
|
64
68
|
return str(self.to_dict())
|
65
69
|
|
66
70
|
def is_empty(self) -> bool:
|
67
|
-
if self.media_type ==
|
71
|
+
if self.media_type == JobType.COLLECTION:
|
68
72
|
if not self.media_collection:
|
69
73
|
return True
|
70
74
|
elif not self.local_media_path:
|
@@ -79,4 +83,17 @@ class AbstractJob(ABC):
|
|
79
83
|
if not callable(value):
|
80
84
|
d[key] = value
|
81
85
|
|
82
|
-
return d
|
86
|
+
return d
|
87
|
+
|
88
|
+
def remove_files(self) -> bool:
|
89
|
+
if self.media_type == JobType.COLLECTION:
|
90
|
+
for i in self.media_collection:
|
91
|
+
for j in i:
|
92
|
+
if os.path.exists(j.local_media_path):
|
93
|
+
os.unlink(j.local_media_path)
|
94
|
+
else:
|
95
|
+
if os.path.exists(self.local_media_path):
|
96
|
+
os.unlink(self.local_media_path)
|
97
|
+
if self.local_compressed_media_path:
|
98
|
+
if os.path.exists(self.local_compressed_media_path):
|
99
|
+
os.unlink(self.local_compressed_media_path)
|
@@ -4,6 +4,8 @@ from typing_extensions import Unpack
|
|
4
4
|
from warp_beacon.jobs.upload_job import UploadJob
|
5
5
|
from warp_beacon.jobs.abstract import AbstractJob, JobSettings
|
6
6
|
|
7
|
+
import logging
|
8
|
+
|
7
9
|
class DownloadJob(AbstractJob):
|
8
10
|
def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
|
9
11
|
super(DownloadJob, self).__init__(**kwargs)
|
@@ -18,6 +20,7 @@ class DownloadJob(AbstractJob):
|
|
18
20
|
d = self.to_dict()
|
19
21
|
d.update(kwargs)
|
20
22
|
if "media_collection" in d:
|
21
|
-
for
|
22
|
-
d["media_collection"][
|
23
|
-
|
23
|
+
for index, chunk in enumerate(d["media_collection"]):
|
24
|
+
for k, v in enumerate(d["media_collection"][index]):
|
25
|
+
d["media_collection"][index][k] = UploadJob.build(**d["media_collection"][index][k])
|
26
|
+
return UploadJob.build(**d)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import os
|
2
|
+
import pathlib
|
2
3
|
from abc import ABC, abstractmethod
|
3
4
|
|
4
5
|
from PIL import Image
|
@@ -36,4 +37,13 @@ class MediaInfoAbstract(ABC):
|
|
36
37
|
# "/tmp/test.th.jpg",
|
37
38
|
# quality=80,
|
38
39
|
#)
|
39
|
-
return image
|
40
|
+
return image
|
41
|
+
|
42
|
+
def generate_filepath(self, base_filepath: str, postfix: str = "silenced") -> str:
|
43
|
+
path_info = pathlib.Path(base_filepath)
|
44
|
+
ext = path_info.suffix
|
45
|
+
old_filename = path_info.stem
|
46
|
+
new_filename = "%s_%s%s" % (old_filename, postfix, ext)
|
47
|
+
new_filepath = "%s/%s" % (os.path.dirname(base_filepath), new_filename)
|
48
|
+
|
49
|
+
return new_filepath
|
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
#import numpy as np
|
3
|
+
import av
|
4
|
+
|
5
|
+
from warp_beacon.mediainfo.video import VideoInfo
|
6
|
+
|
7
|
+
import logging
|
8
|
+
|
9
|
+
class Silencer(VideoInfo):
|
10
|
+
def add_silent_audio(self) -> str:
|
11
|
+
try:
|
12
|
+
new_filepath = self.generate_filepath(self.filename)
|
13
|
+
if self.container:
|
14
|
+
in_video_stream = next(s for s in self.container.streams if s.type == 'video')
|
15
|
+
codec_name = in_video_stream.codec_context.name
|
16
|
+
fps = in_video_stream.base_rate
|
17
|
+
#time_base = in_video_stream.time_base
|
18
|
+
#duration = float(in_video_stream.duration * time_base)
|
19
|
+
with av.open(new_filepath, 'w') as out_container:
|
20
|
+
out_video_stream = out_container.add_stream(codec_name, rate=fps)
|
21
|
+
out_audio_stream = out_container.add_stream('aac')
|
22
|
+
out_video_stream.width = in_video_stream.codec_context.width
|
23
|
+
out_video_stream.height = in_video_stream.codec_context.height
|
24
|
+
out_video_stream.pix_fmt = in_video_stream.codec_context.pix_fmt
|
25
|
+
out_video_stream.time_base = in_video_stream.time_base
|
26
|
+
for frame in self.container.decode(in_video_stream):
|
27
|
+
packet = out_video_stream.encode(frame)
|
28
|
+
if packet:
|
29
|
+
out_container.mux(packet)
|
30
|
+
#
|
31
|
+
aframe = av.AudioFrame(samples=32, format='s16')
|
32
|
+
aframe.pts = frame.pts
|
33
|
+
aframe.sample_rate = 16000
|
34
|
+
aframe.rate = 44100
|
35
|
+
for packet in out_audio_stream.encode(aframe):
|
36
|
+
out_container.mux(packet)
|
37
|
+
|
38
|
+
remain_packets = out_video_stream.encode(None)
|
39
|
+
out_container.mux(remain_packets)
|
40
|
+
self.filename = new_filepath
|
41
|
+
return new_filepath
|
42
|
+
except Exception as e:
|
43
|
+
logging.error("Error occurred while generating silenced video file!")
|
44
|
+
logging.exception(e)
|
45
|
+
|
46
|
+
return ''
|
@@ -15,7 +15,7 @@ class VideoInfo(MediaInfoAbstract):
|
|
15
15
|
super(VideoInfo, self).__init__(filename)
|
16
16
|
|
17
17
|
if self.container:
|
18
|
-
stream = self.container.streams.video
|
18
|
+
stream = next(s for s in self.container.streams if s.type == 'video')
|
19
19
|
time_base = stream.time_base
|
20
20
|
self.duration = float(stream.duration * time_base)
|
21
21
|
framerate = stream.average_rate
|
@@ -73,3 +73,15 @@ class VideoInfo(MediaInfoAbstract):
|
|
73
73
|
logging.exception(e)
|
74
74
|
|
75
75
|
return None
|
76
|
+
|
77
|
+
def has_sound(self) -> bool:
|
78
|
+
try:
|
79
|
+
if self.container:
|
80
|
+
stream_list = self.container.streams.get(audio=0)
|
81
|
+
if len(stream_list) > 0:
|
82
|
+
return True
|
83
|
+
except Exception as e:
|
84
|
+
logging.warning("An exception occurred while detection audio track!")
|
85
|
+
#logging.exception(e)
|
86
|
+
|
87
|
+
return False
|
@@ -8,24 +8,24 @@ from queue import Empty
|
|
8
8
|
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig
|
9
9
|
from warp_beacon.mediainfo.video import VideoInfo
|
10
10
|
from warp_beacon.mediainfo.audio import AudioInfo
|
11
|
+
from warp_beacon.mediainfo.silencer import Silencer
|
11
12
|
from warp_beacon.compress.video import VideoCompress
|
12
13
|
from warp_beacon.uploader import AsyncUploader
|
13
14
|
from warp_beacon.jobs import Origin
|
14
15
|
from warp_beacon.jobs.download_job import DownloadJob
|
16
|
+
from warp_beacon.jobs.types import JobType
|
15
17
|
|
16
18
|
import logging
|
17
19
|
|
18
|
-
CONST_CPU_COUNT = multiprocessing.cpu_count()
|
19
|
-
|
20
20
|
class AsyncDownloader(object):
|
21
21
|
__JOE_BIDEN_WAKEUP = None
|
22
22
|
workers = []
|
23
23
|
allow_loop = None
|
24
24
|
job_queue = multiprocessing.Queue()
|
25
25
|
uploader = None
|
26
|
-
workers_count =
|
26
|
+
workers_count = 0
|
27
27
|
|
28
|
-
def __init__(self, uploader: AsyncUploader, workers_count: int
|
28
|
+
def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
|
29
29
|
self.allow_loop = multiprocessing.Value('i', 1)
|
30
30
|
self.uploader = uploader
|
31
31
|
self.workers_count = workers_count
|
@@ -39,16 +39,18 @@ class AsyncDownloader(object):
|
|
39
39
|
self.workers.append(proc)
|
40
40
|
proc.start()
|
41
41
|
|
42
|
-
def get_media_info(self, path: str, fr_media_info: dict={}, media_type:
|
42
|
+
def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
|
43
43
|
media_info = None
|
44
44
|
try:
|
45
45
|
if path:
|
46
|
-
if media_type ==
|
46
|
+
if media_type == JobType.VIDEO:
|
47
47
|
video_info = VideoInfo(path)
|
48
48
|
media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
|
49
49
|
media_info.update(fr_media_info)
|
50
|
-
media_info
|
51
|
-
|
50
|
+
if not media_info.get("thumb", None):
|
51
|
+
media_info["thumb"] = video_info.generate_thumbnail()
|
52
|
+
media_info["has_sound"] = video_info.has_sound()
|
53
|
+
elif media_type == JobType.AUDIO:
|
52
54
|
audio_info = AudioInfo(path)
|
53
55
|
media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
|
54
56
|
except Exception as e:
|
@@ -81,6 +83,9 @@ class AsyncDownloader(object):
|
|
81
83
|
elif job.job_origin is Origin.YT_MUSIC:
|
82
84
|
from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
|
83
85
|
actor = YoutubeMusicScraper()
|
86
|
+
elif job.job_origin is Origin.YOUTUBE:
|
87
|
+
from warp_beacon.scraper.youtube.youtube import YoutubeScraper
|
88
|
+
actor = YoutubeScraper()
|
84
89
|
while True:
|
85
90
|
try:
|
86
91
|
logging.info("Downloading URL '%s'", job.url)
|
@@ -107,7 +112,7 @@ class AsyncDownloader(object):
|
|
107
112
|
logging.exception(e)
|
108
113
|
self.uploader.queue_task(job.to_upload_job(
|
109
114
|
job_failed=True,
|
110
|
-
job_failed_msg="Unfortunately
|
115
|
+
job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
|
111
116
|
)
|
112
117
|
break
|
113
118
|
except (UnknownError, Exception) as e:
|
@@ -133,10 +138,10 @@ class AsyncDownloader(object):
|
|
133
138
|
if items:
|
134
139
|
for item in items:
|
135
140
|
media_info = {"filesize": 0}
|
136
|
-
if item["media_type"] ==
|
137
|
-
media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}))
|
141
|
+
if item["media_type"] == JobType.VIDEO:
|
142
|
+
media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
|
138
143
|
logging.info("Final media info: %s", media_info)
|
139
|
-
if media_info["filesize"] >
|
144
|
+
if media_info["filesize"] > 2e+9:
|
140
145
|
logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
|
141
146
|
logging.info("Detected big file. Starting compressing with ffmpeg ...")
|
142
147
|
self.uploader.queue_task(job.to_upload_job(
|
@@ -145,27 +150,37 @@ class AsyncDownloader(object):
|
|
145
150
|
)
|
146
151
|
ffmpeg = VideoCompress(file_path=item["local_media_path"])
|
147
152
|
new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
|
148
|
-
if ffmpeg.compress_to(new_filepath, target_size=
|
153
|
+
if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
|
149
154
|
logging.info("Successfully compressed file '%s'", new_filepath)
|
150
155
|
os.unlink(item["local_media_path"])
|
151
156
|
item["local_media_path"] = new_filepath
|
152
157
|
item["local_compressed_media_path"] = new_filepath
|
153
158
|
media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
|
154
159
|
logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
|
155
|
-
|
156
|
-
|
160
|
+
if not media_info["has_sound"]:
|
161
|
+
item["media_type"] = JobType.ANIMATION
|
162
|
+
elif item["media_type"] == JobType.AUDIO:
|
163
|
+
media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
|
157
164
|
media_info["performer"] = item.get("performer", None)
|
158
165
|
media_info["thumb"] = item.get("thumb", None)
|
159
166
|
logging.info("Final media info: %s", media_info)
|
160
|
-
elif item["media_type"] ==
|
161
|
-
for
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
167
|
+
elif item["media_type"] == JobType.COLLECTION:
|
168
|
+
for chunk in item["items"]:
|
169
|
+
for v in chunk:
|
170
|
+
if v["media_type"] == JobType.VIDEO:
|
171
|
+
col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
|
172
|
+
media_info["filesize"] += int(col_media_info.get("filesize", 0))
|
173
|
+
v["media_info"] = col_media_info
|
174
|
+
if not v["media_info"]["has_sound"]:
|
175
|
+
silencer = Silencer(v["local_media_path"])
|
176
|
+
silent_video_path = silencer.add_silent_audio()
|
177
|
+
os.unlink(j["local_media_path"])
|
178
|
+
v["local_media_path"] = silent_video_path
|
179
|
+
v["media_info"].update(silencer.get_finfo())
|
180
|
+
v["media_info"]["has_sound"] = True
|
166
181
|
|
167
182
|
job_args = {"media_type": item["media_type"], "media_info": media_info}
|
168
|
-
if item["media_type"] ==
|
183
|
+
if item["media_type"] == JobType.COLLECTION:
|
169
184
|
job_args["media_collection"] = item["items"]
|
170
185
|
if item.get("save_items", None) is not None:
|
171
186
|
job_args["save_items"] = item.get("save_items", False)
|
@@ -178,6 +193,7 @@ class AsyncDownloader(object):
|
|
178
193
|
|
179
194
|
logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
|
180
195
|
logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
|
196
|
+
#logging.info(job_args)
|
181
197
|
upload_job = job.to_upload_job(**job_args)
|
182
198
|
if upload_job.is_empty():
|
183
199
|
logging.info("Upload job is empty. Nothing to do here!")
|
@@ -194,7 +210,6 @@ class AsyncDownloader(object):
|
|
194
210
|
logging.error("Error inside download worker!")
|
195
211
|
logging.exception(e)
|
196
212
|
self.notify_task_failed(job)
|
197
|
-
#self.queue_task(url=item["url"], message_id=item["message_id"], item_in_process=item["in_process"], uniq_id=item["uniq_id"])
|
198
213
|
except Empty:
|
199
214
|
pass
|
200
215
|
except Exception as e:
|
@@ -0,0 +1,44 @@
|
|
1
|
+
import os
|
2
|
+
import pathlib
|
3
|
+
|
4
|
+
from abc import ABC, abstractmethod
|
5
|
+
from typing import Callable, Union
|
6
|
+
|
7
|
+
from PIL import Image
|
8
|
+
|
9
|
+
import logging
|
10
|
+
|
11
|
+
class ScraperAbstract(ABC):
|
12
|
+
def __init__(self) -> None:
|
13
|
+
pass
|
14
|
+
|
15
|
+
def __del__(self) -> None:
|
16
|
+
pass
|
17
|
+
|
18
|
+
@abstractmethod
|
19
|
+
def download(self, url: str) -> bool:
|
20
|
+
raise NotImplementedError
|
21
|
+
|
22
|
+
@abstractmethod
|
23
|
+
def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
|
24
|
+
raise NotImplementedError
|
25
|
+
|
26
|
+
@staticmethod
|
27
|
+
def convert_webp_to_png(src_file: str) -> str:
|
28
|
+
try:
|
29
|
+
if os.path.exists(src_file):
|
30
|
+
path_info = pathlib.Path(src_file)
|
31
|
+
old_filename = path_info.stem
|
32
|
+
new_filename = "%s_converted.%s" % (old_filename, "png")
|
33
|
+
new_filepath = "%s/%s" % (os.path.dirname(src_file), new_filename)
|
34
|
+
with Image.open(src_file).convert('RGB') as img:
|
35
|
+
img.save(new_filepath, 'png')
|
36
|
+
os.unlink(src_file)
|
37
|
+
return new_filepath
|
38
|
+
except Exception as e:
|
39
|
+
logging.error("Failed to convert webp file to png!")
|
40
|
+
logging.exception(e)
|
41
|
+
|
42
|
+
return ''
|
43
|
+
|
44
|
+
|
@@ -20,6 +20,8 @@ from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotF
|
|
20
20
|
|
21
21
|
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, extract_exception_message
|
22
22
|
from warp_beacon.scraper.abstract import ScraperAbstract
|
23
|
+
from warp_beacon.jobs.types import JobType
|
24
|
+
from warp_beacon.telegram.utils import Utils
|
23
25
|
|
24
26
|
import logging
|
25
27
|
|
@@ -118,17 +120,18 @@ class InstagramScraper(ScraperAbstract):
|
|
118
120
|
|
119
121
|
return ret_val
|
120
122
|
|
121
|
-
|
122
123
|
def download_video(self, url: str, media_info: dict) -> dict:
|
123
124
|
path = self._download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
|
124
|
-
return {"local_media_path": str(path), "media_type":
|
125
|
+
return {"local_media_path": str(path), "media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration)}}
|
125
126
|
|
126
127
|
def download_photo(self, url: str) -> dict:
|
127
|
-
path = self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
|
128
|
-
|
128
|
+
path = str(self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp'))
|
129
|
+
if ".webp" in path:
|
130
|
+
path = InstagramScraper.convert_webp_to_png(path)
|
131
|
+
return {"local_media_path": path, "media_type": JobType.IMAGE}
|
129
132
|
|
130
133
|
def download_story(self, story_info: Story) -> dict:
|
131
|
-
path, media_type, media_info = "",
|
134
|
+
path, media_type, media_info = "", JobType.UNKNOWN, {}
|
132
135
|
logging.info("Story id is '%s'", story_info.id)
|
133
136
|
effective_story_id = story_info.id
|
134
137
|
if '_' in effective_story_id:
|
@@ -138,32 +141,40 @@ class InstagramScraper(ScraperAbstract):
|
|
138
141
|
logging.info("Effective story id is '%s'", effective_story_id)
|
139
142
|
effective_url = "https://www.instagram.com/stories/%s/%s/" % (story_info.user.username, effective_story_id)
|
140
143
|
if story_info.media_type == 1: # photo
|
141
|
-
path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
|
142
|
-
|
144
|
+
path = str(self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp'))
|
145
|
+
if ".webp" in path:
|
146
|
+
path = InstagramScraper.convert_webp_to_png(path)
|
147
|
+
media_type = JobType.IMAGE
|
143
148
|
elif story_info.media_type == 2: # video
|
144
|
-
path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
|
145
|
-
media_type =
|
149
|
+
path = str(self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp'))
|
150
|
+
media_type = JobType.VIDEO
|
146
151
|
media_info["duration"] = story_info.video_duration
|
147
152
|
|
148
|
-
return {"local_media_path":
|
153
|
+
return {"local_media_path": path, "media_type": media_type, "media_info": media_info, "effective_url": effective_url}
|
149
154
|
|
150
155
|
def download_stories(self, stories: list[Story]) -> dict:
|
151
|
-
|
152
|
-
for
|
153
|
-
|
156
|
+
chunks = []
|
157
|
+
for stories_chunk in Utils.chunker(stories, 10):
|
158
|
+
chunk = []
|
159
|
+
for story in stories_chunk:
|
160
|
+
chunk.append(self.download_story(story_info=story))
|
161
|
+
chunks.append(chunk)
|
154
162
|
|
155
|
-
return {"media_type":
|
163
|
+
return {"media_type": JobType.COLLECTION, "save_items": True, "items": chunks}
|
156
164
|
|
157
165
|
def download_album(self, media_info: dict) -> dict:
|
158
|
-
|
159
|
-
for
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
166
|
+
chunks = []
|
167
|
+
for media_chunk in Utils.chunker(media_info.resources, 10):
|
168
|
+
chunk = []
|
169
|
+
for media in media_chunk:
|
170
|
+
_media_info = self.cl.media_info(media.pk)
|
171
|
+
if media.media_type == 1: # photo
|
172
|
+
chunk.append(self.download_photo(url=_media_info.thumbnail_url))
|
173
|
+
elif media.media_type == 2: # video
|
174
|
+
chunk.append(self.download_video(url=_media_info.video_url, media_info=_media_info))
|
175
|
+
chunks.append(chunk)
|
176
|
+
|
177
|
+
return {"media_type": JobType.COLLECTION, "items": chunks}
|
167
178
|
|
168
179
|
def download(self, url: str) -> Optional[list[dict]]:
|
169
180
|
res = []
|
@@ -194,7 +205,7 @@ class InstagramScraper(ScraperAbstract):
|
|
194
205
|
logging.info("Waiting %d seconds according configuration option `IG_WAIT_TIMEOUT`", wait_timeout)
|
195
206
|
if res:
|
196
207
|
for i in res:
|
197
|
-
if i["media_type"] ==
|
208
|
+
if i["media_type"] == JobType.COLLECTION:
|
198
209
|
for j in i["items"]:
|
199
210
|
if os.path.exists(j["local_media_path"]):
|
200
211
|
os.unlink(j["local_media_path"])
|