warp-beacon 1.2.6__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {warp_beacon-1.2.6/warp_beacon.egg-info → warp_beacon-2.0.0}/PKG-INFO +4 -2
  2. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/warp_beacon.conf +4 -2
  3. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/pyproject.toml +3 -1
  4. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/setup.py +9 -0
  5. warp_beacon-2.0.0/warp_beacon/__version__.py +2 -0
  6. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/__init__.py +2 -0
  7. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/abstract.py +21 -4
  8. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/download_job.py +6 -3
  9. warp_beacon-2.0.0/warp_beacon/jobs/types.py +9 -0
  10. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/abstract.py +11 -1
  11. warp_beacon-2.0.0/warp_beacon/mediainfo/silencer.py +46 -0
  12. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/video.py +13 -1
  13. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/__init__.py +38 -23
  14. warp_beacon-2.0.0/warp_beacon/scraper/abstract.py +44 -0
  15. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/instagram.py +35 -24
  16. warp_beacon-1.2.6/warp_beacon/scraper/youtube/music.py → warp_beacon-2.0.0/warp_beacon/scraper/youtube/abstract.py +46 -84
  17. warp_beacon-2.0.0/warp_beacon/scraper/youtube/music.py +47 -0
  18. warp_beacon-2.0.0/warp_beacon/scraper/youtube/shorts.py +42 -0
  19. warp_beacon-2.0.0/warp_beacon/scraper/youtube/youtube.py +41 -0
  20. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/storage/__init__.py +27 -6
  21. warp_beacon-2.0.0/warp_beacon/telegram/__init__.py +0 -0
  22. warp_beacon-2.0.0/warp_beacon/telegram/bot.py +318 -0
  23. warp_beacon-2.0.0/warp_beacon/telegram/handlers.py +156 -0
  24. warp_beacon-2.0.0/warp_beacon/telegram/placeholder_message.py +191 -0
  25. warp_beacon-2.0.0/warp_beacon/telegram/utils.py +73 -0
  26. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/uploader/__init__.py +9 -9
  27. warp_beacon-2.0.0/warp_beacon/warp_beacon.py +14 -0
  28. {warp_beacon-1.2.6 → warp_beacon-2.0.0/warp_beacon.egg-info}/PKG-INFO +4 -2
  29. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/SOURCES.txt +9 -0
  30. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/requires.txt +3 -1
  31. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/top_level.txt +9 -0
  32. warp_beacon-1.2.6/warp_beacon/__version__.py +0 -2
  33. warp_beacon-1.2.6/warp_beacon/scraper/abstract.py +0 -18
  34. warp_beacon-1.2.6/warp_beacon/scraper/youtube/shorts.py +0 -95
  35. warp_beacon-1.2.6/warp_beacon/warp_beacon.py +0 -600
  36. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/LICENSE +0 -0
  37. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/MANIFEST.in +0 -0
  38. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/README.md +0 -0
  39. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/assets/placeholder.gif +0 -0
  40. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/.gitignore +0 -0
  41. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/etc/warp_beacon.service +0 -0
  42. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/setup.cfg +0 -0
  43. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/__init__.py +0 -0
  44. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/compress/__init__.py +0 -0
  45. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/compress/video.py +0 -0
  46. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/jobs/upload_job.py +0 -0
  47. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/__init__.py +0 -0
  48. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/mediainfo/audio.py +0 -0
  49. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/exceptions.py +0 -0
  50. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon/scraper/youtube/__init__.py +0 -0
  51. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/dependency_links.txt +0 -0
  52. {warp_beacon-1.2.6 → warp_beacon-2.0.0}/warp_beacon.egg-info/entry_points.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: warp_beacon
- Version: 1.2.6
+ Version: 2.0.0
  Summary: Telegram bot for expanding external media links
  Home-page: https://github.com/sb0y/warp_beacon
  Author: Andrey Bagrintsev
@@ -225,7 +225,9 @@ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: ffmpeg-python
- Requires-Dist: python-telegram-bot
+ Requires-Dist: uvloop
+ Requires-Dist: tgcrypto
+ Requires-Dist: pyrogram
  Requires-Dist: pytubefix
  Requires-Dist: av
  Requires-Dist: urlextract
@@ -1,11 +1,13 @@
  TG_TOKEN=""
+ TG_API_ID=""
+ TG_API_HASH=""
+ TG_BOT_NAME=""
  INSTAGRAM_LOGIN=""
  INSTAGRAM_PASSWORD=""
- INSTAGRAM_VERIFICATION_CODE="933917"
+ INSTAGRAM_VERIFICATION_CODE=""
  MONGODB_HOST="mongodb"
  MONGODB_PORT="27017"
  MONGODB_USER="root"
  MONGODB_PASSWORD="changeme"
  VIDEO_STORAGE_DIR="/var/warp_beacon/videos"
- WORKERS_POOL_SIZE=3
  ENABLE_DONATES=true
@@ -14,7 +14,9 @@ license = {file = "LICENSE"}
  requires-python = ">=3.10"
  dependencies = [
  "ffmpeg-python",
- "python-telegram-bot",
+ "uvloop",
+ "tgcrypto",
+ "pyrogram",
  "pytubefix",
  "av",
  "urlextract",
@@ -46,6 +46,7 @@ setup(
  url="https://github.com/sb0y/warp_beacon",
  packages=[
  'warp_beacon',
+ 'warp_beacon/telegram',
  'warp_beacon/uploader',
  'warp_beacon/storage',
  'warp_beacon/scraper',
@@ -57,16 +58,24 @@ setup(
  py_modules=[
  "warp_beacon/__version__",
  "warp_beacon/warp_beacon",
+ "warp_beacon/telegram/bot",
+ "warp_beacon/telegram/placeholder_message",
+ "warp_beacon/telegram/handlers",
+ "warp_beacon/telegram/utils",
  "warp_beacon/jobs/abstract",
  "warp_beacon/jobs/download_job",
  "warp_beacon/jobs/upload_job",
  "warp_beacon/mediainfo/abstract",
  "warp_beacon/mediainfo/video",
  "warp_beacon/mediainfo/audio",
+ "warp_beacon/mediainfo/silencer",
  "warp_beacon/compress/video",
  "warp_beacon/scraper/abstract",
  "warp_beacon/scraper/exceptions",
+ "warp_beacon/scraper/types",
  "warp_beacon/scraper/instagram",
+ "warp_beacon/scraper/youtube/abstract",
+ "warp_beacon/scraper/youtube/youtube",
  "warp_beacon/scraper/youtube/shorts",
  "warp_beacon/scraper/youtube/music"
  ],
@@ -0,0 +1,2 @@
+ __version__ = "2.0.0"
+
@@ -4,4 +4,6 @@ class Origin(Enum):
  INSTAGRAM = "instagram"
  YT_SHORTS = "yt_shorts"
  YT_MUSIC = "yt_music"
+ YOUTUBE = "youtube"
+ YOUTU_BE = "youtu_be"
  UNKNOWN = "unknown"
@@ -1,13 +1,16 @@
+ import os
  from abc import ABC, abstractmethod
  from typing import TypedDict
  from typing_extensions import Unpack
  import uuid

  from warp_beacon.jobs import Origin
+ from warp_beacon.jobs.types import JobType

  class JobSettings(TypedDict):
  job_id: uuid.UUID
  message_id: int
+ chat_id: int
  placeholder_message_id: int
  local_media_path: str
  local_compressed_media_path: str
@@ -16,7 +19,7 @@ class JobSettings(TypedDict):
  uniq_id: str
  tg_file_id: str
  in_process: bool
- media_type: str
+ media_type: JobType
  job_failed: bool
  job_failed_msg: str
  job_warning: bool
@@ -30,6 +33,7 @@ class JobSettings(TypedDict):
  class AbstractJob(ABC):
  job_id: uuid.UUID = None
  message_id: int = 0
+ chat_id: int = 0
  placeholder_message_id: int = 0
  local_media_path: str = ""
  local_compressed_media_path: str = ""
@@ -37,7 +41,7 @@ class AbstractJob(ABC):
  url: str = ""
  uniq_id: str = ""
  tg_file_id: str = ""
- media_type: str = "video"
+ media_type: JobType = JobType.VIDEO
  in_process: bool = False
  job_warning: bool = False
  job_warning_message: str = ""
@@ -64,7 +68,7 @@ class AbstractJob(ABC):
  return str(self.to_dict())

  def is_empty(self) -> bool:
- if self.media_type == "collection":
+ if self.media_type == JobType.COLLECTION:
  if not self.media_collection:
  return True
  elif not self.local_media_path:
@@ -79,4 +83,17 @@ class AbstractJob(ABC):
  if not callable(value):
  d[key] = value

- return d
+ return d
+
+ def remove_files(self) -> bool:
+ if self.media_type == JobType.COLLECTION:
+ for i in self.media_collection:
+ for j in i:
+ if os.path.exists(j.local_media_path):
+ os.unlink(j.local_media_path)
+ else:
+ if os.path.exists(self.local_media_path):
+ os.unlink(self.local_media_path)
+ if self.local_compressed_media_path:
+ if os.path.exists(self.local_compressed_media_path):
+ os.unlink(self.local_compressed_media_path)
@@ -4,6 +4,8 @@ from typing_extensions import Unpack
  from warp_beacon.jobs.upload_job import UploadJob
  from warp_beacon.jobs.abstract import AbstractJob, JobSettings

+ import logging
+
  class DownloadJob(AbstractJob):
  def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
  super(DownloadJob, self).__init__(**kwargs)
@@ -18,6 +20,7 @@ class DownloadJob(AbstractJob):
  d = self.to_dict()
  d.update(kwargs)
  if "media_collection" in d:
- for k, v in enumerate(d["media_collection"]):
- d["media_collection"][k] = UploadJob.build(**v)
- return UploadJob.build(**d)
+ for index, chunk in enumerate(d["media_collection"]):
+ for k, v in enumerate(d["media_collection"][index]):
+ d["media_collection"][index][k] = UploadJob.build(**d["media_collection"][index][k])
+ return UploadJob.build(**d)
@@ -0,0 +1,9 @@
+ from enum import Enum
+
+ class JobType(str, Enum):
+ UNKNOWN = "unknown",
+ VIDEO = "video",
+ IMAGE = "image",
+ AUDIO = "audio",
+ COLLECTION = "collection"
+ ANIMATION = "animation"
@@ -1,4 +1,5 @@
  import os
+ import pathlib
  from abc import ABC, abstractmethod

  from PIL import Image
@@ -36,4 +37,13 @@ class MediaInfoAbstract(ABC):
  # "/tmp/test.th.jpg",
  # quality=80,
  #)
- return image
+ return image
+
+ def generate_filepath(self, base_filepath: str, postfix: str = "silenced") -> str:
+ path_info = pathlib.Path(base_filepath)
+ ext = path_info.suffix
+ old_filename = path_info.stem
+ new_filename = "%s_%s%s" % (old_filename, postfix, ext)
+ new_filepath = "%s/%s" % (os.path.dirname(base_filepath), new_filename)
+
+ return new_filepath
@@ -0,0 +1,46 @@
+
+ #import numpy as np
+ import av
+
+ from warp_beacon.mediainfo.video import VideoInfo
+
+ import logging
+
+ class Silencer(VideoInfo):
+ def add_silent_audio(self) -> str:
+ try:
+ new_filepath = self.generate_filepath(self.filename)
+ if self.container:
+ in_video_stream = next(s for s in self.container.streams if s.type == 'video')
+ codec_name = in_video_stream.codec_context.name
+ fps = in_video_stream.base_rate
+ #time_base = in_video_stream.time_base
+ #duration = float(in_video_stream.duration * time_base)
+ with av.open(new_filepath, 'w') as out_container:
+ out_video_stream = out_container.add_stream(codec_name, rate=fps)
+ out_audio_stream = out_container.add_stream('aac')
+ out_video_stream.width = in_video_stream.codec_context.width
+ out_video_stream.height = in_video_stream.codec_context.height
+ out_video_stream.pix_fmt = in_video_stream.codec_context.pix_fmt
+ out_video_stream.time_base = in_video_stream.time_base
+ for frame in self.container.decode(in_video_stream):
+ packet = out_video_stream.encode(frame)
+ if packet:
+ out_container.mux(packet)
+ #
+ aframe = av.AudioFrame(samples=32, format='s16')
+ aframe.pts = frame.pts
+ aframe.sample_rate = 16000
+ aframe.rate = 44100
+ for packet in out_audio_stream.encode(aframe):
+ out_container.mux(packet)
+
+ remain_packets = out_video_stream.encode(None)
+ out_container.mux(remain_packets)
+ self.filename = new_filepath
+ return new_filepath
+ except Exception as e:
+ logging.error("Error occurred while generating silenced video file!")
+ logging.exception(e)
+
+ return ''
@@ -15,7 +15,7 @@ class VideoInfo(MediaInfoAbstract):
  super(VideoInfo, self).__init__(filename)

  if self.container:
- stream = self.container.streams.video[0]
+ stream = next(s for s in self.container.streams if s.type == 'video')
  time_base = stream.time_base
  self.duration = float(stream.duration * time_base)
  framerate = stream.average_rate
@@ -73,3 +73,15 @@ class VideoInfo(MediaInfoAbstract):
  logging.exception(e)

  return None
+
+ def has_sound(self) -> bool:
+ try:
+ if self.container:
+ stream_list = self.container.streams.get(audio=0)
+ if len(stream_list) > 0:
+ return True
+ except Exception as e:
+ logging.warning("An exception occurred while detection audio track!")
+ #logging.exception(e)
+
+ return False
@@ -8,24 +8,24 @@ from queue import Empty
  from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig
  from warp_beacon.mediainfo.video import VideoInfo
  from warp_beacon.mediainfo.audio import AudioInfo
+ from warp_beacon.mediainfo.silencer import Silencer
  from warp_beacon.compress.video import VideoCompress
  from warp_beacon.uploader import AsyncUploader
  from warp_beacon.jobs import Origin
  from warp_beacon.jobs.download_job import DownloadJob
+ from warp_beacon.jobs.types import JobType

  import logging

- CONST_CPU_COUNT = multiprocessing.cpu_count()
-
  class AsyncDownloader(object):
  __JOE_BIDEN_WAKEUP = None
  workers = []
  allow_loop = None
  job_queue = multiprocessing.Queue()
  uploader = None
- workers_count = CONST_CPU_COUNT
+ workers_count = 0

- def __init__(self, uploader: AsyncUploader, workers_count: int=CONST_CPU_COUNT) -> None:
+ def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
  self.allow_loop = multiprocessing.Value('i', 1)
  self.uploader = uploader
  self.workers_count = workers_count
@@ -39,16 +39,18 @@ class AsyncDownloader(object):
  self.workers.append(proc)
  proc.start()

- def get_media_info(self, path: str, fr_media_info: dict={}, media_type: str = "video") -> Optional[dict]:
+ def get_media_info(self, path: str, fr_media_info: dict={}, media_type: JobType = JobType.VIDEO) -> Optional[dict]:
  media_info = None
  try:
  if path:
- if media_type == "video":
+ if media_type == JobType.VIDEO:
  video_info = VideoInfo(path)
  media_info = video_info.get_finfo(tuple(fr_media_info.keys()))
  media_info.update(fr_media_info)
- media_info["thumb"] = video_info.generate_thumbnail()
- elif media_type == "audio":
+ if not media_info.get("thumb", None):
+ media_info["thumb"] = video_info.generate_thumbnail()
+ media_info["has_sound"] = video_info.has_sound()
+ elif media_type == JobType.AUDIO:
  audio_info = AudioInfo(path)
  media_info = audio_info.get_finfo(tuple(fr_media_info.keys()))
  except Exception as e:
@@ -81,6 +83,9 @@ class AsyncDownloader(object):
  elif job.job_origin is Origin.YT_MUSIC:
  from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
  actor = YoutubeMusicScraper()
+ elif job.job_origin is Origin.YOUTUBE:
+ from warp_beacon.scraper.youtube.youtube import YoutubeScraper
+ actor = YoutubeScraper()
  while True:
  try:
  logging.info("Downloading URL '%s'", job.url)
@@ -107,7 +112,7 @@ class AsyncDownloader(object):
  logging.exception(e)
  self.uploader.queue_task(job.to_upload_job(
  job_failed=True,
- job_failed_msg="Unfortunately, this file exceeds the telegram limit of 50 megabytes.")
+ job_failed_msg="Unfortunately this file has exceeded the Telegram limits. A file cannot be larger than 2 gigabytes.")
  )
  break
  except (UnknownError, Exception) as e:
@@ -133,10 +138,10 @@ class AsyncDownloader(object):
  if items:
  for item in items:
  media_info = {"filesize": 0}
- if item["media_type"] == "video":
- media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}))
+ if item["media_type"] == JobType.VIDEO:
+ media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.VIDEO)
  logging.info("Final media info: %s", media_info)
- if media_info["filesize"] > 52428800:
+ if media_info["filesize"] > 2e+9:
  logging.info("Filesize is '%d' MiB", round(media_info["filesize"] / 1024 / 1024))
  logging.info("Detected big file. Starting compressing with ffmpeg ...")
  self.uploader.queue_task(job.to_upload_job(
@@ -145,27 +150,37 @@ class AsyncDownloader(object):
  )
  ffmpeg = VideoCompress(file_path=item["local_media_path"])
  new_filepath = ffmpeg.generate_filepath(base_filepath=item["local_media_path"])
- if ffmpeg.compress_to(new_filepath, target_size=50 * 1000):
+ if ffmpeg.compress_to(new_filepath, target_size=2000 * 1000):
  logging.info("Successfully compressed file '%s'", new_filepath)
  os.unlink(item["local_media_path"])
  item["local_media_path"] = new_filepath
  item["local_compressed_media_path"] = new_filepath
  media_info["filesize"] = VideoInfo.get_filesize(new_filepath)
  logging.info("New file size of compressed file is '%.3f'", media_info["filesize"])
- elif item["media_type"] == "audio":
- media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), "audio")
+ if not media_info["has_sound"]:
+ item["media_type"] = JobType.ANIMATION
+ elif item["media_type"] == JobType.AUDIO:
+ media_info = self.get_media_info(item["local_media_path"], item.get("media_info", {}), JobType.AUDIO)
  media_info["performer"] = item.get("performer", None)
  media_info["thumb"] = item.get("thumb", None)
  logging.info("Final media info: %s", media_info)
- elif item["media_type"] == "collection":
- for v in item["items"]:
- if v["media_type"] == "video":
- col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
- media_info["filesize"] += int(col_media_info.get("filesize", 0))
- v["media_info"] = col_media_info
+ elif item["media_type"] == JobType.COLLECTION:
+ for chunk in item["items"]:
+ for v in chunk:
+ if v["media_type"] == JobType.VIDEO:
+ col_media_info = self.get_media_info(v["local_media_path"], v["media_info"])
+ media_info["filesize"] += int(col_media_info.get("filesize", 0))
+ v["media_info"] = col_media_info
+ if not v["media_info"]["has_sound"]:
+ silencer = Silencer(v["local_media_path"])
+ silent_video_path = silencer.add_silent_audio()
+ os.unlink(j["local_media_path"])
+ v["local_media_path"] = silent_video_path
+ v["media_info"].update(silencer.get_finfo())
+ v["media_info"]["has_sound"] = True

  job_args = {"media_type": item["media_type"], "media_info": media_info}
- if item["media_type"] == "collection":
+ if item["media_type"] == JobType.COLLECTION:
  job_args["media_collection"] = item["items"]
  if item.get("save_items", None) is not None:
  job_args["save_items"] = item.get("save_items", False)
@@ -178,6 +193,7 @@ class AsyncDownloader(object):

  logging.debug("local_media_path: '%s'", job_args.get("local_media_path", ""))
  logging.debug("media_collection: '%s'", str(job_args.get("media_collection", {})))
+ #logging.info(job_args)
  upload_job = job.to_upload_job(**job_args)
  if upload_job.is_empty():
  logging.info("Upload job is empty. Nothing to do here!")
@@ -194,7 +210,6 @@ class AsyncDownloader(object):
  logging.error("Error inside download worker!")
  logging.exception(e)
  self.notify_task_failed(job)
- #self.queue_task(url=item["url"], message_id=item["message_id"], item_in_process=item["in_process"], uniq_id=item["uniq_id"])
  except Empty:
  pass
  except Exception as e:
@@ -0,0 +1,44 @@
+ import os
+ import pathlib
+
+ from abc import ABC, abstractmethod
+ from typing import Callable, Union
+
+ from PIL import Image
+
+ import logging
+
+ class ScraperAbstract(ABC):
+ def __init__(self) -> None:
+ pass
+
+ def __del__(self) -> None:
+ pass
+
+ @abstractmethod
+ def download(self, url: str) -> bool:
+ raise NotImplementedError
+
+ @abstractmethod
+ def _download_hndlr(self, func: Callable, *args: tuple[str], **kwargs: dict[str]) -> Union[str, dict]:
+ raise NotImplementedError
+
+ @staticmethod
+ def convert_webp_to_png(src_file: str) -> str:
+ try:
+ if os.path.exists(src_file):
+ path_info = pathlib.Path(src_file)
+ old_filename = path_info.stem
+ new_filename = "%s_converted.%s" % (old_filename, "png")
+ new_filepath = "%s/%s" % (os.path.dirname(src_file), new_filename)
+ with Image.open(src_file).convert('RGB') as img:
+ img.save(new_filepath, 'png')
+ os.unlink(src_file)
+ return new_filepath
+ except Exception as e:
+ logging.error("Failed to convert webp file to png!")
+ logging.exception(e)
+
+ return ''
+
+
@@ -20,6 +20,8 @@ from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotF

  from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, extract_exception_message
  from warp_beacon.scraper.abstract import ScraperAbstract
+ from warp_beacon.jobs.types import JobType
+ from warp_beacon.telegram.utils import Utils

  import logging

@@ -118,17 +120,18 @@ class InstagramScraper(ScraperAbstract):

  return ret_val

-
  def download_video(self, url: str, media_info: dict) -> dict:
  path = self._download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
- return {"local_media_path": str(path), "media_type": "video", "media_info": {"duration": round(media_info.video_duration)}}
+ return {"local_media_path": str(path), "media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration)}}

  def download_photo(self, url: str) -> dict:
- path = self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp')
- return {"local_media_path": str(path), "media_type": "image"}
+ path = str(self._download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp'))
+ if ".webp" in path:
+ path = InstagramScraper.convert_webp_to_png(path)
+ return {"local_media_path": path, "media_type": JobType.IMAGE}

  def download_story(self, story_info: Story) -> dict:
- path, media_type, media_info = "", "", {}
+ path, media_type, media_info = "", JobType.UNKNOWN, {}
  logging.info("Story id is '%s'", story_info.id)
  effective_story_id = story_info.id
  if '_' in effective_story_id:
@@ -138,32 +141,40 @@ class InstagramScraper(ScraperAbstract):
  logging.info("Effective story id is '%s'", effective_story_id)
  effective_url = "https://www.instagram.com/stories/%s/%s/" % (story_info.user.username, effective_story_id)
  if story_info.media_type == 1: # photo
- path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp')
- media_type = "image"
+ path = str(self._download_hndlr(self.cl.story_download_by_url, url=story_info.thumbnail_url, folder='/tmp'))
+ if ".webp" in path:
+ path = InstagramScraper.convert_webp_to_png(path)
+ media_type = JobType.IMAGE
  elif story_info.media_type == 2: # video
- path = self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp')
- media_type = "video"
+ path = str(self._download_hndlr(self.cl.story_download_by_url, url=story_info.video_url, folder='/tmp'))
+ media_type = JobType.VIDEO
  media_info["duration"] = story_info.video_duration

- return {"local_media_path": str(path), "media_type": media_type, "media_info": media_info, "effective_url": effective_url}
+ return {"local_media_path": path, "media_type": media_type, "media_info": media_info, "effective_url": effective_url}

  def download_stories(self, stories: list[Story]) -> dict:
- res = []
- for story in stories:
- res.append(self.download_story(story_info=story))
+ chunks = []
+ for stories_chunk in Utils.chunker(stories, 10):
+ chunk = []
+ for story in stories_chunk:
+ chunk.append(self.download_story(story_info=story))
+ chunks.append(chunk)

- return {"media_type": "collection", "save_items": True, "items": res}
+ return {"media_type": JobType.COLLECTION, "save_items": True, "items": chunks}

  def download_album(self, media_info: dict) -> dict:
- res = []
- for i in media_info.resources:
- _media_info = self.cl.media_info(i.pk)
- if i.media_type == 1: # photo
- res.append(self.download_photo(url=_media_info.thumbnail_url))
- elif i.media_type == 2: # video
- res.append(self.download_video(url=_media_info.video_url, media_info=_media_info))
-
- return {"media_type": "collection", "items": res}
+ chunks = []
+ for media_chunk in Utils.chunker(media_info.resources, 10):
+ chunk = []
+ for media in media_chunk:
+ _media_info = self.cl.media_info(media.pk)
+ if media.media_type == 1: # photo
+ chunk.append(self.download_photo(url=_media_info.thumbnail_url))
+ elif media.media_type == 2: # video
+ chunk.append(self.download_video(url=_media_info.video_url, media_info=_media_info))
+ chunks.append(chunk)
+
+ return {"media_type": JobType.COLLECTION, "items": chunks}

  def download(self, url: str) -> Optional[list[dict]]:
  res = []
@@ -194,7 +205,7 @@ class InstagramScraper(ScraperAbstract):
  logging.info("Waiting %d seconds according configuration option `IG_WAIT_TIMEOUT`", wait_timeout)
  if res:
  for i in res:
- if i["media_type"] == "collection":
+ if i["media_type"] == JobType.COLLECTION:
  for j in i["items"]:
  if os.path.exists(j["local_media_path"]):
  os.unlink(j["local_media_path"])