warp-beacon 2.8.13__tar.gz → 2.8.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {warp_beacon-2.8.13/warp_beacon.egg-info → warp_beacon-2.8.15}/PKG-INFO +1 -1
  2. warp_beacon-2.8.15/warp_beacon/__version__.py +2 -0
  3. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/jobs/abstract.py +7 -0
  4. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/jobs/types.py +2 -1
  5. warp_beacon-2.8.15/warp_beacon/scraper/X/X.py +304 -0
  6. warp_beacon-2.8.15/warp_beacon/scraper/X/types.py +8 -0
  7. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/__init__.py +4 -1
  8. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/fail_handler.py +22 -3
  9. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/storage/__init__.py +5 -6
  10. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/bot.py +18 -8
  11. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/handlers.py +23 -25
  12. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/uploader/__init__.py +52 -66
  13. {warp_beacon-2.8.13 → warp_beacon-2.8.15/warp_beacon.egg-info}/PKG-INFO +1 -1
  14. warp_beacon-2.8.13/warp_beacon/__version__.py +0 -2
  15. warp_beacon-2.8.13/warp_beacon/scraper/X/X.py +0 -330
  16. warp_beacon-2.8.13/warp_beacon/scraper/X/types.py +0 -7
  17. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/LICENSE +0 -0
  18. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/MANIFEST.in +0 -0
  19. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/README.md +0 -0
  20. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/assets/cc-group-black.png +0 -0
  21. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/assets/placeholder.gif +0 -0
  22. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/etc/.gitignore +0 -0
  23. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/etc/accounts.json +0 -0
  24. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/etc/proxies.json +0 -0
  25. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/etc/warp_beacon.conf +0 -0
  26. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/etc/warp_beacon.service +0 -0
  27. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/pyproject.toml +0 -0
  28. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/setup.cfg +0 -0
  29. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/setup.py +0 -0
  30. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/__init__.py +0 -0
  31. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/compress/__init__.py +0 -0
  32. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/compress/video.py +0 -0
  33. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/jobs/__init__.py +0 -0
  34. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/jobs/download_job.py +0 -0
  35. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/jobs/upload_job.py +0 -0
  36. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/mediainfo/__init__.py +0 -0
  37. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/mediainfo/abstract.py +0 -0
  38. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/mediainfo/audio.py +0 -0
  39. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/mediainfo/silencer.py +0 -0
  40. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/mediainfo/video.py +0 -0
  41. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scheduler/__init__.py +0 -0
  42. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scheduler/instagram_human.py +0 -0
  43. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scheduler/scheduler.py +0 -0
  44. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/X/__init__.py +0 -0
  45. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/X/abstract.py +0 -0
  46. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/abstract.py +0 -0
  47. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/account_selector.py +0 -0
  48. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/exceptions.py +0 -0
  49. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/instagram/__init__.py +0 -0
  50. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/instagram/captcha.py +0 -0
  51. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/instagram/instagram.py +0 -0
  52. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/instagram/wb_instagrapi.py +0 -0
  53. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/link_resolver.py +0 -0
  54. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/utils.py +0 -0
  55. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/youtube/__init__.py +0 -0
  56. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/youtube/abstract.py +0 -0
  57. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/youtube/music.py +0 -0
  58. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/youtube/shorts.py +0 -0
  59. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/scraper/youtube/youtube.py +0 -0
  60. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/storage/mongo.py +0 -0
  61. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/__init__.py +0 -0
  62. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/caption_shortener.py +0 -0
  63. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/download_status.py +0 -0
  64. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/edit_message.py +0 -0
  65. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/placeholder_message.py +0 -0
  66. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/progress_bar.py +0 -0
  67. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/progress_file_reader.py +0 -0
  68. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/types.py +0 -0
  69. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/telegram/utils.py +0 -0
  70. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/warp_beacon.py +0 -0
  71. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon/yt_auth.py +0 -0
  72. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon.egg-info/SOURCES.txt +0 -0
  73. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon.egg-info/dependency_links.txt +0 -0
  74. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon.egg-info/entry_points.txt +0 -0
  75. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon.egg-info/requires.txt +0 -0
  76. {warp_beacon-2.8.13 → warp_beacon-2.8.15}/warp_beacon.egg-info/top_level.txt +0 -0
--- warp_beacon-2.8.13/warp_beacon.egg-info/PKG-INFO
+++ warp_beacon-2.8.15/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warp_beacon
-Version: 2.8.13
+Version: 2.8.15
 Summary: Telegram bot for expanding external media links
 Home-page: https://github.com/sb0y/warp_beacon
 Author: Andrey Bagrintsev

--- /dev/null
+++ warp_beacon-2.8.15/warp_beacon/__version__.py
@@ -0,0 +1,2 @@
+__version__ = "2.8.15"
+
--- warp_beacon-2.8.13/warp_beacon/jobs/abstract.py
+++ warp_beacon-2.8.15/warp_beacon/jobs/abstract.py
@@ -109,6 +109,9 @@ class AbstractJob(ABC):
 		if self.media_type == JobType.COLLECTION:
 			if not self.media_collection:
 				return True
+		elif self.media_type == JobType.TEXT:
+			if not self.message_text:
+				return True
 		elif not self.local_media_path:
 			return True
 		return False
@@ -129,9 +132,13 @@ class AbstractJob(ABC):
 			for j in i:
 				if os.path.exists(j.local_media_path):
 					os.unlink(j.local_media_path)
+		elif self.media_type == JobType.TEXT:
+			pass
 		else:
 			if os.path.exists(self.local_media_path):
 				os.unlink(self.local_media_path)
 			if self.local_compressed_media_path:
 				if os.path.exists(self.local_compressed_media_path):
 					os.unlink(self.local_compressed_media_path)
+
+		return True
--- warp_beacon-2.8.13/warp_beacon/jobs/types.py
+++ warp_beacon-2.8.15/warp_beacon/jobs/types.py
@@ -6,4 +6,5 @@ class JobType(str, Enum):
 	IMAGE = "image",
 	AUDIO = "audio",
 	COLLECTION = "collection"
-	ANIMATION = "animation"
+	ANIMATION = "animation"
+	TEXT = "text"
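Aside: the trailing commas visible above (`IMAGE = "image",`) technically make those assigned values tuples, but because `JobType` mixes `str` into `Enum`, the enum machinery unpacks the tuple into the `str` constructor and the member values remain plain strings. A standalone check (illustrative snippet, not part of the package):

from enum import Enum

class Demo(str, Enum):
	IMAGE = "image",  # trailing comma: the assigned value is the tuple ("image",)
	TEXT = "text"

# with a str mixin, Enum calls str(*("image",)), so the value is still "image"
assert Demo.IMAGE.value == "image"
assert Demo.IMAGE == "image"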
--- /dev/null
+++ warp_beacon-2.8.15/warp_beacon/scraper/X/X.py
@@ -0,0 +1,304 @@
+import os
+import time
+import logging
+from mimetypes import guess_extension, guess_type
+from urllib.parse import urlparse
+import requests
+import yt_dlp
+from playwright.sync_api import sync_playwright, Page
+
+from warp_beacon.telegram.utils import Utils
+from warp_beacon.scraper.utils import ScraperUtils
+from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.X.abstract import XAbstract
+
+from warp_beacon.scraper.exceptions import Unavailable
+
+class XScraper(XAbstract):
+	DOWNLOAD_DIR = "/tmp"
+
+	def extract_canonical_name(self, media: dict) -> str:
+		ret = ""
+		try:
+			if media.get("title", None):
+				ret = media["title"]
+			if media.get("description", ""):
+				ret += "\n" + media["description"]
+		except Exception as e:
+			logging.warning("Failed to extract canonical media name!")
+			logging.exception(e)
+
+		return ret
+
+	def generate_result(self, local_files: list, job_type: JobType, canonical_name: str = "", performer: str = "") -> list:
+		res = []
+		if local_files:
+			if job_type == JobType.COLLECTION:
+				chunks = []
+				for media_chunk in Utils.chunker(local_files, 10):
+					chunk = []
+					for media in media_chunk:
+						mime_type, _ = guess_type(media)
+						chunk.append({
+							"local_media_path": self.rename_local_file(media),
+							"canonical_name": canonical_name,
+							"media_type": JobType.VIDEO if "video" in mime_type else JobType.IMAGE,
+							"media_info": {}
+						})
+					chunks.append(chunk)
+				res.append({
+					"media_type": JobType.COLLECTION,
+					"canonical_name": canonical_name,
+					"items": chunks
+				})
+			else:
+				for local_file in local_files:
+					res.append({
+						"local_media_path": self.rename_local_file(local_file),
+						"performer": performer,
+						"canonical_name": canonical_name,
+						"media_type": job_type
+					})
+		logging.debug(res)
+		return res
+
+	def _download(self, url: str, timeout: int = 60) -> list:
+		res = []
+		post_text = ""
+		pw_proxy = None
+		if self.proxy:
+			dsn = self.proxy.get("dsn", "")
+			if dsn:
+				parsed = urlparse(dsn)
+				pw_proxy = {
+					"server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
+					"username": parsed.username,
+					"password": parsed.password
+				}
+				logging.info("[X] build proxy: %s", pw_proxy)
+
+		contains_images, contains_videos = False, False
+		images, videos = [], []
+		with sync_playwright() as p:
+			with p.chromium.launch(headless=True) as browser:
+				with browser.new_context(proxy=pw_proxy, ignore_https_errors=True) as context:
+					page = context.new_page()
+					page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
+					page.wait_for_selector("article[role='article']", timeout=(timeout*1000))
+
+					contains_videos = self.tweet_contains_video(page)
+					contains_images = self.tweet_contains_images(page)
+
+					if contains_images:
+						post_text, images = self.download_images(page, timeout)
+
+					if not contains_images and not contains_videos:
+						post_text = self.extract_post_text(page)
+
+		if contains_videos:
+			media_info, videos = self.download_videos(url, timeout)
+			if media_info:
+				post_text = self.extract_canonical_name(media_info)
+
+		if not images and not videos:
+			if not post_text:
+				raise Unavailable("Content unvailable")
+			logging.info("[X]: Sending text message")
+			res.append({
+				"message_text": post_text,
+				"media_type": JobType.TEXT
+			})
+			return res
+
+		if len(images) > 1 or len(videos) > 1:
+			logging.info("[X]: uploading collection")
+			content = images + videos
+			res.extend(self.generate_result(content, JobType.COLLECTION, canonical_name=post_text))
+		else:
+			logging.info("[X]: uploading media")
+			for job_type, content in {JobType.IMAGE: images, JobType.VIDEO: videos}.items():
+				if content:
+					res.extend(self.generate_result(content, job_type, canonical_name=post_text))
+
+		return res
+
+	def download_videos(self, url: str, timeout: int = 60) -> tuple[dict, list[str]]:
+		local_files = []
+		media_info = {}
+		time_name = str(time.time()).replace('.', '_')
+		ydl_opts = {
+			'socket_timeout': timeout,
+			'outtmpl': f'{self.DOWNLOAD_DIR}/x_download_{time_name}_%(id)s.%(ext)s',
+			'quiet': False,
+			'force_generic_extractor': False,
+			#'noplaylist': True,
+			'merge_output_format': 'mp4',
+			'dump_single_json': False,
+			'nocheckcertificate': True,
+			'progress_hooks': [self.dlp_on_progress],
+		}
+		if self.proxy:
+			proxy_dsn = self.proxy.get("dsn", "")
+			logging.info("[X] Using proxy DSN '%s'", proxy_dsn)
+			if proxy_dsn:
+				ydl_opts["proxy"] = proxy_dsn
+
+		with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+			info = ydl.extract_info(url, download=False)
+			media_info = info
+			entries = info.get("entries", [info])
+
+			for entry in entries:
+				ret = ydl.download([entry['webpage_url']])
+				if ret == 0:
+					file_path = ydl.prepare_filename(entry)
+					if isinstance(file_path, str):
+						local_files.append(file_path)
+					else:
+						local_files.extend(file_path)
+
+		return media_info, local_files
+
+	def adaptive_chunk_size(self, content_length: int) -> int:
+		if content_length < 100_000:
+			return 2048
+		elif content_length < 5_000_000:
+			return 8192
+		elif content_length < 100_000_000:
+			return 32768
+		else:
+			return 65536
+
+	def get_extension_from_headers(self, response: requests.Response) -> str:
+		content_type = response.headers.get("Content-Type", "")
+		return guess_extension(content_type) or ".jpg"
+
+	def download_images(self, page: Page, timeout: int) -> tuple[str, list[str]]:
+		downloaded_imgs = []
+		headers = {
+			"User-Agent": ScraperUtils.get_ua(),
+			"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+			"Accept-Language": "en-us,en;q=0.5",
+			"Sec-Fetch-Mode": "navigate"
+		}
+		proxies = None
+		if self.proxy:
+			proxies = {"https": self.proxy.get("dsn", ""), "http": self.proxy.get("dsn", "")}
+
+		image_urls, post_text = self.extract_image_urls_from_x_post(page, timeout)
+
+		if not image_urls:
+			logging.error("[X] Content images are not found!")
+			return downloaded_imgs
+
+		time_name = str(time.time()).replace('.', '_')
+		for i, img_url in enumerate(set(image_urls)):
+			downloaded = 0
+			if "?name=small" in img_url:
+				img_url = img_url.replace("?name=small", "?name=orig")
+			with requests.get(
+				img_url,
+				headers=headers,
+				timeout=timeout,
+				stream=True,
+				verify=False,
+				proxies=proxies) as request:
+
+				request.raise_for_status()
+
+				parsed = urlparse(img_url)
+				ext = os.path.splitext(parsed.path)[1]
+				if not ext:
+					ext = self.get_extension_from_headers(request)
+				filename = f"x_download_{time_name}_{i}{ext}"
+				filepath = os.path.join(self.DOWNLOAD_DIR, filename)
+
+				content_length = int(request.headers.get("Content-Length", 0))
+
+				with open(filepath, "wb") as f:
+					#request.raw.decode_content = True
+					chunk_size = self.adaptive_chunk_size(content_length)
+					for chunk in request.iter_content(chunk_size=chunk_size):
+						if chunk:
+							f.write(chunk)
+							downloaded += len(chunk)
+							self.download_progress(
+								total=content_length or None,
+								bytes_transferred=downloaded,
+								path=filepath
+							)
+				downloaded_imgs.append(filepath)
+
+		return post_text, downloaded_imgs
+
+	def extract_image_urls_from_x_post(self, page: Page, timeout: int) -> tuple[list[str], str]:
+		img_urls, post_text = [], ''
+
+		page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
+		post_text = self.extract_post_text(page)
+
+		image_elements = page.query_selector_all("img")
+		image_urls = []
+
+		for img in image_elements:
+			src = img.get_attribute("src")
+			if src and "pbs.twimg.com/media" in src:
+				image_urls.append(src)
+
+		img_urls = list(set(image_urls))
+		return img_urls, post_text
+
+	def tweet_contains_video(self, page: Page) -> bool:
+		try:
+			return bool(
+				page.query_selector("article video") or
+				page.query_selector("div[data-testid='videoPlayer']") or
+				page.query_selector("div[aria-label='Embedded video']")
+			)
+		except Exception:
+			pass
+		return False
+
+	def tweet_contains_images(self, page: Page) -> bool:
+		try:
+			image_elements = page.query_selector_all("img")
+			image_urls = [
+				img.get_attribute("src")
+				for img in image_elements
+				if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
+			]
+			return bool(image_urls)
+		except Exception:
+			pass
+		return False
+
+	def extract_post_text(self, page: Page) -> str:
+		try:
+			text_fragments = []
+
+			# find tweetText containers (in main and quoted)
+			containers = page.query_selector_all('div[data-testid="tweetText"]')
+			for container in containers:
+				fragments = []
+
+				# find <span> and <img alt=...> inside text
+				for node in container.query_selector_all("span, img"):
+					tag = node.evaluate("node => node.tagName.toLowerCase()")
+					if tag == "span":
+						value = node.inner_text().strip()
+						if value:
+							fragments.append(value)
+					elif tag == "img":
+						# emoji as image
+						alt = node.get_attribute("alt")
+						if alt:
+							fragments.append(alt)
+
+				if fragments:
+					text_fragments.append("".join(fragments))
+
+			return "\n\n".join(text_fragments).strip()
+
+		except Exception as e:
+			logging.warning("X: [extract_post_text] error", exc_info=e)
+			return ""
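Aside: `download_images()` above streams each file with a chunk size scaled to the response's `Content-Length`. A minimal standalone sketch of that pattern, with the tier thresholds copied from `adaptive_chunk_size()` (the URL and path arguments are placeholders):

import requests

def adaptive_chunk_size(content_length: int) -> int:
	# tiers mirror XScraper.adaptive_chunk_size: 2 KiB for tiny files,
	# scaling up to 64 KiB for files of 100 MB and more
	if content_length < 100_000:
		return 2048
	elif content_length < 5_000_000:
		return 8192
	elif content_length < 100_000_000:
		return 32768
	return 65536

def fetch(url: str, path: str, timeout: int = 60) -> None:
	with requests.get(url, stream=True, timeout=timeout) as r:
		r.raise_for_status()
		total = int(r.headers.get("Content-Length", 0))
		with open(path, "wb") as f:
			for chunk in r.iter_content(chunk_size=adaptive_chunk_size(total)):
				if chunk:  # skip keep-alive chunks
					f.write(chunk)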
--- /dev/null
+++ warp_beacon-2.8.15/warp_beacon/scraper/X/types.py
@@ -0,0 +1,8 @@
+from enum import Flag, auto
+
+class XMediaType(Flag):
+	UNKNOWN = 0
+	VIDEO = auto()
+	IMAGE = auto()
+	MIXED = auto()
+	PLAYLIST = auto()
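Aside: `XMediaType` derives from `Flag`, so members can be combined bitwise, presumably so a post holding several media kinds can be expressed as a union. Note that `MIXED` is allocated its own bit rather than being defined as `VIDEO | IMAGE`. An illustrative check:

from enum import Flag, auto

class XMediaType(Flag):
	UNKNOWN = 0
	VIDEO = auto()
	IMAGE = auto()
	MIXED = auto()
	PLAYLIST = auto()

found = XMediaType.VIDEO | XMediaType.IMAGE
assert XMediaType.VIDEO in found    # bitwise membership test
assert found != XMediaType.MIXED    # MIXED is a distinct bit, not the union
assert not XMediaType.UNKNOWN       # the zero member is falsy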
--- warp_beacon-2.8.13/warp_beacon/scraper/__init__.py
+++ warp_beacon-2.8.15/warp_beacon/scraper/__init__.py
@@ -318,7 +318,8 @@ class AsyncDownloader(object):
 				if items:
 					# success
 					for job in fail_handler.get_failed_jobs():
-						self.queue_task(job)
+						self.queue_task(job["job"])
+					# media info processing
 					for item in items:
 						media_info = {"filesize": 0}
 						if item["media_type"] == JobType.VIDEO:
@@ -372,6 +373,8 @@ class AsyncDownloader(object):
 							job_args["media_collection"] = item["items"]
 							if item.get("save_items", None) is not None:
 								job_args["save_items"] = item.get("save_items", False)
+						elif item["media_type"] == JobType.TEXT:
+							job_args["message_text"] = item.get("message_text", "")
 						else:
 							job_args["local_media_path"] = item["local_media_path"]
 							if item.get("local_compressed_media_path", None):
--- warp_beacon-2.8.13/warp_beacon/scraper/fail_handler.py
+++ warp_beacon-2.8.15/warp_beacon/scraper/fail_handler.py
@@ -16,12 +16,15 @@ class FailHandler(object):
 		self.client.close()
 
 	def store_failed_job(self, job: DownloadJob) -> int:
-		db_id = -1
+		db_id = ""
 		try:
 			job_serilized = pickle.dumps(job)
 			db_id = self.db.insert_one(
 				{
-					"job_data": job_serilized
+					"job_data": job_serilized,
+					"uniq_id": job.uniq_id,
+					"message_id": job.message_id,
+					"chat_id": job.chat_id
 				}).inserted_id
 		except Exception as e:
 			logging.error("Failed to store job as failed!")
@@ -33,10 +36,26 @@ class FailHandler(object):
 		try:
 			cursor = self.db.find()
 			for document in cursor:
-				ret.append(pickle.loads(document["job_data"]))
+				ret.append({
+					"_id": document["_id"],
+					"job": pickle.loads(document["job_data"]),
+					"uniq_id": document.get("uniq_id"),
+					"message_id": document.get("message_id"),
+					"chat_id": document.get("chat_id")
+				})
 			if clean:
 				self.db.delete_many({})
 		except Exception as e:
 			logging.error("Failed to get failed jobs!")
 			logging.exception(e)
 		return ret
+
+	def remove_failed_job(self, uniq_id: str) -> bool:
+		try:
+			result = self.db.delete_one({"uniq_id": uniq_id})
+			if result.deleted_count > 0:
+				return True
+		except Exception as e:
+			logging.error("Failed to remove failed job!", exc_info=e)
+
+		return False
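Aside: `get_failed_jobs()` now yields dicts instead of bare `DownloadJob` objects, which is why the requeue loop in `scraper/__init__.py` (earlier hunk) switched to `self.queue_task(job["job"])`. A hedged usage sketch based only on the fields shown above:

from warp_beacon.scraper.fail_handler import FailHandler
from warp_beacon.storage.mongo import DBClient

handler = FailHandler(DBClient())
for record in handler.get_failed_jobs(clean=False):
	job = record["job"]  # the unpickled DownloadJob
	print(record["uniq_id"], record["chat_id"], record["message_id"])
	# once a job has been retried successfully it can be dropped by uniq_id:
	handler.remove_failed_job(record["uniq_id"])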
--- warp_beacon-2.8.13/warp_beacon/storage/__init__.py
+++ warp_beacon-2.8.15/warp_beacon/storage/__init__.py
@@ -67,7 +67,8 @@ class Storage(object):
 					"uniq_id": document["uniq_id"],
 					"tg_file_id": document["tg_file_id"],
 					"media_type": document["media_type"],
-					"canonical_name": document.get("canonical_name")
+					"canonical_name": document.get("canonical_name"),
+					"message_text": document.get("message_text")
 				})
 		except Exception as e:
 			logging.error("Error occurred while trying to read from the database!")
@@ -82,13 +83,10 @@ class Storage(object):
 	def db_lookup_id(self, uniq_id: str) -> list[dict]:
 		return self.db_find(uniq_id)
 
-	def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "") -> list[int]:
+	def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "", message_text: str = "") -> list[int]:
 		uniq_id = self.compute_uniq(media_url)
 		media_ids = []
 		for tg_file_id in tg_file_ids:
-			if not tg_file_id:
-				logging.warning("Passed empty `tg_file_id`! Skipping.")
-				continue
 			if self.db_lookup_id(uniq_id):
 				logging.info("Detected existing uniq_id, skipping storage write operation")
 				continue
@@ -98,7 +96,8 @@ class Storage(object):
 				"media_type": media_type,
 				"tg_file_id": tg_file_id,
 				"origin": origin,
-				"canonical_name": canonical_name
+				"canonical_name": canonical_name,
+				"message_text": message_text
 			}).inserted_id)
 
 		return media_ids
--- warp_beacon-2.8.13/warp_beacon/telegram/bot.py
+++ warp_beacon-2.8.15/warp_beacon/telegram/bot.py
@@ -393,6 +393,9 @@ class Bot(object):
 					tg_chunk.append(anim)
 				mediafs.append(tg_chunk)
 			args["media"] = mediafs
+		elif job.media_type == JobType.TEXT:
+			args["text"] = f"<b>Post text:</b><pre>{job.message_text}</pre>\n\n{self.build_signature_caption(job)}"
+			args["parse_mode"] = ParseMode.HTML
 
 		args["chat_id"] = job.chat_id
 
@@ -412,7 +415,7 @@ class Bot(object):
 		if render_donates:
 			keyboard_buttons[0].append(InlineKeyboardButton("❤ Donate", url=os.environ.get("DONATE_LINK", "https://pay.cryptocloud.plus/pos/W5BMtNQt5bJFoW2E")))
 
-		if keyboard_buttons[0]: #job.short_text or render_donates:
+		if keyboard_buttons[0]:
 			args["reply_markup"] = InlineKeyboardMarkup(keyboard_buttons)
 
 		return args
@@ -425,9 +428,14 @@ class Bot(object):
 		while not retry_amount >= max_retries:
 			try:
 				reply_message = None
-				if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION):
-					if job.media_type in (JobType.VIDEO, JobType.AUDIO):
-						await Utils.ensure_me_loaded(self.client)
+				if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION, JobType.TEXT):
+					#if job.media_type in (JobType.VIDEO, JobType.AUDIO):
+					#	await Utils.ensure_me_loaded(self.client)
+					if job.media_type == JobType.TEXT:
+						if job.placeholder_message_id:
+							await self.placeholder.remove(job.chat_id, job.placeholder_message_id)
+							job.placeholder_message_id = None
+
 					if job.placeholder_message_id:
 						try:
 							reply_message = await self.editor.edit(**self.build_tg_args(job))
@@ -440,7 +448,8 @@ class Bot(object):
 						JobType.VIDEO: self.client.send_video,
 						JobType.IMAGE: self.client.send_photo,
 						JobType.AUDIO: self.client.send_audio,
-						JobType.ANIMATION: self.client.send_animation
+						JobType.ANIMATION: self.client.send_animation,
+						JobType.TEXT: self.client.send_message
 					}
 					try:
 						while True:
@@ -460,9 +469,10 @@ class Bot(object):
 								job_args[reality.value.lower()] = job_args.pop(expectation.value.lower())
 							reply_message = await send_funcs[reality](**job_args)
 
-					tg_file_id = Utils.extract_file_id(reply_message)
-					tg_file_ids.append(tg_file_id)
-					job.tg_file_id = tg_file_id
+					if reply_message:
+						tg_file_id = Utils.extract_file_id(reply_message)
+						tg_file_ids.append(tg_file_id)
+						job.tg_file_id = tg_file_id
 					logging.info("Uploaded media file with type '%s' tg_file_id is '%s'", job.media_type.value, job.tg_file_id)
 				elif job.media_type == JobType.COLLECTION:
 					col_job_args = self.build_tg_args(job)
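Aside: for a `JobType.TEXT` job the new branches above reduce to a single `send_message` call. A sketch of the resulting call, assuming the pyrogram-style `Client` the surrounding code uses (`signature` stands in for the output of `build_signature_caption(job)`):

from pyrogram import Client
from pyrogram.enums import ParseMode

async def send_text_post(client: Client, chat_id: int, message_text: str, signature: str):
	# mirrors the TEXT branch of build_tg_args() plus send_funcs[JobType.TEXT]
	await client.send_message(
		chat_id=chat_id,
		text=f"<b>Post text:</b><pre>{message_text}</pre>\n\n{signature}",
		parse_mode=ParseMode.HTML,
	)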
--- warp_beacon-2.8.13/warp_beacon/telegram/handlers.py
+++ warp_beacon-2.8.15/warp_beacon/telegram/handlers.py
@@ -14,8 +14,6 @@ from warp_beacon.jobs.upload_job import UploadJob
 from warp_beacon.jobs import Origin
 from warp_beacon.jobs.types import JobType
 from warp_beacon.scraper.link_resolver import LinkResolver
-from warp_beacon.scraper.fail_handler import FailHandler
-from warp_beacon.storage.mongo import DBClient
 
 class Handlers(object):
 	storage = None
@@ -25,12 +23,7 @@ class Handlers(object):
 	def __init__(self, bot: "Bot") -> None:
 		self.bot = bot
 		self.storage = bot.storage
-		# add uploader callbacks to handle service restart
-		for job in FailHandler(DBClient()).get_failed_jobs(clean=False):
-			self.bot.uploader.add_callback(
-				job.placeholder_message_id,
-				self.upload_wrapper
-			)
+		self.bot.uploader.uploader_wrapper = self.upload_wrapper
 
 	async def help(self, _: Client, message: Message) -> None:
 		"""Send a message when the command /help is issued."""
@@ -111,6 +104,14 @@ class Handlers(object):
 				origin=job.job_origin.value,
 				canonical_name=common_canonical_name
 			)
+		elif job.media_type == JobType.TEXT:
+			self.storage.add_media(
+				tg_file_ids=[None],
+				media_url=job.url,
+				media_type=job.media_type.value,
+				origin=job.job_origin.value,
+				message_text=job.message_text
+			)
 		else:
 			self.storage.add_media(
 				tg_file_ids=[','.join(tg_file_ids)],
@@ -139,11 +140,6 @@ class Handlers(object):
 					text="Failed to create message placeholder. Please check your bot Internet connection."
 				)
 
-			self.bot.uploader.add_callback(
-				job.placeholder_message_id,
-				self.upload_wrapper
-			)
-
 			self.bot.downloader.queue_task(job)
 		except Exception as e:
 			logging.error("Failed to schedule download task!")
@@ -215,6 +211,7 @@ class Handlers(object):
 			elif ent_len:
 				media_type = JobType[entities[0]["media_type"].upper()]
 				canonical_name = entities[0]["canonical_name"]
+				message_text = entities[0]["message_text"]
 				await self.bot.upload_job(
 					UploadJob(
 						url=url,
@@ -228,22 +225,23 @@ class Handlers(object):
 						chat_type=message.chat.type,
 						source_username=Utils.extract_message_author(message),
 						canonical_name=canonical_name,
-						message_leftover=msg_leftover
+						message_leftover=msg_leftover,
+						message_text=message_text
 					)
 				)
 			else:
 				if await self.queue_job(DownloadJob.build(
-					url=url,
-					message_id=effective_message_id,
-					chat_id=chat.id,
-					user_id=message.from_user.id,
-					in_process=self.bot.uploader.is_inprocess(uniq_id),
-					uniq_id=uniq_id,
-					job_origin=origin,
-					source_username=Utils.extract_message_author(message),
-					chat_type=chat.type,
-					message_leftover=msg_leftover
-				)):
+						url=url,
+						message_id=effective_message_id,
+						chat_id=chat.id,
+						user_id=message.from_user.id,
+						in_process=self.bot.uploader.is_inprocess(uniq_id),
+						uniq_id=uniq_id,
+						job_origin=origin,
+						source_username=Utils.extract_message_author(message),
+						chat_type=chat.type,
+						message_leftover=msg_leftover
+					)):
 					self.bot.uploader.set_inprocess(uniq_id)
 
 				if chat.type not in (ChatType.GROUP, ChatType.SUPERGROUP) and not urls: