warp-beacon 2.8.12 (py3-none-any.whl) → 2.8.14 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
warp_beacon/__version__.py CHANGED
@@ -1,2 +1,2 @@
-__version__ = "2.8.12"
+__version__ = "2.8.14"
 
warp_beacon/jobs/abstract.py CHANGED
@@ -109,6 +109,9 @@ class AbstractJob(ABC):
 		if self.media_type == JobType.COLLECTION:
 			if not self.media_collection:
 				return True
+		elif self.media_type == JobType.TEXT:
+			if not self.message_text:
+				return True
 		elif not self.local_media_path:
 			return True
 		return False
@@ -129,9 +132,13 @@ class AbstractJob(ABC):
 			for j in i:
 				if os.path.exists(j.local_media_path):
 					os.unlink(j.local_media_path)
+		elif self.media_type == JobType.TEXT:
+			pass
 		else:
 			if os.path.exists(self.local_media_path):
 				os.unlink(self.local_media_path)
 			if self.local_compressed_media_path:
 				if os.path.exists(self.local_compressed_media_path):
 					os.unlink(self.local_compressed_media_path)
+
+		return True
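
Net effect of the two hunks above: a TEXT job counts as empty only while its message_text is unset, and the cleanup path has nothing to unlink for text posts (the method now also explicitly returns True). A minimal sketch of the resulting behaviour; the method names is_empty()/remove_files() and the keyword-forwarding build() are assumptions based on how jobs are used elsewhere in this diff:

    from warp_beacon.jobs.download_job import DownloadJob
    from warp_beacon.jobs.types import JobType

    # hypothetical usage; the hunks above do not show the patched method names
    job = DownloadJob.build(url="https://x.com/i/status/1", media_type=JobType.TEXT)
    assert job.is_empty()          # TEXT job with no message_text is empty
    job.message_text = "post body"
    assert not job.is_empty()      # text content present
    assert job.remove_files()      # TEXT branch skips unlinking, returns True
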
warp_beacon/jobs/types.py CHANGED
@@ -6,4 +6,5 @@ class JobType(str, Enum):
 	IMAGE = "image",
 	AUDIO = "audio",
 	COLLECTION = "collection"
-	ANIMATION = "animation"
+	ANIMATION = "animation"
+	TEXT = "text"
warp_beacon/scraper/X/X.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import time
 import logging
-from mimetypes import guess_extension
+from mimetypes import guess_extension, guess_type
 from urllib.parse import urlparse
 import requests
 import yt_dlp
@@ -9,10 +9,11 @@ from playwright.sync_api import sync_playwright, Page
 
 from warp_beacon.telegram.utils import Utils
 from warp_beacon.scraper.utils import ScraperUtils
-from warp_beacon.scraper.X.types import XMediaType
 from warp_beacon.jobs.types import JobType
 from warp_beacon.scraper.X.abstract import XAbstract
 
+from warp_beacon.scraper.exceptions import Unavailable
+
 class XScraper(XAbstract):
 	DOWNLOAD_DIR = "/tmp"
 
@@ -29,107 +30,134 @@ class XScraper(XAbstract):
 
 		return ret
 
-	def get_media_type(self, media_info: dict) -> XMediaType:
-		media_type = XMediaType.UNKNOWN
-		#logging.info("[X] post info: '%s'", media_info)
-
-		if 'ext' in media_info:
-			logging.info("[X] Format: '%s'", media_info['ext'])
-		if 'formats' in media_info:
-			logging.info("[X] Contains video.")
-			media_type = XMediaType.VIDEO
-		elif 'thumbnails' in media_info:
-			logging.info("[X] contains images.")
-			media_type = XMediaType.IMAGE
-		else:
-			logging.info("[X] No media found.")
-
-		return media_type
+	def generate_result(self, local_files: list, job_type: JobType, canonical_name: str = "", performer: str = "") -> list:
+		res = []
+		if local_files:
+			if job_type == JobType.COLLECTION:
+				chunks = []
+				for media_chunk in Utils.chunker(local_files, 10):
+					chunk = []
+					for media in media_chunk:
+						mime_type, _ = guess_type(media)
+						chunk.append({
+							"local_media_path": self.rename_local_file(media),
+							"canonical_name": canonical_name,
+							"media_type": JobType.VIDEO if "video" in mime_type else JobType.IMAGE,
+							"media_info": {}
+						})
+					chunks.append(chunk)
+				res.append({
+					"media_type": JobType.COLLECTION,
+					"canonical_name": canonical_name,
+					"items": chunks
+				})
+			else:
+				for local_file in local_files:
+					res.append({
+						"local_media_path": self.rename_local_file(local_file),
+						"performer": performer,
+						"canonical_name": canonical_name,
+						"media_type": job_type
+					})
+		logging.debug(res)
+		return res
 
 	def _download(self, url: str, timeout: int = 60) -> list:
 		res = []
-		job_type = JobType.UNKNOWN
+		post_text = ""
+		pw_proxy = None
+		if self.proxy:
+			dsn = self.proxy.get("dsn", "")
+			if dsn:
+				parsed = urlparse(dsn)
+				pw_proxy = {
+					"server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
+					"username": parsed.username,
+					"password": parsed.password
+				}
+				logging.info("[X] build proxy: %s", pw_proxy)
+
+		contains_images, contains_videos = False, False
+		images, videos = [], []
+		with sync_playwright() as p:
+			with p.chromium.launch(headless=True) as browser:
+				with browser.new_context(proxy=pw_proxy, ignore_https_errors=True) as context:
+					page = context.new_page()
+					page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
+					page.wait_for_selector("article[role='article']", timeout=(timeout*1000))
+
+					contains_videos = self.tweet_contains_video(page)
+					contains_images = self.tweet_contains_images(page)
+
+					if contains_images:
+						post_text, images = self.download_images(page, timeout)
+
+					if not contains_images and not contains_videos:
+						post_text = self.extract_post_text(page)
+
+		if contains_videos:
+			media_info, videos = self.download_videos(url, timeout)
+			if media_info:
+				post_text = self.extract_canonical_name(media_info)
+
+		if not images and not videos:
+			if not post_text:
+				raise Unavailable("Content unvailable")
+			logging.info("[X]: Sending text message")
+			res.append({
+				"message_text": post_text,
+				"media_type": JobType.TEXT
+			})
+			return res
+
+		if len(images) > 1 or len(videos) > 1:
+			logging.info("[X]: uploading collection")
+			content = images + videos
+			res.extend(self.generate_result(content, JobType.COLLECTION, canonical_name=post_text))
+		else:
+			logging.info("[X]: uploading media")
+			for job_type, content in {JobType.IMAGE: images, JobType.VIDEO: videos}.items():
+				if content:
+					res.extend(self.generate_result(content, job_type, canonical_name=post_text))
+
+		return res
+
+	def download_videos(self, url: str, timeout: int = 60) -> tuple[dict, list[str]]:
+		local_files = []
+		media_info = {}
 		time_name = str(time.time()).replace('.', '_')
 		ydl_opts = {
 			'socket_timeout': timeout,
-			'outtmpl': f'{self.DOWNLOAD_DIR}/x_download_{time_name}.%(ext)s',
+			'outtmpl': f'{self.DOWNLOAD_DIR}/x_download_{time_name}_%(id)s.%(ext)s',
 			'quiet': False,
 			'force_generic_extractor': False,
-			'noplaylist': True,
+			#'noplaylist': True,
 			'merge_output_format': 'mp4',
-			'dump_single_json': True,
+			'dump_single_json': False,
 			'nocheckcertificate': True,
 			'progress_hooks': [self.dlp_on_progress],
 		}
-
 		if self.proxy:
 			proxy_dsn = self.proxy.get("dsn", "")
 			logging.info("[X] Using proxy DSN '%s'", proxy_dsn)
 			if proxy_dsn:
 				ydl_opts["proxy"] = proxy_dsn
 
-		local_file, media_info, media_type, post_text = "", {}, XMediaType.UNKNOWN, ""
-		#tweet_contains_video, tweet_contains_images = False, False
-
-		#with sync_playwright() as p:
-		#	with p.chromium.launch(headless=True) as browser:
-		#		with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
-		#			page = context.new_page()
-		#			page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
-		#			tweet_contains_video = self.tweet_contains_video(page)
-		#			tweet_contains_images = self.tweet_contains_images(page)
-
 		with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-			try:
-				media_info = ydl.extract_info(url, download=False)
-				media_type = self.get_media_type(media_info)
-				if media_type == XMediaType.VIDEO:
-					local_file = self.download_video(url, ydl, media_info)
-					post_text = self.extract_canonical_name(media_info)
-					job_type = JobType.VIDEO
-			except yt_dlp.utils.DownloadError as e:
-				msg = str(e).lower()
-				if "no video could be found in this tweet" in msg:
-					logging.warning("[X] yt_dlp failed to extract info. Falling back to image scraping.")
-					media_type = XMediaType.IMAGE
-				else:
-					raise
-
-		images = []
-		if media_type == XMediaType.IMAGE:
-			job_type = JobType.IMAGE
-			images, post_text = self.download_images(url, timeout)
-			if images:
-				if len(images) > 1:
-					job_type = JobType.COLLECTION
-				else:
-					local_file = images[0]
-
-		if job_type == JobType.COLLECTION:
-			chunks = []
-			for media_chunk in Utils.chunker(images, 10):
-				chunk = []
-				for media in media_chunk:
-					chunk.append({
-						"local_media_path": self.rename_local_file(media),
-						"canonical_name": post_text,
-						"media_type": JobType.IMAGE
-					})
-				chunks.append(chunk)
-			res.append({
-				"media_type": JobType.COLLECTION,
-				"items": chunks
-			})
-		else:
-			if local_file:
-				res.append({
-					"local_media_path": self.rename_local_file(local_file),
-					"performer": media_info.get("uploader", "Unknown"),
-					"canonical_name": post_text,
-					"media_type": job_type
-				})
+			info = ydl.extract_info(url, download=False)
+			media_info = info
+			entries = info.get("entries", [info])
+
+			for entry in entries:
+				ret = ydl.download([entry['webpage_url']])
+				if ret == 0:
+					file_path = ydl.prepare_filename(entry)
+					if isinstance(file_path, str):
+						local_files.append(file_path)
+					else:
+						local_files.extend(file_path)
 
-		return res
+		return media_info, local_files
 
 	def adaptive_chunk_size(self, content_length: int) -> int:
 		if content_length < 100_000:
@@ -141,20 +169,11 @@ class XScraper(XAbstract):
 		else:
 			return 65536
 
-	def download_video(self, url: str, ydl: yt_dlp.YoutubeDL, media_info: dict) -> str:
-		local_file = ""
-		ydl.download([url])
-		local_file = ydl.prepare_filename(media_info)
-		logging.debug("Temp filename: '%s'", local_file)
-		if local_file:
-			local_file = self.rename_local_file(local_file)
-		return local_file
-
 	def get_extension_from_headers(self, response: requests.Response) -> str:
 		content_type = response.headers.get("Content-Type", "")
 		return guess_extension(content_type) or ".jpg"
 
-	def download_images(self, url: str, timeout: int = 60) -> tuple[list[str], str]:
+	def download_images(self, page: Page, timeout: int) -> tuple[str, list[str]]:
 		downloaded_imgs = []
 		headers = {
 			"User-Agent": ScraperUtils.get_ua(),
@@ -166,7 +185,7 @@ class XScraper(XAbstract):
 		if self.proxy:
 			proxies = {"https": self.proxy.get("dsn", ""), "http": self.proxy.get("dsn", "")}
 
-		image_urls, post_text = self.extract_image_urls_from_x_post(url, timeout=timeout)
+		image_urls, post_text = self.extract_image_urls_from_x_post(page, timeout)
 
 		if not image_urls:
 			logging.error("[X] Content images are not found!")
@@ -210,101 +229,25 @@ class XScraper(XAbstract):
 			)
 			downloaded_imgs.append(filepath)
 
-		return downloaded_imgs, post_text
+		return post_text, downloaded_imgs
 
-	def extract_post_text(self, page: Page) -> str:
-		try:
-			tweet_texts = []
-			# collecting text blocks from post
-			containers = page.query_selector_all('div[data-testid="tweetText"]')
-			for container in containers:
-				try:
-					spans = container.query_selector_all("span")
-					if spans:
-						for span in spans:
-							text = span.inner_text().strip()
-							if text:
-								tweet_texts.append(text)
-					else:
-						# to span's try container itself
-						text = container.inner_text().strip()
-						if text:
-							tweet_texts.append(text)
-				except Exception:
-					continue
-
-			return " ".join(tweet_texts).strip()
-		except Exception as e:
-			logging.warning("Failed to extract tweet text.", exc_info=e)
-
-		return ""
-
-	def extract_image_urls_from_x_post(self, url: str, timeout: int = 60) -> tuple[list[str], str]:
+	def extract_image_urls_from_x_post(self, page: Page, timeout: int) -> tuple[list[str], str]:
 		img_urls, post_text = [], ''
 
-		proxy = None
-		if self.proxy:
-			dsn = self.proxy.get("dsn", "")
-			if dsn:
-				parsed = urlparse(dsn)
-				proxy = {
-					"server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
-					"username": parsed.username,
-					"password": parsed.password
-				}
-				logging.info("[X] build proxy: %s", proxy)
-
-		with sync_playwright() as p:
-			with p.chromium.launch(headless=True) as browser:
-				with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
-					page = context.new_page()
-					page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
-
-					#page.wait_for_timeout(3000)
-					page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
-					post_text = self.extract_post_text(page)
+		page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
+		post_text = self.extract_post_text(page)
 
-					image_elements = page.query_selector_all("img")
-					image_urls = []
+		image_elements = page.query_selector_all("img")
+		image_urls = []
 
-					for img in image_elements:
-						src = img.get_attribute("src")
-						if src and "pbs.twimg.com/media" in src:
-							image_urls.append(src)
+		for img in image_elements:
+			src = img.get_attribute("src")
+			if src and "pbs.twimg.com/media" in src:
				image_urls.append(src)
 
-					img_urls = list(set(image_urls))
+		img_urls = list(set(image_urls))
 		return img_urls, post_text
 
-	def get_media_type_from_info_and_dom(self, media_info: dict, page: Page) -> XMediaType:
-		is_video = (
-			media_info.get("vcodec") != "none" or
-			media_info.get("ext") in {"mp4", "mov", "mkv"} or
-			any(
-				f.get("vcodec") not in (None, "none")
-				for f in media_info.get("formats", [])
-			)
-		)
-
-		try:
-			image_elements = page.query_selector_all("img")
-			image_urls = [
-				img.get_attribute("src")
-				for img in image_elements
-				if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
-			]
-			has_images = bool(image_urls)
-		except Exception:
-			has_images = False
-
-		if is_video and has_images:
-			return XMediaType.MIXED
-		elif is_video:
-			return XMediaType.VIDEO
-		elif has_images:
-			return XMediaType.IMAGE
-
-		return XMediaType.UNKNOWN
-
 	def tweet_contains_video(self, page: Page) -> bool:
 		try:
 			return bool(
@@ -327,4 +270,35 @@ class XScraper(XAbstract):
 			return bool(image_urls)
 		except Exception:
 			pass
-		return False
+		return False
+
+	def extract_post_text(self, page: Page) -> str:
+		try:
+			text_fragments = []
+
+			# find tweetText containers (in main and quoted)
+			containers = page.query_selector_all('div[data-testid="tweetText"]')
+			for container in containers:
+				fragments = []
+
+				# find <span> and <img alt=...> inside text
+				for node in container.query_selector_all("span, img"):
+					tag = node.evaluate("node => node.tagName.toLowerCase()")
+					if tag == "span":
+						value = node.inner_text().strip()
+						if value:
+							fragments.append(value)
+					elif tag == "img":
+						# emoji as image
+						alt = node.get_attribute("alt")
+						if alt:
+							fragments.append(alt)
+
+				if fragments:
+					text_fragments.append("".join(fragments))
+
+			return "\n\n".join(text_fragments).strip()
+
+		except Exception as e:
+			logging.warning("X: [extract_post_text] error", exc_info=e)
+			return ""
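
The new generate_result() batches collection items with Utils.chunker(local_files, 10), matching Telegram's ten-item cap on media groups, and classifies each file via mimetypes.guess_type. Utils.chunker itself is not shown in this diff; an implementation consistent with how it is called here would be:

    def chunker(seq: list, size: int):
        # yield consecutive slices of at most `size` items
        for i in range(0, len(seq), size):
            yield seq[i:i + size]

    # e.g. 23 downloaded files -> media groups of 10, 10 and 3
    assert [len(c) for c in chunker(list(range(23)), 10)] == [10, 10, 3]
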
warp_beacon/scraper/X/types.py CHANGED
@@ -1,7 +1,8 @@
-import enum
+from enum import Flag, auto
 
-class XMediaType(enum.Enum):
+class XMediaType(Flag):
 	UNKNOWN = 0
-	VIDEO = 1
-	IMAGE = 2
-	MIXED = 3
+	VIDEO = auto()
+	IMAGE = auto()
+	MIXED = auto()
+	PLAYLIST = auto()
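
Switching XMediaType from enum.Enum to enum.Flag lets members combine with bitwise operators; note that MIXED is now its own auto() bit rather than the VIDEO | IMAGE composition the old 1/2/3 values implied. A short illustration of the new semantics:

    from enum import Flag, auto

    class XMediaType(Flag):
        UNKNOWN = 0
        VIDEO = auto()     # 1
        IMAGE = auto()     # 2
        MIXED = auto()     # 4, no longer equal to VIDEO | IMAGE (3)
        PLAYLIST = auto()  # 8

    combined = XMediaType.VIDEO | XMediaType.IMAGE
    assert XMediaType.VIDEO in combined   # Flag unions support membership tests
    assert combined != XMediaType.MIXED   # MIXED is a separate bit
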
warp_beacon/scraper/__init__.py CHANGED
@@ -319,6 +319,7 @@ class AsyncDownloader(object):
 					# success
 					for job in fail_handler.get_failed_jobs():
 						self.queue_task(job)
+					# media info processing
 					for item in items:
 						media_info = {"filesize": 0}
 						if item["media_type"] == JobType.VIDEO:
@@ -372,6 +373,8 @@ class AsyncDownloader(object):
 							job_args["media_collection"] = item["items"]
 							if item.get("save_items", None) is not None:
 								job_args["save_items"] = item.get("save_items", False)
+						elif item["media_type"] == JobType.TEXT:
+							job_args["message_text"] = item.get("message_text", "")
 						else:
 							job_args["local_media_path"] = item["local_media_path"]
 							if item.get("local_compressed_media_path", None):
warp_beacon/storage/__init__.py CHANGED
@@ -67,7 +67,8 @@ class Storage(object):
 					"uniq_id": document["uniq_id"],
 					"tg_file_id": document["tg_file_id"],
 					"media_type": document["media_type"],
-					"canonical_name": document.get("canonical_name")
+					"canonical_name": document.get("canonical_name"),
+					"message_text": document.get("message_text")
 				})
 		except Exception as e:
 			logging.error("Error occurred while trying to read from the database!")
@@ -82,13 +83,10 @@ class Storage(object):
 	def db_lookup_id(self, uniq_id: str) -> list[dict]:
 		return self.db_find(uniq_id)
 
-	def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "") -> list[int]:
+	def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "", message_text: str = "") -> list[int]:
 		uniq_id = self.compute_uniq(media_url)
 		media_ids = []
 		for tg_file_id in tg_file_ids:
-			if not tg_file_id:
-				logging.warning("Passed empty `tg_file_id`! Skipping.")
-				continue
 			if self.db_lookup_id(uniq_id):
 				logging.info("Detected existing uniq_id, skipping storage write operation")
 				continue
@@ -98,7 +96,8 @@ class Storage(object):
 				"media_type": media_type,
 				"tg_file_id": tg_file_id,
 				"origin": origin,
-				"canonical_name": canonical_name
+				"canonical_name": canonical_name,
+				"message_text": message_text
 			}).inserted_id)
 
 		return media_ids
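
With the empty-tg_file_id guard removed, a TEXT post can now be stored with tg_file_ids=[None] and recovered by its message text alone. A hypothetical round-trip using only names visible in this diff (Storage construction and the origin value are assumptions):

    storage.add_media(
        tg_file_ids=[None],    # TEXT posts carry no Telegram file id
        media_url="https://x.com/i/status/1",
        media_type="text",
        origin="x",
        message_text="post body",
    )
    entities = storage.db_lookup_id(storage.compute_uniq("https://x.com/i/status/1"))
    assert entities[0]["message_text"] == "post body"
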
warp_beacon/telegram/bot.py CHANGED
@@ -11,7 +11,7 @@ from pyrogram import Client, filters
 from pyrogram.enums import ParseMode, ChatType
 from pyrogram.handlers import MessageHandler, CallbackQueryHandler
 from pyrogram.types import InputMediaAudio, InputMediaPhoto, InputMediaVideo, InputMediaAnimation, InlineKeyboardButton, InlineKeyboardMarkup
-from pyrogram.errors import NetworkMigrate, BadRequest, MultiMediaTooLong, MessageIdInvalid
+from pyrogram.errors import NetworkMigrate, BadRequest, MultiMediaTooLong, MessageIdInvalid, FloodWait
 
 import warp_beacon
 from warp_beacon.__version__ import __version__
@@ -393,6 +393,9 @@ class Bot(object):
 				tg_chunk.append(anim)
 			mediafs.append(tg_chunk)
 			args["media"] = mediafs
+		elif job.media_type == JobType.TEXT:
+			args["text"] = f"<b>Post text:</b><pre>{job.message_text}</pre>\n\n{self.build_signature_caption(job)}"
+			args["parse_mode"] = ParseMode.HTML
 
 		args["chat_id"] = job.chat_id
 
@@ -412,7 +415,7 @@ class Bot(object):
 		if render_donates:
 			keyboard_buttons[0].append(InlineKeyboardButton("❤ Donate", url=os.environ.get("DONATE_LINK", "https://pay.cryptocloud.plus/pos/W5BMtNQt5bJFoW2E")))
 
-		if keyboard_buttons[0]: #job.short_text or render_donates:
+		if keyboard_buttons[0]:
 			args["reply_markup"] = InlineKeyboardMarkup(keyboard_buttons)
 
 		return args
@@ -425,9 +428,14 @@ class Bot(object):
 		while not retry_amount >= max_retries:
 			try:
 				reply_message = None
-				if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION):
-					if job.media_type in (JobType.VIDEO, JobType.AUDIO):
-						await Utils.ensure_me_loaded(self.client)
+				if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION, JobType.TEXT):
+					#if job.media_type in (JobType.VIDEO, JobType.AUDIO):
+					#	await Utils.ensure_me_loaded(self.client)
+					if job.media_type == JobType.TEXT:
+						if job.placeholder_message_id:
+							await self.placeholder.remove(job.chat_id, job.placeholder_message_id)
+							job.placeholder_message_id = None
+
 					if job.placeholder_message_id:
 						try:
 							reply_message = await self.editor.edit(**self.build_tg_args(job))
@@ -440,10 +448,17 @@ class Bot(object):
 							JobType.VIDEO: self.client.send_video,
 							JobType.IMAGE: self.client.send_photo,
 							JobType.AUDIO: self.client.send_audio,
-							JobType.ANIMATION: self.client.send_animation
+							JobType.ANIMATION: self.client.send_animation,
+							JobType.TEXT: self.client.send_message
 						}
 						try:
-							reply_message = await send_funcs[job.media_type](**self.build_tg_args(job))
+							while True:
+								try:
+									reply_message = await send_funcs[job.media_type](**self.build_tg_args(job))
+									break
+								except FloodWait as e:
+									logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
+									asyncio.sleep(e.value)
 						except ValueError as e:
 							err_text = str(e)
 							if "Expected" in err_text:
@@ -454,9 +469,10 @@ class Bot(object):
 								job_args[reality.value.lower()] = job_args.pop(expectation.value.lower())
 								reply_message = await send_funcs[reality](**job_args)
 
-					tg_file_id = Utils.extract_file_id(reply_message)
-					tg_file_ids.append(tg_file_id)
-					job.tg_file_id = tg_file_id
+					if reply_message:
+						tg_file_id = Utils.extract_file_id(reply_message)
+						tg_file_ids.append(tg_file_id)
+						job.tg_file_id = tg_file_id
 					logging.info("Uploaded media file with type '%s' tg_file_id is '%s'", job.media_type.value, job.tg_file_id)
 				elif job.media_type == JobType.COLLECTION:
 					col_job_args = self.build_tg_args(job)
@@ -464,7 +480,14 @@ class Bot(object):
 					snd_grp_options = {"chat_id": job.chat_id, "reply_to_message_id": job.message_id}
 					for i, media_chunk in enumerate(col_job_args["media"]):
 						snd_grp_options["media"] = media_chunk
-						messages = await self.client.send_media_group(**snd_grp_options)
+						messages = []
+						while True:
+							try:
+								messages = await self.client.send_media_group(**snd_grp_options)
+								break
+							except FloodWait as e:
+								logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
+								asyncio.sleep(e.value)
 						sent_messages += messages
 						if job.media_collection:
 							for j, _ in enumerate(media_chunk):
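
The send paths above now retry on pyrogram's FloodWait, whose value attribute carries the server-mandated wait in seconds. A standalone sketch of the same pattern (send_with_floodwait_retry is illustrative, not part of the package); note that inside an async function the sleep must be awaited, since a bare asyncio.sleep(e.value) only creates a coroutine:

    import asyncio
    import logging

    from pyrogram.errors import FloodWait

    async def send_with_floodwait_retry(send_func, **kwargs):
        # keep retrying a pyrogram send call until it is not flood-limited
        while True:
            try:
                return await send_func(**kwargs)
            except FloodWait as e:
                logging.warning("FloodWait: sleeping %d seconds", int(e.value))
                await asyncio.sleep(e.value)  # awaited, so the wait actually happens
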
warp_beacon/telegram/edit_message.py CHANGED
@@ -1,9 +1,13 @@
+import asyncio
 import re
 
+import logging
+
 from pyrogram.client import Client
 from pyrogram.types import InputMedia, InputMediaAudio, InputMediaPhoto, InputMediaVideo, InputMediaAnimation, InlineKeyboardMarkup
 from pyrogram import raw
 from pyrogram import types
+from pyrogram.errors import FloodWait
 
 from warp_beacon.telegram.progress_bar import ProgressBar
 from warp_beacon.telegram.types import ReportType
@@ -124,23 +128,30 @@ class EditMessage(object):
 			raw_file_thumb = await self.client.save_file(path=media.thumb)
 			raw_media = self.get_wrapped_animation(raw_file=raw_file, raw_thumb=raw_file_thumb, media=media, file_name=file_name)
 
-		peer = await self.client.resolve_peer(chat_id)
-
-		r = await self.client.invoke(
-			raw.functions.messages.EditMessage(
-				peer=peer,
-				id=message_id,
-				media=raw_media,
-				reply_markup=await reply_markup.write(self.client) if reply_markup else None,
-				message=message,
-				entities=entities
-			)
-		)
+		peer, r = None, None
+		while True:
+			try:
+				peer = await self.client.resolve_peer(chat_id)
+				r = await self.client.invoke(
+					raw.functions.messages.EditMessage(
+						peer=peer,
+						id=message_id,
+						media=raw_media,
+						reply_markup=await reply_markup.write(self.client) if reply_markup else None,
+						message=message,
+						entities=entities
+					)
+				)
+				break
+			except FloodWait as e:
+				logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
+				asyncio.sleep(e.value)
 
-		for i in r.updates:
-			if isinstance(i, (raw.types.UpdateEditMessage, raw.types.UpdateEditChannelMessage)):
-				return await types.Message._parse(
-					self.client, i.message,
-					{i.id: i for i in r.users},
-					{i.id: i for i in r.chats}
-				)
+		if r:
+			for i in r.updates:
+				if isinstance(i, (raw.types.UpdateEditMessage, raw.types.UpdateEditChannelMessage)):
+					return await types.Message._parse(
+						self.client, i.message,
+						{i.id: i for i in r.users},
+						{i.id: i for i in r.chats}
+					)
warp_beacon/telegram/handlers.py CHANGED
@@ -111,6 +111,14 @@ class Handlers(object):
 				origin=job.job_origin.value,
 				canonical_name=common_canonical_name
 			)
+		elif job.media_type == JobType.TEXT:
+			self.storage.add_media(
+				tg_file_ids=[None],
+				media_url=job.url,
+				media_type=job.media_type.value,
+				origin=job.job_origin.value,
+				message_text=job.message_text
+			)
 		else:
 			self.storage.add_media(
 				tg_file_ids=[','.join(tg_file_ids)],
@@ -215,6 +223,7 @@ class Handlers(object):
 			elif ent_len:
 				media_type = JobType[entities[0]["media_type"].upper()]
 				canonical_name = entities[0]["canonical_name"]
+				message_text = entities[0]["message_text"]
 				await self.bot.upload_job(
 					UploadJob(
 						url=url,
@@ -228,22 +237,23 @@ class Handlers(object):
 						chat_type=message.chat.type,
 						source_username=Utils.extract_message_author(message),
 						canonical_name=canonical_name,
-						message_leftover=msg_leftover
+						message_leftover=msg_leftover,
+						message_text=message_text
 					)
 				)
 			else:
 				if await self.queue_job(DownloadJob.build(
-					url=url,
-					message_id=effective_message_id,
-					chat_id=chat.id,
-					user_id=message.from_user.id,
-					in_process=self.bot.uploader.is_inprocess(uniq_id),
-					uniq_id=uniq_id,
-					job_origin=origin,
-					source_username=Utils.extract_message_author(message),
-					chat_type=chat.type,
-					message_leftover=msg_leftover
-				)):
+						url=url,
+						message_id=effective_message_id,
+						chat_id=chat.id,
+						user_id=message.from_user.id,
+						in_process=self.bot.uploader.is_inprocess(uniq_id),
+						uniq_id=uniq_id,
+						job_origin=origin,
+						source_username=Utils.extract_message_author(message),
+						chat_type=chat.type,
+						message_leftover=msg_leftover
+					)):
 					self.bot.uploader.set_inprocess(uniq_id)
 
 		if chat.type not in (ChatType.GROUP, ChatType.SUPERGROUP) and not urls:
warp_beacon/uploader/__init__.py CHANGED
@@ -89,7 +89,7 @@ class AsyncUploader(object):
 		while self.allow_loop:
 			try:
 				try:
-					job = self.job_queue.get()
+					job: UploadJob = self.job_queue.get()
 					if job is self.__JOE_BIDEN_WAKEUP:
 						break
 					if job.is_message_to_admin and job.message_text and self.admin_message_callback:
@@ -118,7 +118,10 @@ class AsyncUploader(object):
 					message_id = job.placeholder_message_id
 
 					if not in_process and not job.job_failed and not job.job_warning and not job.replay:
-						logging.info("Accepted upload job, file(s): '%s'", path)
+						if job.media_type == JobType.TEXT:
+							logging.info("Uploading job text: '%s'", job.message_text)
+						else:
+							logging.info("Accepted upload job, file(s): '%s'", path)
 
 					try:
 						if message_id in self.callbacks:
warp_beacon-2.8.12.dist-info/METADATA → warp_beacon-2.8.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warp_beacon
-Version: 2.8.12
+Version: 2.8.14
 Summary: Telegram bot for expanding external media links
 Home-page: https://github.com/sb0y/warp_beacon
 Author: Andrey Bagrintsev
warp_beacon-2.8.12.dist-info/RECORD → warp_beacon-2.8.14.dist-info/RECORD CHANGED
@@ -4,15 +4,15 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
 var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
 var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
 warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
-warp_beacon/__version__.py,sha256=ioCIkmzTwVXvR1lLWlgUjehGwiXaxB8kTGnfQ-3C1FA,24
+warp_beacon/__version__.py,sha256=TZgBJIjZg_hpyHZh7yBDpvAmjXgQy0i383jyxYXsn9A,24
 warp_beacon/warp_beacon.py,sha256=ADCR30uGXIsDrt9WoiI9Ghu2QtWs0qZIK6x3pQKM_B4,1109
 warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
 warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
 warp_beacon/jobs/__init__.py,sha256=vW5T4jJUla97TNRapX_Y6eJCiPMEbySSlD0SJQKfAXs,189
-warp_beacon/jobs/abstract.py,sha256=x8shgG1So1g-Yqu_uzij7yuqherJTIhVhdslOrq69Z4,3263
+warp_beacon/jobs/abstract.py,sha256=dDGWFJL474_u_Musk-nZ6NfKH6CYdAEQlpX8thl-hPg,3411
 warp_beacon/jobs/download_job.py,sha256=pfSEZpWVzya0hddU5794p2uQYfm4lHrtM1Ck0T-UrLk,844
-warp_beacon/jobs/types.py,sha256=Ae8zINgbs7cOcYkYoOCOACA7duyhnIGMQAJ_SJB1QRQ,176
+warp_beacon/jobs/types.py,sha256=bb73jHm12ahq3BPDwdsWzHB69KmElSpgbj-A0lA3VNk,191
 warp_beacon/jobs/upload_job.py,sha256=_ul4psPej1jLEs-BMcMR80GbXDSmm38jE9yoZtecclY,741
 warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warp_beacon/mediainfo/abstract.py,sha256=ZR2JMuRpoh7nDNov9a8YkAfr6BI2HXnXzQtVrLgDxjs,1185
@@ -22,17 +22,17 @@ warp_beacon/mediainfo/video.py,sha256=UBZrhTN5IDI-aYu6tsJEILo9nFkjHhkldGVFmvV7tE
 warp_beacon/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warp_beacon/scheduler/instagram_human.py,sha256=lOytnonvqtB_8z6TVzrVJ1prqqgE4fktZZl-xpn7SOA,12951
 warp_beacon/scheduler/scheduler.py,sha256=0u9AIr9fTBmjU1GpOwKlPuNLskeJ4A-S2uAUzb-qO6w,4997
-warp_beacon/scraper/__init__.py,sha256=o9-HQEf4yQVNtWuJN4NcLUovejiHhP_KkQ1Xf5EaQvU,20670
+warp_beacon/scraper/__init__.py,sha256=EBS7k8vmn3ya_Ud0tPHtBo7xUMUP9tabmYk5Ase_zyw,20818
 warp_beacon/scraper/abstract.py,sha256=pWbaTu-gDZgi-iFjqMR_uGzPl5KLv-4gTdJ9w6cD4sk,3802
 warp_beacon/scraper/account_selector.py,sha256=n-466AiTXZ8o5cgcNkNwNiWLoi-EkLC7bHh6El1eIF8,10274
 warp_beacon/scraper/exceptions.py,sha256=hicAe6_0xN7Ry2gcFX4UvqPWMtF_lX2ihH1njQAaqCA,1496
 warp_beacon/scraper/fail_handler.py,sha256=5ODu4b8ndZWAcHIXrcUufsWFihetzNUoAi8IgAkreyQ,998
 warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
 warp_beacon/scraper/utils.py,sha256=AOZmDki2Pbr84IG-j_wN2UghKCiWFVDYdx6HJl0JTBs,1258
-warp_beacon/scraper/X/X.py,sha256=3-GBXHcSmGIiDLucJWLpiuaA8EDfP0-B7ws4MiCelkE,9883
+warp_beacon/scraper/X/X.py,sha256=lKxNe70iIasLnyN8QA_1rLa70Bd3Y9fL6J4AEdUgNJs,9200
 warp_beacon/scraper/X/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warp_beacon/scraper/X/abstract.py,sha256=pCzZPTCtn8pRbBx2SeuBUpMkEHqnOLtwLBAHYceL12Q,5475
-warp_beacon/scraper/X/types.py,sha256=9Y0PJo3vZ1DMQcyfqoE4y2-AQRAetVmIxQwFDZkZy30,87
+warp_beacon/scraper/X/types.py,sha256=RrAyODNA8WA0YzznOSK2wr-hstXf3BnEisy06uL-bdA,132
 warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
 warp_beacon/scraper/instagram/instagram.py,sha256=uzqUCVniRa3d9uavoMAz9-9MHvYOh7n_G7UyfgzHgAk,19154
@@ -42,23 +42,23 @@ warp_beacon/scraper/youtube/abstract.py,sha256=7CVR2fW6bpWYYKcveRddd6XlgDsfV_Pp3
 warp_beacon/scraper/youtube/music.py,sha256=5AeSBQyUgVCJT2hoBCV2WvlyuV9US09SYJhmBG_P9F8,2755
 warp_beacon/scraper/youtube/shorts.py,sha256=y0591kpWU35rt5OoWamkcHIstNZ98SXUlUKvYmUsyEY,4030
 warp_beacon/scraper/youtube/youtube.py,sha256=uYR7XpfP6ZnSvw1Gc4qG_M8jkCyv3maEytFdNWlYPwU,6732
-warp_beacon/storage/__init__.py,sha256=NaKKPXjwa8LvWsqnVJVz0riXj765lGswG9piKgI2lkY,3389
+warp_beacon/storage/__init__.py,sha256=xg3quvc-Lkc-hCZ2lkrTsGqLyMybTN14mwrSuDJMfD4,3403
 warp_beacon/storage/mongo.py,sha256=qC4ZiO8XXvPnP0rJwz4CJx42pqFsyAjCiW10W5QdT6E,527
 warp_beacon/telegram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-warp_beacon/telegram/bot.py,sha256=N4Gswpw-U8Z6yPAKMe7zVfDQSRAgvm6Sx6k5_Y7WWoI,19780
+warp_beacon/telegram/bot.py,sha256=xeSzUxNZf8zjU5K_UkwtzBzB6nIml1UPI0g1ITQdoEQ,20675
 warp_beacon/telegram/caption_shortener.py,sha256=EnguNCF52ne7y4P-iJAbI6K3sqoJqJbND_dX5Fhwkv0,1549
 warp_beacon/telegram/download_status.py,sha256=N-Qg13LVcPskyQNKG1lw50K1lhFtXu003muCRzZ7wiE,1561
-warp_beacon/telegram/edit_message.py,sha256=6DX8eJV70MG_b79WbuJG55qLBs66boElzYdCr8zpMeI,5380
-warp_beacon/telegram/handlers.py,sha256=32kJtlfSZyG4JbEj9W8tH6us7sZg2ziMmnxpEX_qGXw,10482
+warp_beacon/telegram/edit_message.py,sha256=vzSrtlt-QxBU-X9uRSKo7gJftOrvB5Qo3iNsVNptCoE,5684
+warp_beacon/telegram/handlers.py,sha256=2XJ3v9sVsWa3V3cji9FegO7l5JNqXqXxQrxIOzI2fyE,10793
 warp_beacon/telegram/placeholder_message.py,sha256=wN9-BRiyrtHG-EvXtZkGJHt2CX71munQ57ITttjt0mw,6400
 warp_beacon/telegram/progress_bar.py,sha256=IP4xtvLtdJtqdr2C-0YaU428iQGrKurbP4Npr31iW74,5014
 warp_beacon/telegram/progress_file_reader.py,sha256=e3equyNKlKs764AD-iE9QRsh3YDHTzP78Mx5tdvPPWs,969
 warp_beacon/telegram/types.py,sha256=Kvdng6uCF1HRoqQgGW1ZYYPJoVuYkFb-LDvMBbW5Hjk,89
 warp_beacon/telegram/utils.py,sha256=zTF8VQfAWetBSjAPbmNe_Zi_LN5fAcWptJKjLaFNHaE,5073
-warp_beacon/uploader/__init__.py,sha256=1enK6qMWaTZEaK456JwaKOfvCvznHA8cjgceOsrF6Po,5732
-warp_beacon-2.8.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-warp_beacon-2.8.12.dist-info/METADATA,sha256=W7U_kzv74XYVaeNArKB7Gw7UmwGBe2T4Ow6WYmUSl6Y,23236
-warp_beacon-2.8.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-warp_beacon-2.8.12.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
-warp_beacon-2.8.12.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
-warp_beacon-2.8.12.dist-info/RECORD,,
+warp_beacon/uploader/__init__.py,sha256=dR0VjIGSr859TTdorA2tKnjH7EpQOXnG71aXhZFaMl0,5863
+warp_beacon-2.8.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+warp_beacon-2.8.14.dist-info/METADATA,sha256=8eP6Ho7mF0Uq7jz0-MLzRb8niSFPPh3ewdFrmN3BH0I,23236
+warp_beacon-2.8.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+warp_beacon-2.8.14.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
+warp_beacon-2.8.14.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
+warp_beacon-2.8.14.dist-info/RECORD,,