warp-beacon 2.8.12__py3-none-any.whl → 2.8.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp_beacon/__version__.py +1 -1
- warp_beacon/jobs/abstract.py +7 -0
- warp_beacon/jobs/types.py +2 -1
- warp_beacon/scraper/X/X.py +156 -182
- warp_beacon/scraper/X/types.py +6 -5
- warp_beacon/scraper/__init__.py +3 -0
- warp_beacon/storage/__init__.py +5 -6
- warp_beacon/telegram/bot.py +34 -11
- warp_beacon/telegram/edit_message.py +30 -19
- warp_beacon/telegram/handlers.py +22 -12
- warp_beacon/uploader/__init__.py +5 -2
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/METADATA +1 -1
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/RECORD +17 -17
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/WHEEL +0 -0
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/entry_points.txt +0 -0
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/licenses/LICENSE +0 -0
- {warp_beacon-2.8.12.dist-info → warp_beacon-2.8.14.dist-info}/top_level.txt +0 -0
warp_beacon/__version__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
__version__ = "2.8.12"
|
1
|
+
__version__ = "2.8.14"
|
2
2
|
|
warp_beacon/jobs/abstract.py
CHANGED
@@ -109,6 +109,9 @@ class AbstractJob(ABC):
|
|
109
109
|
if self.media_type == JobType.COLLECTION:
|
110
110
|
if not self.media_collection:
|
111
111
|
return True
|
112
|
+
elif self.media_type == JobType.TEXT:
|
113
|
+
if not self.message_text:
|
114
|
+
return True
|
112
115
|
elif not self.local_media_path:
|
113
116
|
return True
|
114
117
|
return False
|
@@ -129,9 +132,13 @@ class AbstractJob(ABC):
|
|
129
132
|
for j in i:
|
130
133
|
if os.path.exists(j.local_media_path):
|
131
134
|
os.unlink(j.local_media_path)
|
135
|
+
elif self.media_type == JobType.TEXT:
|
136
|
+
pass
|
132
137
|
else:
|
133
138
|
if os.path.exists(self.local_media_path):
|
134
139
|
os.unlink(self.local_media_path)
|
135
140
|
if self.local_compressed_media_path:
|
136
141
|
if os.path.exists(self.local_compressed_media_path):
|
137
142
|
os.unlink(self.local_compressed_media_path)
|
143
|
+
|
144
|
+
return True
|
warp_beacon/jobs/types.py
CHANGED
warp_beacon/scraper/X/X.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
import time
|
3
3
|
import logging
|
4
|
-
from mimetypes import guess_extension
|
4
|
+
from mimetypes import guess_extension, guess_type
|
5
5
|
from urllib.parse import urlparse
|
6
6
|
import requests
|
7
7
|
import yt_dlp
|
@@ -9,10 +9,11 @@ from playwright.sync_api import sync_playwright, Page
|
|
9
9
|
|
10
10
|
from warp_beacon.telegram.utils import Utils
|
11
11
|
from warp_beacon.scraper.utils import ScraperUtils
|
12
|
-
from warp_beacon.scraper.X.types import XMediaType
|
13
12
|
from warp_beacon.jobs.types import JobType
|
14
13
|
from warp_beacon.scraper.X.abstract import XAbstract
|
15
14
|
|
15
|
+
from warp_beacon.scraper.exceptions import Unavailable
|
16
|
+
|
16
17
|
class XScraper(XAbstract):
|
17
18
|
DOWNLOAD_DIR = "/tmp"
|
18
19
|
|
@@ -29,107 +30,134 @@ class XScraper(XAbstract):
|
|
29
30
|
|
30
31
|
return ret
|
31
32
|
|
32
|
-
def
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
33
|
+
def generate_result(self, local_files: list, job_type: JobType, canonical_name: str = "", performer: str = "") -> list:
|
34
|
+
res = []
|
35
|
+
if local_files:
|
36
|
+
if job_type == JobType.COLLECTION:
|
37
|
+
chunks = []
|
38
|
+
for media_chunk in Utils.chunker(local_files, 10):
|
39
|
+
chunk = []
|
40
|
+
for media in media_chunk:
|
41
|
+
mime_type, _ = guess_type(media)
|
42
|
+
chunk.append({
|
43
|
+
"local_media_path": self.rename_local_file(media),
|
44
|
+
"canonical_name": canonical_name,
|
45
|
+
"media_type": JobType.VIDEO if "video" in mime_type else JobType.IMAGE,
|
46
|
+
"media_info": {}
|
47
|
+
})
|
48
|
+
chunks.append(chunk)
|
49
|
+
res.append({
|
50
|
+
"media_type": JobType.COLLECTION,
|
51
|
+
"canonical_name": canonical_name,
|
52
|
+
"items": chunks
|
53
|
+
})
|
54
|
+
else:
|
55
|
+
for local_file in local_files:
|
56
|
+
res.append({
|
57
|
+
"local_media_path": self.rename_local_file(local_file),
|
58
|
+
"performer": performer,
|
59
|
+
"canonical_name": canonical_name,
|
60
|
+
"media_type": job_type
|
61
|
+
})
|
62
|
+
logging.debug(res)
|
63
|
+
return res
|
48
64
|
|
49
65
|
def _download(self, url: str, timeout: int = 60) -> list:
|
50
66
|
res = []
|
51
|
-
|
67
|
+
post_text = ""
|
68
|
+
pw_proxy = None
|
69
|
+
if self.proxy:
|
70
|
+
dsn = self.proxy.get("dsn", "")
|
71
|
+
if dsn:
|
72
|
+
parsed = urlparse(dsn)
|
73
|
+
pw_proxy = {
|
74
|
+
"server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
|
75
|
+
"username": parsed.username,
|
76
|
+
"password": parsed.password
|
77
|
+
}
|
78
|
+
logging.info("[X] build proxy: %s", pw_proxy)
|
79
|
+
|
80
|
+
contains_images, contains_videos = False, False
|
81
|
+
images, videos = [], []
|
82
|
+
with sync_playwright() as p:
|
83
|
+
with p.chromium.launch(headless=True) as browser:
|
84
|
+
with browser.new_context(proxy=pw_proxy, ignore_https_errors=True) as context:
|
85
|
+
page = context.new_page()
|
86
|
+
page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
|
87
|
+
page.wait_for_selector("article[role='article']", timeout=(timeout*1000))
|
88
|
+
|
89
|
+
contains_videos = self.tweet_contains_video(page)
|
90
|
+
contains_images = self.tweet_contains_images(page)
|
91
|
+
|
92
|
+
if contains_images:
|
93
|
+
post_text, images = self.download_images(page, timeout)
|
94
|
+
|
95
|
+
if not contains_images and not contains_videos:
|
96
|
+
post_text = self.extract_post_text(page)
|
97
|
+
|
98
|
+
if contains_videos:
|
99
|
+
media_info, videos = self.download_videos(url, timeout)
|
100
|
+
if media_info:
|
101
|
+
post_text = self.extract_canonical_name(media_info)
|
102
|
+
|
103
|
+
if not images and not videos:
|
104
|
+
if not post_text:
|
105
|
+
raise Unavailable("Content unvailable")
|
106
|
+
logging.info("[X]: Sending text message")
|
107
|
+
res.append({
|
108
|
+
"message_text": post_text,
|
109
|
+
"media_type": JobType.TEXT
|
110
|
+
})
|
111
|
+
return res
|
112
|
+
|
113
|
+
if len(images) > 1 or len(videos) > 1:
|
114
|
+
logging.info("[X]: uploading collection")
|
115
|
+
content = images + videos
|
116
|
+
res.extend(self.generate_result(content, JobType.COLLECTION, canonical_name=post_text))
|
117
|
+
else:
|
118
|
+
logging.info("[X]: uploading media")
|
119
|
+
for job_type, content in {JobType.IMAGE: images, JobType.VIDEO: videos}.items():
|
120
|
+
if content:
|
121
|
+
res.extend(self.generate_result(content, job_type, canonical_name=post_text))
|
122
|
+
|
123
|
+
return res
|
124
|
+
|
125
|
+
def download_videos(self, url: str, timeout: int = 60) -> tuple[dict, list[str]]:
|
126
|
+
local_files = []
|
127
|
+
media_info = {}
|
52
128
|
time_name = str(time.time()).replace('.', '_')
|
53
129
|
ydl_opts = {
|
54
130
|
'socket_timeout': timeout,
|
55
|
-
'outtmpl': f'{self.DOWNLOAD_DIR}/x_download_{time_name}.%(ext)s',
|
131
|
+
'outtmpl': f'{self.DOWNLOAD_DIR}/x_download_{time_name}_%(id)s.%(ext)s',
|
56
132
|
'quiet': False,
|
57
133
|
'force_generic_extractor': False,
|
58
|
-
'noplaylist': True,
|
134
|
+
#'noplaylist': True,
|
59
135
|
'merge_output_format': 'mp4',
|
60
|
-
'dump_single_json':
|
136
|
+
'dump_single_json': False,
|
61
137
|
'nocheckcertificate': True,
|
62
138
|
'progress_hooks': [self.dlp_on_progress],
|
63
139
|
}
|
64
|
-
|
65
140
|
if self.proxy:
|
66
141
|
proxy_dsn = self.proxy.get("dsn", "")
|
67
142
|
logging.info("[X] Using proxy DSN '%s'", proxy_dsn)
|
68
143
|
if proxy_dsn:
|
69
144
|
ydl_opts["proxy"] = proxy_dsn
|
70
145
|
|
71
|
-
local_file, media_info, media_type, post_text = "", {}, XMediaType.UNKNOWN, ""
|
72
|
-
#tweet_contains_video, tweet_contains_images = False, False
|
73
|
-
|
74
|
-
#with sync_playwright() as p:
|
75
|
-
# with p.chromium.launch(headless=True) as browser:
|
76
|
-
# with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
|
77
|
-
# page = context.new_page()
|
78
|
-
# page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
|
79
|
-
# tweet_contains_video = self.tweet_contains_video(page)
|
80
|
-
# tweet_contains_images = self.tweet_contains_images(page)
|
81
|
-
|
82
146
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
else:
|
96
|
-
raise
|
97
|
-
|
98
|
-
images = []
|
99
|
-
if media_type == XMediaType.IMAGE:
|
100
|
-
job_type = JobType.IMAGE
|
101
|
-
images, post_text = self.download_images(url, timeout)
|
102
|
-
if images:
|
103
|
-
if len(images) > 1:
|
104
|
-
job_type = JobType.COLLECTION
|
105
|
-
else:
|
106
|
-
local_file = images[0]
|
107
|
-
|
108
|
-
if job_type == JobType.COLLECTION:
|
109
|
-
chunks = []
|
110
|
-
for media_chunk in Utils.chunker(images, 10):
|
111
|
-
chunk = []
|
112
|
-
for media in media_chunk:
|
113
|
-
chunk.append({
|
114
|
-
"local_media_path": self.rename_local_file(media),
|
115
|
-
"canonical_name": post_text,
|
116
|
-
"media_type": JobType.IMAGE
|
117
|
-
})
|
118
|
-
chunks.append(chunk)
|
119
|
-
res.append({
|
120
|
-
"media_type": JobType.COLLECTION,
|
121
|
-
"items": chunks
|
122
|
-
})
|
123
|
-
else:
|
124
|
-
if local_file:
|
125
|
-
res.append({
|
126
|
-
"local_media_path": self.rename_local_file(local_file),
|
127
|
-
"performer": media_info.get("uploader", "Unknown"),
|
128
|
-
"canonical_name": post_text,
|
129
|
-
"media_type": job_type
|
130
|
-
})
|
147
|
+
info = ydl.extract_info(url, download=False)
|
148
|
+
media_info = info
|
149
|
+
entries = info.get("entries", [info])
|
150
|
+
|
151
|
+
for entry in entries:
|
152
|
+
ret = ydl.download([entry['webpage_url']])
|
153
|
+
if ret == 0:
|
154
|
+
file_path = ydl.prepare_filename(entry)
|
155
|
+
if isinstance(file_path, str):
|
156
|
+
local_files.append(file_path)
|
157
|
+
else:
|
158
|
+
local_files.extend(file_path)
|
131
159
|
|
132
|
-
return
|
160
|
+
return media_info, local_files
|
133
161
|
|
134
162
|
def adaptive_chunk_size(self, content_length: int) -> int:
|
135
163
|
if content_length < 100_000:
|
@@ -141,20 +169,11 @@ class XScraper(XAbstract):
|
|
141
169
|
else:
|
142
170
|
return 65536
|
143
171
|
|
144
|
-
def download_video(self, url: str, ydl: yt_dlp.YoutubeDL, media_info: dict) -> str:
|
145
|
-
local_file = ""
|
146
|
-
ydl.download([url])
|
147
|
-
local_file = ydl.prepare_filename(media_info)
|
148
|
-
logging.debug("Temp filename: '%s'", local_file)
|
149
|
-
if local_file:
|
150
|
-
local_file = self.rename_local_file(local_file)
|
151
|
-
return local_file
|
152
|
-
|
153
172
|
def get_extension_from_headers(self, response: requests.Response) -> str:
|
154
173
|
content_type = response.headers.get("Content-Type", "")
|
155
174
|
return guess_extension(content_type) or ".jpg"
|
156
175
|
|
157
|
-
def download_images(self,
|
176
|
+
def download_images(self, page: Page, timeout: int) -> tuple[str, list[str]]:
|
158
177
|
downloaded_imgs = []
|
159
178
|
headers = {
|
160
179
|
"User-Agent": ScraperUtils.get_ua(),
|
@@ -166,7 +185,7 @@ class XScraper(XAbstract):
|
|
166
185
|
if self.proxy:
|
167
186
|
proxies = {"https": self.proxy.get("dsn", ""), "http": self.proxy.get("dsn", "")}
|
168
187
|
|
169
|
-
image_urls, post_text = self.extract_image_urls_from_x_post(
|
188
|
+
image_urls, post_text = self.extract_image_urls_from_x_post(page, timeout)
|
170
189
|
|
171
190
|
if not image_urls:
|
172
191
|
logging.error("[X] Content images are not found!")
|
@@ -210,101 +229,25 @@ class XScraper(XAbstract):
|
|
210
229
|
)
|
211
230
|
downloaded_imgs.append(filepath)
|
212
231
|
|
213
|
-
return
|
232
|
+
return post_text, downloaded_imgs
|
214
233
|
|
215
|
-
def
|
216
|
-
try:
|
217
|
-
tweet_texts = []
|
218
|
-
# collecting text blocks from post
|
219
|
-
containers = page.query_selector_all('div[data-testid="tweetText"]')
|
220
|
-
for container in containers:
|
221
|
-
try:
|
222
|
-
spans = container.query_selector_all("span")
|
223
|
-
if spans:
|
224
|
-
for span in spans:
|
225
|
-
text = span.inner_text().strip()
|
226
|
-
if text:
|
227
|
-
tweet_texts.append(text)
|
228
|
-
else:
|
229
|
-
# to span's try container itself
|
230
|
-
text = container.inner_text().strip()
|
231
|
-
if text:
|
232
|
-
tweet_texts.append(text)
|
233
|
-
except Exception:
|
234
|
-
continue
|
235
|
-
|
236
|
-
return " ".join(tweet_texts).strip()
|
237
|
-
except Exception as e:
|
238
|
-
logging.warning("Failed to extract tweet text.", exc_info=e)
|
239
|
-
|
240
|
-
return ""
|
241
|
-
|
242
|
-
def extract_image_urls_from_x_post(self, url: str, timeout: int = 60) -> tuple[list[str], str]:
|
234
|
+
def extract_image_urls_from_x_post(self, page: Page, timeout: int) -> tuple[list[str], str]:
|
243
235
|
img_urls, post_text = [], ''
|
244
236
|
|
245
|
-
|
246
|
-
|
247
|
-
dsn = self.proxy.get("dsn", "")
|
248
|
-
if dsn:
|
249
|
-
parsed = urlparse(dsn)
|
250
|
-
proxy = {
|
251
|
-
"server": f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
|
252
|
-
"username": parsed.username,
|
253
|
-
"password": parsed.password
|
254
|
-
}
|
255
|
-
logging.info("[X] build proxy: %s", proxy)
|
256
|
-
|
257
|
-
with sync_playwright() as p:
|
258
|
-
with p.chromium.launch(headless=True) as browser:
|
259
|
-
with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
|
260
|
-
page = context.new_page()
|
261
|
-
page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
|
262
|
-
|
263
|
-
#page.wait_for_timeout(3000)
|
264
|
-
page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
|
265
|
-
post_text = self.extract_post_text(page)
|
237
|
+
page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
|
238
|
+
post_text = self.extract_post_text(page)
|
266
239
|
|
267
|
-
|
268
|
-
|
240
|
+
image_elements = page.query_selector_all("img")
|
241
|
+
image_urls = []
|
269
242
|
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
243
|
+
for img in image_elements:
|
244
|
+
src = img.get_attribute("src")
|
245
|
+
if src and "pbs.twimg.com/media" in src:
|
246
|
+
image_urls.append(src)
|
274
247
|
|
275
|
-
|
248
|
+
img_urls = list(set(image_urls))
|
276
249
|
return img_urls, post_text
|
277
250
|
|
278
|
-
def get_media_type_from_info_and_dom(self, media_info: dict, page: Page) -> XMediaType:
|
279
|
-
is_video = (
|
280
|
-
media_info.get("vcodec") != "none" or
|
281
|
-
media_info.get("ext") in {"mp4", "mov", "mkv"} or
|
282
|
-
any(
|
283
|
-
f.get("vcodec") not in (None, "none")
|
284
|
-
for f in media_info.get("formats", [])
|
285
|
-
)
|
286
|
-
)
|
287
|
-
|
288
|
-
try:
|
289
|
-
image_elements = page.query_selector_all("img")
|
290
|
-
image_urls = [
|
291
|
-
img.get_attribute("src")
|
292
|
-
for img in image_elements
|
293
|
-
if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
|
294
|
-
]
|
295
|
-
has_images = bool(image_urls)
|
296
|
-
except Exception:
|
297
|
-
has_images = False
|
298
|
-
|
299
|
-
if is_video and has_images:
|
300
|
-
return XMediaType.MIXED
|
301
|
-
elif is_video:
|
302
|
-
return XMediaType.VIDEO
|
303
|
-
elif has_images:
|
304
|
-
return XMediaType.IMAGE
|
305
|
-
|
306
|
-
return XMediaType.UNKNOWN
|
307
|
-
|
308
251
|
def tweet_contains_video(self, page: Page) -> bool:
|
309
252
|
try:
|
310
253
|
return bool(
|
@@ -327,4 +270,35 @@ class XScraper(XAbstract):
|
|
327
270
|
return bool(image_urls)
|
328
271
|
except Exception:
|
329
272
|
pass
|
330
|
-
return False
|
273
|
+
return False
|
274
|
+
|
275
|
+
def extract_post_text(self, page: Page) -> str:
|
276
|
+
try:
|
277
|
+
text_fragments = []
|
278
|
+
|
279
|
+
# find tweetText containers (in main and quoted)
|
280
|
+
containers = page.query_selector_all('div[data-testid="tweetText"]')
|
281
|
+
for container in containers:
|
282
|
+
fragments = []
|
283
|
+
|
284
|
+
# find <span> and <img alt=...> inside text
|
285
|
+
for node in container.query_selector_all("span, img"):
|
286
|
+
tag = node.evaluate("node => node.tagName.toLowerCase()")
|
287
|
+
if tag == "span":
|
288
|
+
value = node.inner_text().strip()
|
289
|
+
if value:
|
290
|
+
fragments.append(value)
|
291
|
+
elif tag == "img":
|
292
|
+
# emoji as image
|
293
|
+
alt = node.get_attribute("alt")
|
294
|
+
if alt:
|
295
|
+
fragments.append(alt)
|
296
|
+
|
297
|
+
if fragments:
|
298
|
+
text_fragments.append("".join(fragments))
|
299
|
+
|
300
|
+
return "\n\n".join(text_fragments).strip()
|
301
|
+
|
302
|
+
except Exception as e:
|
303
|
+
logging.warning("X: [extract_post_text] error", exc_info=e)
|
304
|
+
return ""
|
warp_beacon/scraper/X/types.py
CHANGED
warp_beacon/scraper/__init__.py
CHANGED
@@ -319,6 +319,7 @@ class AsyncDownloader(object):
|
|
319
319
|
# success
|
320
320
|
for job in fail_handler.get_failed_jobs():
|
321
321
|
self.queue_task(job)
|
322
|
+
# media info processing
|
322
323
|
for item in items:
|
323
324
|
media_info = {"filesize": 0}
|
324
325
|
if item["media_type"] == JobType.VIDEO:
|
@@ -372,6 +373,8 @@ class AsyncDownloader(object):
|
|
372
373
|
job_args["media_collection"] = item["items"]
|
373
374
|
if item.get("save_items", None) is not None:
|
374
375
|
job_args["save_items"] = item.get("save_items", False)
|
376
|
+
elif item["media_type"] == JobType.TEXT:
|
377
|
+
job_args["message_text"] = item.get("message_text", "")
|
375
378
|
else:
|
376
379
|
job_args["local_media_path"] = item["local_media_path"]
|
377
380
|
if item.get("local_compressed_media_path", None):
|
warp_beacon/storage/__init__.py
CHANGED
@@ -67,7 +67,8 @@ class Storage(object):
|
|
67
67
|
"uniq_id": document["uniq_id"],
|
68
68
|
"tg_file_id": document["tg_file_id"],
|
69
69
|
"media_type": document["media_type"],
|
70
|
-
"canonical_name": document.get("canonical_name")
|
70
|
+
"canonical_name": document.get("canonical_name"),
|
71
|
+
"message_text": document.get("message_text")
|
71
72
|
})
|
72
73
|
except Exception as e:
|
73
74
|
logging.error("Error occurred while trying to read from the database!")
|
@@ -82,13 +83,10 @@ class Storage(object):
|
|
82
83
|
def db_lookup_id(self, uniq_id: str) -> list[dict]:
|
83
84
|
return self.db_find(uniq_id)
|
84
85
|
|
85
|
-
def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "") -> list[int]:
|
86
|
+
def add_media(self, tg_file_ids: list[str], media_url: str, media_type: str, origin: str, canonical_name: str = "", message_text: str = "") -> list[int]:
|
86
87
|
uniq_id = self.compute_uniq(media_url)
|
87
88
|
media_ids = []
|
88
89
|
for tg_file_id in tg_file_ids:
|
89
|
-
if not tg_file_id:
|
90
|
-
logging.warning("Passed empty `tg_file_id`! Skipping.")
|
91
|
-
continue
|
92
90
|
if self.db_lookup_id(uniq_id):
|
93
91
|
logging.info("Detected existing uniq_id, skipping storage write operation")
|
94
92
|
continue
|
@@ -98,7 +96,8 @@ class Storage(object):
|
|
98
96
|
"media_type": media_type,
|
99
97
|
"tg_file_id": tg_file_id,
|
100
98
|
"origin": origin,
|
101
|
-
"canonical_name": canonical_name
|
99
|
+
"canonical_name": canonical_name,
|
100
|
+
"message_text": message_text
|
102
101
|
}).inserted_id)
|
103
102
|
|
104
103
|
return media_ids
|
warp_beacon/telegram/bot.py
CHANGED
@@ -11,7 +11,7 @@ from pyrogram import Client, filters
|
|
11
11
|
from pyrogram.enums import ParseMode, ChatType
|
12
12
|
from pyrogram.handlers import MessageHandler, CallbackQueryHandler
|
13
13
|
from pyrogram.types import InputMediaAudio, InputMediaPhoto, InputMediaVideo, InputMediaAnimation, InlineKeyboardButton, InlineKeyboardMarkup
|
14
|
-
from pyrogram.errors import NetworkMigrate, BadRequest, MultiMediaTooLong, MessageIdInvalid
|
14
|
+
from pyrogram.errors import NetworkMigrate, BadRequest, MultiMediaTooLong, MessageIdInvalid, FloodWait
|
15
15
|
|
16
16
|
import warp_beacon
|
17
17
|
from warp_beacon.__version__ import __version__
|
@@ -393,6 +393,9 @@ class Bot(object):
|
|
393
393
|
tg_chunk.append(anim)
|
394
394
|
mediafs.append(tg_chunk)
|
395
395
|
args["media"] = mediafs
|
396
|
+
elif job.media_type == JobType.TEXT:
|
397
|
+
args["text"] = f"<b>Post text:</b><pre>{job.message_text}</pre>\n\n{self.build_signature_caption(job)}"
|
398
|
+
args["parse_mode"] = ParseMode.HTML
|
396
399
|
|
397
400
|
args["chat_id"] = job.chat_id
|
398
401
|
|
@@ -412,7 +415,7 @@ class Bot(object):
|
|
412
415
|
if render_donates:
|
413
416
|
keyboard_buttons[0].append(InlineKeyboardButton("❤ Donate", url=os.environ.get("DONATE_LINK", "https://pay.cryptocloud.plus/pos/W5BMtNQt5bJFoW2E")))
|
414
417
|
|
415
|
-
if keyboard_buttons[0]:
|
418
|
+
if keyboard_buttons[0]:
|
416
419
|
args["reply_markup"] = InlineKeyboardMarkup(keyboard_buttons)
|
417
420
|
|
418
421
|
return args
|
@@ -425,9 +428,14 @@ class Bot(object):
|
|
425
428
|
while not retry_amount >= max_retries:
|
426
429
|
try:
|
427
430
|
reply_message = None
|
428
|
-
if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION):
|
429
|
-
if job.media_type in (JobType.VIDEO, JobType.AUDIO):
|
430
|
-
|
431
|
+
if job.media_type in (JobType.VIDEO, JobType.IMAGE, JobType.AUDIO, JobType.ANIMATION, JobType.TEXT):
|
432
|
+
#if job.media_type in (JobType.VIDEO, JobType.AUDIO):
|
433
|
+
# await Utils.ensure_me_loaded(self.client)
|
434
|
+
if job.media_type == JobType.TEXT:
|
435
|
+
if job.placeholder_message_id:
|
436
|
+
await self.placeholder.remove(job.chat_id, job.placeholder_message_id)
|
437
|
+
job.placeholder_message_id = None
|
438
|
+
|
431
439
|
if job.placeholder_message_id:
|
432
440
|
try:
|
433
441
|
reply_message = await self.editor.edit(**self.build_tg_args(job))
|
@@ -440,10 +448,17 @@ class Bot(object):
|
|
440
448
|
JobType.VIDEO: self.client.send_video,
|
441
449
|
JobType.IMAGE: self.client.send_photo,
|
442
450
|
JobType.AUDIO: self.client.send_audio,
|
443
|
-
JobType.ANIMATION: self.client.send_animation
|
451
|
+
JobType.ANIMATION: self.client.send_animation,
|
452
|
+
JobType.TEXT: self.client.send_message
|
444
453
|
}
|
445
454
|
try:
|
446
|
-
|
455
|
+
while True:
|
456
|
+
try:
|
457
|
+
reply_message = await send_funcs[job.media_type](**self.build_tg_args(job))
|
458
|
+
break
|
459
|
+
except FloodWait as e:
|
460
|
+
logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
|
461
|
+
asyncio.sleep(e.value)
|
447
462
|
except ValueError as e:
|
448
463
|
err_text = str(e)
|
449
464
|
if "Expected" in err_text:
|
@@ -454,9 +469,10 @@ class Bot(object):
|
|
454
469
|
job_args[reality.value.lower()] = job_args.pop(expectation.value.lower())
|
455
470
|
reply_message = await send_funcs[reality](**job_args)
|
456
471
|
|
457
|
-
|
458
|
-
|
459
|
-
|
472
|
+
if reply_message:
|
473
|
+
tg_file_id = Utils.extract_file_id(reply_message)
|
474
|
+
tg_file_ids.append(tg_file_id)
|
475
|
+
job.tg_file_id = tg_file_id
|
460
476
|
logging.info("Uploaded media file with type '%s' tg_file_id is '%s'", job.media_type.value, job.tg_file_id)
|
461
477
|
elif job.media_type == JobType.COLLECTION:
|
462
478
|
col_job_args = self.build_tg_args(job)
|
@@ -464,7 +480,14 @@ class Bot(object):
|
|
464
480
|
snd_grp_options = {"chat_id": job.chat_id, "reply_to_message_id": job.message_id}
|
465
481
|
for i, media_chunk in enumerate(col_job_args["media"]):
|
466
482
|
snd_grp_options["media"] = media_chunk
|
467
|
-
messages =
|
483
|
+
messages = []
|
484
|
+
while True:
|
485
|
+
try:
|
486
|
+
messages = await self.client.send_media_group(**snd_grp_options)
|
487
|
+
break
|
488
|
+
except FloodWait as e:
|
489
|
+
logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
|
490
|
+
asyncio.sleep(e.value)
|
468
491
|
sent_messages += messages
|
469
492
|
if job.media_collection:
|
470
493
|
for j, _ in enumerate(media_chunk):
|
@@ -1,9 +1,13 @@
|
|
1
|
+
import asyncio
|
1
2
|
import re
|
2
3
|
|
4
|
+
import logging
|
5
|
+
|
3
6
|
from pyrogram.client import Client
|
4
7
|
from pyrogram.types import InputMedia, InputMediaAudio, InputMediaPhoto, InputMediaVideo, InputMediaAnimation, InlineKeyboardMarkup
|
5
8
|
from pyrogram import raw
|
6
9
|
from pyrogram import types
|
10
|
+
from pyrogram.errors import FloodWait
|
7
11
|
|
8
12
|
from warp_beacon.telegram.progress_bar import ProgressBar
|
9
13
|
from warp_beacon.telegram.types import ReportType
|
@@ -124,23 +128,30 @@ class EditMessage(object):
|
|
124
128
|
raw_file_thumb = await self.client.save_file(path=media.thumb)
|
125
129
|
raw_media = self.get_wrapped_animation(raw_file=raw_file, raw_thumb=raw_file_thumb, media=media, file_name=file_name)
|
126
130
|
|
127
|
-
peer =
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
131
|
+
peer, r = None, None
|
132
|
+
while True:
|
133
|
+
try:
|
134
|
+
peer = await self.client.resolve_peer(chat_id)
|
135
|
+
r = await self.client.invoke(
|
136
|
+
raw.functions.messages.EditMessage(
|
137
|
+
peer=peer,
|
138
|
+
id=message_id,
|
139
|
+
media=raw_media,
|
140
|
+
reply_markup=await reply_markup.write(self.client) if reply_markup else None,
|
141
|
+
message=message,
|
142
|
+
entities=entities
|
143
|
+
)
|
144
|
+
)
|
145
|
+
break
|
146
|
+
except FloodWait as e:
|
147
|
+
logging.warning("FloodWait occurred, waiting '%d' seconds before retry", int(e.value))
|
148
|
+
asyncio.sleep(e.value)
|
139
149
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
150
|
+
if r:
|
151
|
+
for i in r.updates:
|
152
|
+
if isinstance(i, (raw.types.UpdateEditMessage, raw.types.UpdateEditChannelMessage)):
|
153
|
+
return await types.Message._parse(
|
154
|
+
self.client, i.message,
|
155
|
+
{i.id: i for i in r.users},
|
156
|
+
{i.id: i for i in r.chats}
|
157
|
+
)
|
warp_beacon/telegram/handlers.py
CHANGED
@@ -111,6 +111,14 @@ class Handlers(object):
|
|
111
111
|
origin=job.job_origin.value,
|
112
112
|
canonical_name=common_canonical_name
|
113
113
|
)
|
114
|
+
elif job.media_type == JobType.TEXT:
|
115
|
+
self.storage.add_media(
|
116
|
+
tg_file_ids=[None],
|
117
|
+
media_url=job.url,
|
118
|
+
media_type=job.media_type.value,
|
119
|
+
origin=job.job_origin.value,
|
120
|
+
message_text=job.message_text
|
121
|
+
)
|
114
122
|
else:
|
115
123
|
self.storage.add_media(
|
116
124
|
tg_file_ids=[','.join(tg_file_ids)],
|
@@ -215,6 +223,7 @@ class Handlers(object):
|
|
215
223
|
elif ent_len:
|
216
224
|
media_type = JobType[entities[0]["media_type"].upper()]
|
217
225
|
canonical_name = entities[0]["canonical_name"]
|
226
|
+
message_text = entities[0]["message_text"]
|
218
227
|
await self.bot.upload_job(
|
219
228
|
UploadJob(
|
220
229
|
url=url,
|
@@ -228,22 +237,23 @@ class Handlers(object):
|
|
228
237
|
chat_type=message.chat.type,
|
229
238
|
source_username=Utils.extract_message_author(message),
|
230
239
|
canonical_name=canonical_name,
|
231
|
-
message_leftover=msg_leftover
|
240
|
+
message_leftover=msg_leftover,
|
241
|
+
message_text=message_text
|
232
242
|
)
|
233
243
|
)
|
234
244
|
else:
|
235
245
|
if await self.queue_job(DownloadJob.build(
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
246
|
+
url=url,
|
247
|
+
message_id=effective_message_id,
|
248
|
+
chat_id=chat.id,
|
249
|
+
user_id=message.from_user.id,
|
250
|
+
in_process=self.bot.uploader.is_inprocess(uniq_id),
|
251
|
+
uniq_id=uniq_id,
|
252
|
+
job_origin=origin,
|
253
|
+
source_username=Utils.extract_message_author(message),
|
254
|
+
chat_type=chat.type,
|
255
|
+
message_leftover=msg_leftover
|
256
|
+
)):
|
247
257
|
self.bot.uploader.set_inprocess(uniq_id)
|
248
258
|
|
249
259
|
if chat.type not in (ChatType.GROUP, ChatType.SUPERGROUP) and not urls:
|
warp_beacon/uploader/__init__.py
CHANGED
@@ -89,7 +89,7 @@ class AsyncUploader(object):
|
|
89
89
|
while self.allow_loop:
|
90
90
|
try:
|
91
91
|
try:
|
92
|
-
job = self.job_queue.get()
|
92
|
+
job: UploadJob = self.job_queue.get()
|
93
93
|
if job is self.__JOE_BIDEN_WAKEUP:
|
94
94
|
break
|
95
95
|
if job.is_message_to_admin and job.message_text and self.admin_message_callback:
|
@@ -118,7 +118,10 @@ class AsyncUploader(object):
|
|
118
118
|
message_id = job.placeholder_message_id
|
119
119
|
|
120
120
|
if not in_process and not job.job_failed and not job.job_warning and not job.replay:
|
121
|
-
|
121
|
+
if job.media_type == JobType.TEXT:
|
122
|
+
logging.info("Uploading job text: '%s'", job.message_text)
|
123
|
+
else:
|
124
|
+
logging.info("Accepted upload job, file(s): '%s'", path)
|
122
125
|
|
123
126
|
try:
|
124
127
|
if message_id in self.callbacks:
|
@@ -4,15 +4,15 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
|
|
4
4
|
var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
|
5
5
|
var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
|
6
6
|
warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
|
7
|
-
warp_beacon/__version__.py,sha256=
|
7
|
+
warp_beacon/__version__.py,sha256=TZgBJIjZg_hpyHZh7yBDpvAmjXgQy0i383jyxYXsn9A,24
|
8
8
|
warp_beacon/warp_beacon.py,sha256=ADCR30uGXIsDrt9WoiI9Ghu2QtWs0qZIK6x3pQKM_B4,1109
|
9
9
|
warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
|
10
10
|
warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
|
12
12
|
warp_beacon/jobs/__init__.py,sha256=vW5T4jJUla97TNRapX_Y6eJCiPMEbySSlD0SJQKfAXs,189
|
13
|
-
warp_beacon/jobs/abstract.py,sha256=
|
13
|
+
warp_beacon/jobs/abstract.py,sha256=dDGWFJL474_u_Musk-nZ6NfKH6CYdAEQlpX8thl-hPg,3411
|
14
14
|
warp_beacon/jobs/download_job.py,sha256=pfSEZpWVzya0hddU5794p2uQYfm4lHrtM1Ck0T-UrLk,844
|
15
|
-
warp_beacon/jobs/types.py,sha256=
|
15
|
+
warp_beacon/jobs/types.py,sha256=bb73jHm12ahq3BPDwdsWzHB69KmElSpgbj-A0lA3VNk,191
|
16
16
|
warp_beacon/jobs/upload_job.py,sha256=_ul4psPej1jLEs-BMcMR80GbXDSmm38jE9yoZtecclY,741
|
17
17
|
warp_beacon/mediainfo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
18
|
warp_beacon/mediainfo/abstract.py,sha256=ZR2JMuRpoh7nDNov9a8YkAfr6BI2HXnXzQtVrLgDxjs,1185
|
@@ -22,17 +22,17 @@ warp_beacon/mediainfo/video.py,sha256=UBZrhTN5IDI-aYu6tsJEILo9nFkjHhkldGVFmvV7tE
|
|
22
22
|
warp_beacon/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
warp_beacon/scheduler/instagram_human.py,sha256=lOytnonvqtB_8z6TVzrVJ1prqqgE4fktZZl-xpn7SOA,12951
|
24
24
|
warp_beacon/scheduler/scheduler.py,sha256=0u9AIr9fTBmjU1GpOwKlPuNLskeJ4A-S2uAUzb-qO6w,4997
|
25
|
-
warp_beacon/scraper/__init__.py,sha256=
|
25
|
+
warp_beacon/scraper/__init__.py,sha256=EBS7k8vmn3ya_Ud0tPHtBo7xUMUP9tabmYk5Ase_zyw,20818
|
26
26
|
warp_beacon/scraper/abstract.py,sha256=pWbaTu-gDZgi-iFjqMR_uGzPl5KLv-4gTdJ9w6cD4sk,3802
|
27
27
|
warp_beacon/scraper/account_selector.py,sha256=n-466AiTXZ8o5cgcNkNwNiWLoi-EkLC7bHh6El1eIF8,10274
|
28
28
|
warp_beacon/scraper/exceptions.py,sha256=hicAe6_0xN7Ry2gcFX4UvqPWMtF_lX2ihH1njQAaqCA,1496
|
29
29
|
warp_beacon/scraper/fail_handler.py,sha256=5ODu4b8ndZWAcHIXrcUufsWFihetzNUoAi8IgAkreyQ,998
|
30
30
|
warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
|
31
31
|
warp_beacon/scraper/utils.py,sha256=AOZmDki2Pbr84IG-j_wN2UghKCiWFVDYdx6HJl0JTBs,1258
|
32
|
-
warp_beacon/scraper/X/X.py,sha256=
|
32
|
+
warp_beacon/scraper/X/X.py,sha256=lKxNe70iIasLnyN8QA_1rLa70Bd3Y9fL6J4AEdUgNJs,9200
|
33
33
|
warp_beacon/scraper/X/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
warp_beacon/scraper/X/abstract.py,sha256=pCzZPTCtn8pRbBx2SeuBUpMkEHqnOLtwLBAHYceL12Q,5475
|
35
|
-
warp_beacon/scraper/X/types.py,sha256=
|
35
|
+
warp_beacon/scraper/X/types.py,sha256=RrAyODNA8WA0YzznOSK2wr-hstXf3BnEisy06uL-bdA,132
|
36
36
|
warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
37
|
warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
|
38
38
|
warp_beacon/scraper/instagram/instagram.py,sha256=uzqUCVniRa3d9uavoMAz9-9MHvYOh7n_G7UyfgzHgAk,19154
|
@@ -42,23 +42,23 @@ warp_beacon/scraper/youtube/abstract.py,sha256=7CVR2fW6bpWYYKcveRddd6XlgDsfV_Pp3
|
|
42
42
|
warp_beacon/scraper/youtube/music.py,sha256=5AeSBQyUgVCJT2hoBCV2WvlyuV9US09SYJhmBG_P9F8,2755
|
43
43
|
warp_beacon/scraper/youtube/shorts.py,sha256=y0591kpWU35rt5OoWamkcHIstNZ98SXUlUKvYmUsyEY,4030
|
44
44
|
warp_beacon/scraper/youtube/youtube.py,sha256=uYR7XpfP6ZnSvw1Gc4qG_M8jkCyv3maEytFdNWlYPwU,6732
|
45
|
-
warp_beacon/storage/__init__.py,sha256=
|
45
|
+
warp_beacon/storage/__init__.py,sha256=xg3quvc-Lkc-hCZ2lkrTsGqLyMybTN14mwrSuDJMfD4,3403
|
46
46
|
warp_beacon/storage/mongo.py,sha256=qC4ZiO8XXvPnP0rJwz4CJx42pqFsyAjCiW10W5QdT6E,527
|
47
47
|
warp_beacon/telegram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
|
-
warp_beacon/telegram/bot.py,sha256=
|
48
|
+
warp_beacon/telegram/bot.py,sha256=xeSzUxNZf8zjU5K_UkwtzBzB6nIml1UPI0g1ITQdoEQ,20675
|
49
49
|
warp_beacon/telegram/caption_shortener.py,sha256=EnguNCF52ne7y4P-iJAbI6K3sqoJqJbND_dX5Fhwkv0,1549
|
50
50
|
warp_beacon/telegram/download_status.py,sha256=N-Qg13LVcPskyQNKG1lw50K1lhFtXu003muCRzZ7wiE,1561
|
51
|
-
warp_beacon/telegram/edit_message.py,sha256=
|
52
|
-
warp_beacon/telegram/handlers.py,sha256=
|
51
|
+
warp_beacon/telegram/edit_message.py,sha256=vzSrtlt-QxBU-X9uRSKo7gJftOrvB5Qo3iNsVNptCoE,5684
|
52
|
+
warp_beacon/telegram/handlers.py,sha256=2XJ3v9sVsWa3V3cji9FegO7l5JNqXqXxQrxIOzI2fyE,10793
|
53
53
|
warp_beacon/telegram/placeholder_message.py,sha256=wN9-BRiyrtHG-EvXtZkGJHt2CX71munQ57ITttjt0mw,6400
|
54
54
|
warp_beacon/telegram/progress_bar.py,sha256=IP4xtvLtdJtqdr2C-0YaU428iQGrKurbP4Npr31iW74,5014
|
55
55
|
warp_beacon/telegram/progress_file_reader.py,sha256=e3equyNKlKs764AD-iE9QRsh3YDHTzP78Mx5tdvPPWs,969
|
56
56
|
warp_beacon/telegram/types.py,sha256=Kvdng6uCF1HRoqQgGW1ZYYPJoVuYkFb-LDvMBbW5Hjk,89
|
57
57
|
warp_beacon/telegram/utils.py,sha256=zTF8VQfAWetBSjAPbmNe_Zi_LN5fAcWptJKjLaFNHaE,5073
|
58
|
-
warp_beacon/uploader/__init__.py,sha256=
|
59
|
-
warp_beacon-2.8.
|
60
|
-
warp_beacon-2.8.
|
61
|
-
warp_beacon-2.8.
|
62
|
-
warp_beacon-2.8.
|
63
|
-
warp_beacon-2.8.
|
64
|
-
warp_beacon-2.8.
|
58
|
+
warp_beacon/uploader/__init__.py,sha256=dR0VjIGSr859TTdorA2tKnjH7EpQOXnG71aXhZFaMl0,5863
|
59
|
+
warp_beacon-2.8.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
60
|
+
warp_beacon-2.8.14.dist-info/METADATA,sha256=8eP6Ho7mF0Uq7jz0-MLzRb8niSFPPh3ewdFrmN3BH0I,23236
|
61
|
+
warp_beacon-2.8.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
62
|
+
warp_beacon-2.8.14.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
|
63
|
+
warp_beacon-2.8.14.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
|
64
|
+
warp_beacon-2.8.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|