warp-beacon 2.8.10__tar.gz → 2.8.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.8.10/warp_beacon.egg-info → warp_beacon-2.8.11}/PKG-INFO +2 -1
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/pyproject.toml +2 -1
- warp_beacon-2.8.11/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/X/X.py +66 -1
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/X/types.py +2 -1
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/exceptions.py +3 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/instagram/instagram.py +17 -7
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/instagram/wb_instagrapi.py +79 -1
- {warp_beacon-2.8.10 → warp_beacon-2.8.11/warp_beacon.egg-info}/PKG-INFO +2 -1
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon.egg-info/requires.txt +1 -0
- warp_beacon-2.8.10/warp_beacon/__version__.py +0 -2
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/LICENSE +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/MANIFEST.in +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/README.md +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/assets/cc-group-black.png +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/assets/placeholder.gif +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/etc/.gitignore +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/etc/accounts.json +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/etc/proxies.json +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/setup.cfg +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/setup.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/jobs/abstract.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scheduler/instagram_human.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scheduler/scheduler.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/X/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/X/abstract.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/abstract.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/account_selector.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/instagram/captcha.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/link_resolver.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/utils.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/youtube/abstract.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/storage/mongo.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/bot.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/caption_shortener.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/download_status.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/edit_message.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/handlers.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/progress_bar.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/progress_file_reader.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/types.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/uploader/__init__.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon/yt_auth.py +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.8.10 → warp_beacon-2.8.11}/warp_beacon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warp_beacon
|
3
|
-
Version: 2.8.10
|
3
|
+
Version: 2.8.11
|
4
4
|
Summary: Telegram bot for expanding external media links
|
5
5
|
Home-page: https://github.com/sb0y/warp_beacon
|
6
6
|
Author: Andrey Bagrintsev
|
@@ -242,6 +242,7 @@ Requires-Dist: pydub
|
|
242
242
|
Requires-Dist: SpeechRecognition
|
243
243
|
Requires-Dist: playwright
|
244
244
|
Requires-Dist: fake-useragent
|
245
|
+
Requires-Dist: pyotp
|
245
246
|
Dynamic: author
|
246
247
|
Dynamic: home-page
|
247
248
|
Dynamic: license-file
|
@@ -59,6 +59,7 @@ class XScraper(XAbstract):
|
|
59
59
|
'merge_output_format': 'mp4',
|
60
60
|
'dump_single_json': True,
|
61
61
|
'nocheckcertificate': True,
|
62
|
+
'progress_hooks': [self.dlp_on_progress],
|
62
63
|
}
|
63
64
|
|
64
65
|
if self.proxy:
|
@@ -68,6 +69,16 @@ class XScraper(XAbstract):
|
|
68
69
|
ydl_opts["proxy"] = proxy_dsn
|
69
70
|
|
70
71
|
local_file, media_info, media_type, post_text = "", {}, XMediaType.UNKNOWN, ""
|
72
|
+
#tweet_contains_video, tweet_contains_images = False, False
|
73
|
+
|
74
|
+
#with sync_playwright() as p:
|
75
|
+
# with p.chromium.launch(headless=True) as browser:
|
76
|
+
# with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
|
77
|
+
# page = context.new_page()
|
78
|
+
# page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
|
79
|
+
# tweet_contains_video = self.tweet_contains_video(page)
|
80
|
+
# tweet_contains_images = self.tweet_contains_images(page)
|
81
|
+
|
71
82
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
72
83
|
try:
|
73
84
|
media_info = ydl.extract_info(url, download=False)
|
@@ -262,4 +273,58 @@ class XScraper(XAbstract):
|
|
262
273
|
image_urls.append(src)
|
263
274
|
|
264
275
|
img_urls = list(set(image_urls))
|
265
|
-
return img_urls, post_text
|
276
|
+
return img_urls, post_text
|
277
|
+
|
278
|
+
def get_media_type_from_info_and_dom(self, media_info: dict, page: Page) -> XMediaType:
|
279
|
+
is_video = (
|
280
|
+
media_info.get("vcodec") != "none" or
|
281
|
+
media_info.get("ext") in {"mp4", "mov", "mkv"} or
|
282
|
+
any(
|
283
|
+
f.get("vcodec") not in (None, "none")
|
284
|
+
for f in media_info.get("formats", [])
|
285
|
+
)
|
286
|
+
)
|
287
|
+
|
288
|
+
try:
|
289
|
+
image_elements = page.query_selector_all("img")
|
290
|
+
image_urls = [
|
291
|
+
img.get_attribute("src")
|
292
|
+
for img in image_elements
|
293
|
+
if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
|
294
|
+
]
|
295
|
+
has_images = bool(image_urls)
|
296
|
+
except Exception:
|
297
|
+
has_images = False
|
298
|
+
|
299
|
+
if is_video and has_images:
|
300
|
+
return XMediaType.MIXED
|
301
|
+
elif is_video:
|
302
|
+
return XMediaType.VIDEO
|
303
|
+
elif has_images:
|
304
|
+
return XMediaType.IMAGE
|
305
|
+
|
306
|
+
return XMediaType.UNKNOWN
|
307
|
+
|
308
|
+
def tweet_contains_video(self, page: Page) -> bool:
|
309
|
+
try:
|
310
|
+
return bool(
|
311
|
+
page.query_selector("article video") or
|
312
|
+
page.query_selector("div[data-testid='videoPlayer']") or
|
313
|
+
page.query_selector("div[aria-label='Embedded video']")
|
314
|
+
)
|
315
|
+
except Exception:
|
316
|
+
pass
|
317
|
+
return False
|
318
|
+
|
319
|
+
def tweet_contains_images(self, page: Page) -> bool:
|
320
|
+
try:
|
321
|
+
image_elements = page.query_selector_all("img")
|
322
|
+
image_urls = [
|
323
|
+
img.get_attribute("src")
|
324
|
+
for img in image_elements
|
325
|
+
if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
|
326
|
+
]
|
327
|
+
return bool(image_urls)
|
328
|
+
except Exception:
|
329
|
+
pass
|
330
|
+
return False
|
@@ -16,15 +16,17 @@ from urllib.parse import urljoin, urlparse
|
|
16
16
|
import requests
|
17
17
|
import urllib3
|
18
18
|
|
19
|
+
import pyotp
|
20
|
+
|
19
21
|
from instagrapi import exceptions
|
20
|
-
from instagrapi.exceptions import UnknownError as IGUnknownError
|
22
|
+
from instagrapi.exceptions import TwoFactorRequired, UnknownError as IGUnknownError
|
21
23
|
from instagrapi.mixins.story import Story
|
22
24
|
from instagrapi.types import Media
|
23
25
|
from instagrapi.mixins.challenge import ChallengeChoice
|
24
26
|
#from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, ChallengeRequired, \
|
25
27
|
# ChallengeSelfieCaptcha, ChallengeUnknownStep, UnknownError as IGUnknownError
|
26
28
|
|
27
|
-
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, BadProxy, extract_exception_message
|
29
|
+
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, BadProxy, TOTPNotProvided, extract_exception_message
|
28
30
|
from warp_beacon.scraper.abstract import ScraperAbstract
|
29
31
|
from warp_beacon.jobs.types import JobType
|
30
32
|
from warp_beacon.jobs.download_job import DownloadJob
|
@@ -112,9 +114,6 @@ class InstagramScraper(ScraperAbstract):
|
|
112
114
|
logging.info("Loading existing session file '%s'", self.inst_session_file)
|
113
115
|
with open(self.inst_session_file, 'r', encoding="utf-8") as f:
|
114
116
|
js = json.loads(f.read())
|
115
|
-
if "warp_timeline_cursor" in js:
|
116
|
-
self.timeline_cursor = js.get("warp_timeline_cursor", {})
|
117
|
-
del js["warp_timeline_cursor"]
|
118
117
|
self.cl.set_settings(js)
|
119
118
|
else:
|
120
119
|
self.download_hndlr(self.login)
|
@@ -122,9 +121,20 @@ class InstagramScraper(ScraperAbstract):
|
|
122
121
|
def login(self) -> None:
|
123
122
|
username = self.account["login"]
|
124
123
|
password = self.account["password"]
|
124
|
+
totp_secret = self.account.get("totp_secret")
|
125
125
|
if username and password:
|
126
|
-
|
127
|
-
|
126
|
+
try:
|
127
|
+
self.cl.login(username=username, password=password)
|
128
|
+
except TwoFactorRequired:
|
129
|
+
logging.warning("Two factor required!")
|
130
|
+
if not totp_secret:
|
131
|
+
logging.critical("Please specify TOTP secret in account config")
|
132
|
+
raise TOTPNotProvided("Please specify TOTP secret in account config")
|
133
|
+
totp = pyotp.TOTP(totp_secret)
|
134
|
+
code = totp.now()
|
135
|
+
logging.info("TOTP code for now: '%s'", code)
|
136
|
+
self.cl.login(username=username, password=password, verification_code=code)
|
137
|
+
self.safe_write_session()
|
128
138
|
|
129
139
|
def validate_session(self) -> int:
|
130
140
|
from warp_beacon.scheduler.instagram_human import InstagramHuman
|
@@ -9,14 +9,68 @@ import requests
|
|
9
9
|
from instagrapi import Client
|
10
10
|
from instagrapi.types import Media, User, Story
|
11
11
|
from instagrapi.exceptions import (
|
12
|
-
|
12
|
+
ClientError,
|
13
13
|
#ClientLoginRequired,
|
14
|
+
ClientNotFoundError,
|
14
15
|
VideoNotDownload,
|
16
|
+
MediaNotFound,
|
15
17
|
PrivateError
|
16
18
|
)
|
17
19
|
|
20
|
+
from instagrapi.extractors import (
|
21
|
+
extract_location,
|
22
|
+
#extract_media_v1,
|
23
|
+
extract_user_short,
|
24
|
+
extract_usertag,
|
25
|
+
extract_resource_v1
|
26
|
+
)
|
27
|
+
|
18
28
|
from warp_beacon.scraper.utils import ScraperUtils
|
19
29
|
|
30
|
+
def extract_media_v1(data):
|
31
|
+
"""Extract media from Private API"""
|
32
|
+
media = deepcopy(data)
|
33
|
+
if "video_versions" in media:
|
34
|
+
# Select Best Quality by Resolutiuon
|
35
|
+
media["video_url"] = sorted(
|
36
|
+
media["video_versions"], key=lambda o: o["height"] * o["width"]
|
37
|
+
)[-1]["url"]
|
38
|
+
if media["media_type"] == 2 and not media.get("product_type"):
|
39
|
+
media["product_type"] = "feed"
|
40
|
+
if "image_versions2" in media:
|
41
|
+
media["thumbnail_url"] = sorted(
|
42
|
+
media["image_versions2"]["candidates"],
|
43
|
+
key=lambda o: o["height"] * o["width"],
|
44
|
+
)[-1]["url"]
|
45
|
+
if media["media_type"] == 8:
|
46
|
+
# remove thumbnail_url and video_url for albums
|
47
|
+
# see resources
|
48
|
+
media.pop("thumbnail_url", "")
|
49
|
+
media.pop("video_url", "")
|
50
|
+
location = media.get("location")
|
51
|
+
media["location"] = location and extract_location(location)
|
52
|
+
media["user"] = extract_user_short(media.get("user"))
|
53
|
+
media["usertags"] = sorted(
|
54
|
+
[
|
55
|
+
extract_usertag(usertag)
|
56
|
+
for usertag in media.get("usertags", {}).get("in", [])
|
57
|
+
],
|
58
|
+
key=lambda tag: tag.user.pk,
|
59
|
+
)
|
60
|
+
media["like_count"] = media.get("like_count", 0)
|
61
|
+
media["has_liked"] = media.get("has_liked", False)
|
62
|
+
#media["sponsor_tags"] = [tag["sponsor"] for tag in media.get("sponsor_tags", [])]
|
63
|
+
media["sponsor_tags"] = [tag["sponsor"] for tag in media.get("sponsor_tags") or []]
|
64
|
+
media["play_count"] = media.get("play_count", 0)
|
65
|
+
media["coauthor_producers"] = media.get("coauthor_producers", [])
|
66
|
+
return Media(
|
67
|
+
caption_text=(media.get("caption") or {}).get("text", ""),
|
68
|
+
resources=[
|
69
|
+
extract_resource_v1(edge) for edge in media.get("carousel_media", [])
|
70
|
+
],
|
71
|
+
**media,
|
72
|
+
)
|
73
|
+
|
20
74
|
class WBClient(Client):
|
21
75
|
"""
|
22
76
|
patched instagrapi
|
@@ -194,6 +248,30 @@ class WBClient(Client):
|
|
194
248
|
self._medias_cache[media_pk]
|
195
249
|
) # return copy of cache (dict changes protection)
|
196
250
|
|
251
|
+
def media_info_v1(self, media_pk: str) -> Media:
|
252
|
+
"""
|
253
|
+
Get Media from PK by Private Mobile API
|
254
|
+
|
255
|
+
Parameters
|
256
|
+
----------
|
257
|
+
media_pk: str
|
258
|
+
Unique identifier of the media
|
259
|
+
|
260
|
+
Returns
|
261
|
+
-------
|
262
|
+
Media
|
263
|
+
An object of Media type
|
264
|
+
"""
|
265
|
+
try:
|
266
|
+
result = self.private_request(f"media/{media_pk}/info/")
|
267
|
+
except ClientNotFoundError as e:
|
268
|
+
raise MediaNotFound(e, media_pk=media_pk, **self.last_json)
|
269
|
+
except ClientError as e:
|
270
|
+
if "Media not found" in str(e):
|
271
|
+
raise MediaNotFound(e, media_pk=media_pk, **self.last_json)
|
272
|
+
raise e
|
273
|
+
return extract_media_v1(result["items"].pop())
|
274
|
+
|
197
275
|
def user_info_by_username(self, username: str, use_cache: bool = True) -> User:
|
198
276
|
"""
|
199
277
|
Get user object from username
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warp_beacon
|
3
|
-
Version: 2.8.10
|
3
|
+
Version: 2.8.11
|
4
4
|
Summary: Telegram bot for expanding external media links
|
5
5
|
Home-page: https://github.com/sb0y/warp_beacon
|
6
6
|
Author: Andrey Bagrintsev
|
@@ -242,6 +242,7 @@ Requires-Dist: pydub
|
|
242
242
|
Requires-Dist: SpeechRecognition
|
243
243
|
Requires-Dist: playwright
|
244
244
|
Requires-Dist: fake-useragent
|
245
|
+
Requires-Dist: pyotp
|
245
246
|
Dynamic: author
|
246
247
|
Dynamic: home-page
|
247
248
|
Dynamic: license-file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|