warp-beacon 2.8.10__py3-none-any.whl → 2.8.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- __version__ = "2.8.10"
1
+ __version__ = "2.8.12"
2
2
 
@@ -59,6 +59,7 @@ class XScraper(XAbstract):
59
59
  'merge_output_format': 'mp4',
60
60
  'dump_single_json': True,
61
61
  'nocheckcertificate': True,
62
+ 'progress_hooks': [self.dlp_on_progress],
62
63
  }
63
64
 
64
65
  if self.proxy:
@@ -68,6 +69,16 @@ class XScraper(XAbstract):
68
69
  ydl_opts["proxy"] = proxy_dsn
69
70
 
70
71
  local_file, media_info, media_type, post_text = "", {}, XMediaType.UNKNOWN, ""
72
+ #tweet_contains_video, tweet_contains_images = False, False
73
+
74
+ #with sync_playwright() as p:
75
+ # with p.chromium.launch(headless=True) as browser:
76
+ # with browser.new_context(proxy=proxy, ignore_https_errors=True) as context:
77
+ # page = context.new_page()
78
+ # page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
79
+ # tweet_contains_video = self.tweet_contains_video(page)
80
+ # tweet_contains_images = self.tweet_contains_images(page)
81
+
71
82
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
72
83
  try:
73
84
  media_info = ydl.extract_info(url, download=False)
@@ -262,4 +273,58 @@ class XScraper(XAbstract):
262
273
  image_urls.append(src)
263
274
 
264
275
  img_urls = list(set(image_urls))
265
- return img_urls, post_text
276
+ return img_urls, post_text
277
+
278
+ def get_media_type_from_info_and_dom(self, media_info: dict, page: Page) -> XMediaType:
279
+ is_video = (
280
+ media_info.get("vcodec") != "none" or
281
+ media_info.get("ext") in {"mp4", "mov", "mkv"} or
282
+ any(
283
+ f.get("vcodec") not in (None, "none")
284
+ for f in media_info.get("formats", [])
285
+ )
286
+ )
287
+
288
+ try:
289
+ image_elements = page.query_selector_all("img")
290
+ image_urls = [
291
+ img.get_attribute("src")
292
+ for img in image_elements
293
+ if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
294
+ ]
295
+ has_images = bool(image_urls)
296
+ except Exception:
297
+ has_images = False
298
+
299
+ if is_video and has_images:
300
+ return XMediaType.MIXED
301
+ elif is_video:
302
+ return XMediaType.VIDEO
303
+ elif has_images:
304
+ return XMediaType.IMAGE
305
+
306
+ return XMediaType.UNKNOWN
307
+
308
+ def tweet_contains_video(self, page: Page) -> bool:
309
+ try:
310
+ return bool(
311
+ page.query_selector("article video") or
312
+ page.query_selector("div[data-testid='videoPlayer']") or
313
+ page.query_selector("div[aria-label='Embedded video']")
314
+ )
315
+ except Exception:
316
+ pass
317
+ return False
318
+
319
+ def tweet_contains_images(self, page: Page) -> bool:
320
+ try:
321
+ image_elements = page.query_selector_all("img")
322
+ image_urls = [
323
+ img.get_attribute("src")
324
+ for img in image_elements
325
+ if img.get_attribute("src") and "pbs.twimg.com/media" in img.get_attribute("src")
326
+ ]
327
+ return bool(image_urls)
328
+ except Exception:
329
+ pass
330
+ return False
@@ -3,4 +3,5 @@ import enum
3
3
  class XMediaType(enum.Enum):
4
4
  UNKNOWN = 0
5
5
  VIDEO = 1
6
- IMAGE = 2
6
+ IMAGE = 2
7
+ MIXED = 3
@@ -44,6 +44,9 @@ class AllAccountsFailed(ScraperError):
44
44
  class BadProxy(ScraperError):
45
45
  pass
46
46
 
47
+ class TOTPNotProvided(ScraperError):
48
+ pass
49
+
47
50
  class UnknownError(ScraperError):
48
51
  pass
49
52
 
@@ -16,15 +16,17 @@ from urllib.parse import urljoin, urlparse
16
16
  import requests
17
17
  import urllib3
18
18
 
19
+ import pyotp
20
+
19
21
  from instagrapi import exceptions
20
- from instagrapi.exceptions import UnknownError as IGUnknownError
22
+ from instagrapi.exceptions import TwoFactorRequired, UnknownError as IGUnknownError
21
23
  from instagrapi.mixins.story import Story
22
24
  from instagrapi.types import Media
23
25
  from instagrapi.mixins.challenge import ChallengeChoice
24
26
  #from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, ChallengeRequired, \
25
27
  # ChallengeSelfieCaptcha, ChallengeUnknownStep, UnknownError as IGUnknownError
26
28
 
27
- from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, BadProxy, extract_exception_message
29
+ from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, BadProxy, TOTPNotProvided, extract_exception_message
28
30
  from warp_beacon.scraper.abstract import ScraperAbstract
29
31
  from warp_beacon.jobs.types import JobType
30
32
  from warp_beacon.jobs.download_job import DownloadJob
@@ -112,9 +114,6 @@ class InstagramScraper(ScraperAbstract):
112
114
  logging.info("Loading existing session file '%s'", self.inst_session_file)
113
115
  with open(self.inst_session_file, 'r', encoding="utf-8") as f:
114
116
  js = json.loads(f.read())
115
- if "warp_timeline_cursor" in js:
116
- self.timeline_cursor = js.get("warp_timeline_cursor", {})
117
- del js["warp_timeline_cursor"]
118
117
  self.cl.set_settings(js)
119
118
  else:
120
119
  self.download_hndlr(self.login)
@@ -122,9 +121,20 @@ class InstagramScraper(ScraperAbstract):
122
121
  def login(self) -> None:
123
122
  username = self.account["login"]
124
123
  password = self.account["password"]
124
+ totp_secret = self.account.get("totp_secret")
125
125
  if username and password:
126
- self.cl.login(username=username, password=password, verification_code="")
127
- self.safe_write_session()
126
+ try:
127
+ self.cl.login(username=username, password=password)
128
+ except TwoFactorRequired:
129
+ logging.warning("Two factor required!")
130
+ if not totp_secret:
131
+ logging.critical("Please specify TOTP secret in account config")
132
+ raise TOTPNotProvided("Please specify TOTP secret in account config")
133
+ totp = pyotp.TOTP(totp_secret)
134
+ code = totp.now()
135
+ logging.info("TOTP code for now: '%s'", code)
136
+ self.cl.login(username=username, password=password, verification_code=code)
137
+ self.safe_write_session()
128
138
 
129
139
  def validate_session(self) -> int:
130
140
  from warp_beacon.scheduler.instagram_human import InstagramHuman
@@ -393,7 +403,7 @@ class InstagramScraper(ScraperAbstract):
393
403
  if os.path.exists(self.inst_session_file):
394
404
  os.unlink(self.inst_session_file)
395
405
  raise IGRateLimitOccurred("Instagram ratelimit")
396
- except (exceptions.MediaNotFound, exceptions.ClientNotFoundError, exceptions.UserNotFound) as e:
406
+ except (exceptions.MediaNotFound, exceptions.ClientNotFoundError, exceptions.UserNotFound, exceptions.StoryNotFound) as e:
397
407
  raise NotFound(extract_exception_message(e))
398
408
  except IGUnknownError as e:
399
409
  raise UnknownError(extract_exception_message(e))
@@ -9,14 +9,68 @@ import requests
9
9
  from instagrapi import Client
10
10
  from instagrapi.types import Media, User, Story
11
11
  from instagrapi.exceptions import (
12
- #ClientError,
12
+ ClientError,
13
13
  #ClientLoginRequired,
14
+ ClientNotFoundError,
14
15
  VideoNotDownload,
16
+ MediaNotFound,
15
17
  PrivateError
16
18
  )
17
19
 
20
+ from instagrapi.extractors import (
21
+ extract_location,
22
+ #extract_media_v1,
23
+ extract_user_short,
24
+ extract_usertag,
25
+ extract_resource_v1
26
+ )
27
+
18
28
  from warp_beacon.scraper.utils import ScraperUtils
19
29
 
30
+ def extract_media_v1(data):
31
+ """Extract media from Private API"""
32
+ media = deepcopy(data)
33
+ if "video_versions" in media:
34
+ # Select Best Quality by Resolutiuon
35
+ media["video_url"] = sorted(
36
+ media["video_versions"], key=lambda o: o["height"] * o["width"]
37
+ )[-1]["url"]
38
+ if media["media_type"] == 2 and not media.get("product_type"):
39
+ media["product_type"] = "feed"
40
+ if "image_versions2" in media:
41
+ media["thumbnail_url"] = sorted(
42
+ media["image_versions2"]["candidates"],
43
+ key=lambda o: o["height"] * o["width"],
44
+ )[-1]["url"]
45
+ if media["media_type"] == 8:
46
+ # remove thumbnail_url and video_url for albums
47
+ # see resources
48
+ media.pop("thumbnail_url", "")
49
+ media.pop("video_url", "")
50
+ location = media.get("location")
51
+ media["location"] = location and extract_location(location)
52
+ media["user"] = extract_user_short(media.get("user"))
53
+ media["usertags"] = sorted(
54
+ [
55
+ extract_usertag(usertag)
56
+ for usertag in media.get("usertags", {}).get("in", [])
57
+ ],
58
+ key=lambda tag: tag.user.pk,
59
+ )
60
+ media["like_count"] = media.get("like_count", 0)
61
+ media["has_liked"] = media.get("has_liked", False)
62
+ #media["sponsor_tags"] = [tag["sponsor"] for tag in media.get("sponsor_tags", [])]
63
+ media["sponsor_tags"] = [tag["sponsor"] for tag in media.get("sponsor_tags") or []]
64
+ media["play_count"] = media.get("play_count", 0)
65
+ media["coauthor_producers"] = media.get("coauthor_producers", [])
66
+ return Media(
67
+ caption_text=(media.get("caption") or {}).get("text", ""),
68
+ resources=[
69
+ extract_resource_v1(edge) for edge in media.get("carousel_media", [])
70
+ ],
71
+ **media,
72
+ )
73
+
20
74
  class WBClient(Client):
21
75
  """
22
76
  patched instagrapi
@@ -194,6 +248,30 @@ class WBClient(Client):
194
248
  self._medias_cache[media_pk]
195
249
  ) # return copy of cache (dict changes protection)
196
250
 
251
+ def media_info_v1(self, media_pk: str) -> Media:
252
+ """
253
+ Get Media from PK by Private Mobile API
254
+
255
+ Parameters
256
+ ----------
257
+ media_pk: str
258
+ Unique identifier of the media
259
+
260
+ Returns
261
+ -------
262
+ Media
263
+ An object of Media type
264
+ """
265
+ try:
266
+ result = self.private_request(f"media/{media_pk}/info/")
267
+ except ClientNotFoundError as e:
268
+ raise MediaNotFound(e, media_pk=media_pk, **self.last_json)
269
+ except ClientError as e:
270
+ if "Media not found" in str(e):
271
+ raise MediaNotFound(e, media_pk=media_pk, **self.last_json)
272
+ raise e
273
+ return extract_media_v1(result["items"].pop())
274
+
197
275
  def user_info_by_username(self, username: str, use_cache: bool = True) -> User:
198
276
  """
199
277
  Get user object from username
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warp_beacon
3
- Version: 2.8.10
3
+ Version: 2.8.12
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -242,6 +242,7 @@ Requires-Dist: pydub
242
242
  Requires-Dist: SpeechRecognition
243
243
  Requires-Dist: playwright
244
244
  Requires-Dist: fake-useragent
245
+ Requires-Dist: pyotp
245
246
  Dynamic: author
246
247
  Dynamic: home-page
247
248
  Dynamic: license-file
@@ -4,7 +4,7 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
4
4
  var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
5
5
  var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
6
6
  warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
7
- warp_beacon/__version__.py,sha256=Ou4DLKTMIKPZuLHA1_MlWO0lsUTdjc49PjxWE4oYrZU,24
7
+ warp_beacon/__version__.py,sha256=ioCIkmzTwVXvR1lLWlgUjehGwiXaxB8kTGnfQ-3C1FA,24
8
8
  warp_beacon/warp_beacon.py,sha256=ADCR30uGXIsDrt9WoiI9Ghu2QtWs0qZIK6x3pQKM_B4,1109
9
9
  warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
10
10
  warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,18 +25,18 @@ warp_beacon/scheduler/scheduler.py,sha256=0u9AIr9fTBmjU1GpOwKlPuNLskeJ4A-S2uAUzb
25
25
  warp_beacon/scraper/__init__.py,sha256=o9-HQEf4yQVNtWuJN4NcLUovejiHhP_KkQ1Xf5EaQvU,20670
26
26
  warp_beacon/scraper/abstract.py,sha256=pWbaTu-gDZgi-iFjqMR_uGzPl5KLv-4gTdJ9w6cD4sk,3802
27
27
  warp_beacon/scraper/account_selector.py,sha256=n-466AiTXZ8o5cgcNkNwNiWLoi-EkLC7bHh6El1eIF8,10274
28
- warp_beacon/scraper/exceptions.py,sha256=EKwoF0oH2xZWbNU-v8DOaWK5skKwa3s1yTIBdlcfMpc,1452
28
+ warp_beacon/scraper/exceptions.py,sha256=hicAe6_0xN7Ry2gcFX4UvqPWMtF_lX2ihH1njQAaqCA,1496
29
29
  warp_beacon/scraper/fail_handler.py,sha256=5ODu4b8ndZWAcHIXrcUufsWFihetzNUoAi8IgAkreyQ,998
30
30
  warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
31
31
  warp_beacon/scraper/utils.py,sha256=AOZmDki2Pbr84IG-j_wN2UghKCiWFVDYdx6HJl0JTBs,1258
32
- warp_beacon/scraper/X/X.py,sha256=KSAXBqIbm5gMbYHYcIXjc7dx3ZW_Qkz2mV-kQ6CrrfM,7953
32
+ warp_beacon/scraper/X/X.py,sha256=3-GBXHcSmGIiDLucJWLpiuaA8EDfP0-B7ws4MiCelkE,9883
33
33
  warp_beacon/scraper/X/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  warp_beacon/scraper/X/abstract.py,sha256=pCzZPTCtn8pRbBx2SeuBUpMkEHqnOLtwLBAHYceL12Q,5475
35
- warp_beacon/scraper/X/types.py,sha256=i36Nu2cHpHCkvoeobBQC3B13Ke_N40tgCCApcm_FBFY,76
35
+ warp_beacon/scraper/X/types.py,sha256=9Y0PJo3vZ1DMQcyfqoE4y2-AQRAetVmIxQwFDZkZy30,87
36
36
  warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
38
- warp_beacon/scraper/instagram/instagram.py,sha256=5rD8kPBk6j59a7Wi1TL6W04KHX6zNO1QYMo3H1OsJHQ,18749
39
- warp_beacon/scraper/instagram/wb_instagrapi.py,sha256=TgpCjFudeEf2VN0qKgaurFcHECsAlqDc1vM4t7tMScs,9202
38
+ warp_beacon/scraper/instagram/instagram.py,sha256=uzqUCVniRa3d9uavoMAz9-9MHvYOh7n_G7UyfgzHgAk,19154
39
+ warp_beacon/scraper/instagram/wb_instagrapi.py,sha256=piPtcN3pB_obsaPX36Sdm2JxEM7d99ZUTwsd4fJY2B4,11544
40
40
  warp_beacon/scraper/youtube/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  warp_beacon/scraper/youtube/abstract.py,sha256=7CVR2fW6bpWYYKcveRddd6XlgDsfV_Pp3dqV2LpefSc,16088
42
42
  warp_beacon/scraper/youtube/music.py,sha256=5AeSBQyUgVCJT2hoBCV2WvlyuV9US09SYJhmBG_P9F8,2755
@@ -56,9 +56,9 @@ warp_beacon/telegram/progress_file_reader.py,sha256=e3equyNKlKs764AD-iE9QRsh3YDH
56
56
  warp_beacon/telegram/types.py,sha256=Kvdng6uCF1HRoqQgGW1ZYYPJoVuYkFb-LDvMBbW5Hjk,89
57
57
  warp_beacon/telegram/utils.py,sha256=zTF8VQfAWetBSjAPbmNe_Zi_LN5fAcWptJKjLaFNHaE,5073
58
58
  warp_beacon/uploader/__init__.py,sha256=1enK6qMWaTZEaK456JwaKOfvCvznHA8cjgceOsrF6Po,5732
59
- warp_beacon-2.8.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
- warp_beacon-2.8.10.dist-info/METADATA,sha256=TymhuXptNclz7M2y-swiwHmk7pFrGgD83ehUHDP1mHs,23215
61
- warp_beacon-2.8.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
- warp_beacon-2.8.10.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
63
- warp_beacon-2.8.10.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
64
- warp_beacon-2.8.10.dist-info/RECORD,,
59
+ warp_beacon-2.8.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
60
+ warp_beacon-2.8.12.dist-info/METADATA,sha256=W7U_kzv74XYVaeNArKB7Gw7UmwGBe2T4Ow6WYmUSl6Y,23236
61
+ warp_beacon-2.8.12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
62
+ warp_beacon-2.8.12.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
63
+ warp_beacon-2.8.12.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
64
+ warp_beacon-2.8.12.dist-info/RECORD,,