warp-beacon 2.6.17__py3-none-any.whl → 2.6.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- __version__ = "2.6.17"
1
+ __version__ = "2.6.18"
2
2
 
@@ -47,6 +47,8 @@ class JobSettings(TypedDict):
47
47
  message_leftover: str
48
48
  replay: bool
49
49
  short_text: bool
50
+ scroll_content: bool
51
+ last_pk: int
50
52
 
51
53
  class AbstractJob(ABC):
52
54
  job_id: uuid.UUID = None
@@ -86,6 +88,8 @@ class AbstractJob(ABC):
86
88
  message_leftover: str = ""
87
89
  replay: bool = False
88
90
  short_text: bool = False
91
+ scroll_content: bool = False
92
+ last_pk: int = 0
89
93
 
90
94
  def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
91
95
  if kwargs:
@@ -16,6 +16,32 @@ class InstagramHuman(object):
16
16
  self.scrapler = scrapler
17
17
  self.operations_count = 0
18
18
 
19
+ def watch_content(self, media: list) -> None:
20
+ for m in media[:random.randint(2, 4)]:
21
+ try:
22
+ logging.info("Wathing content with pk '%d'", m.pk)
23
+ content = self.scrapler.cl.media_info(m.pk)
24
+ logging.info("Watched content with id '%d'", content.pk)
25
+ self.operations_count += 1
26
+ time.sleep(random.uniform(2, 5))
27
+ except Exception as e:
28
+ logging.warning("Exception while watching content")
29
+ logging.exception(e)
30
+
31
+ def scroll_content(self, last_pk: int) -> None:
32
+ if random.random() > 0.2:
33
+ logging.info("Starting to watch related reels with media_pk '%d'", last_pk)
34
+ media = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 10), last_media_pk=last_pk)
35
+ self.operations_count += 1
36
+ self.watch_content(media)
37
+
38
+ if random.random() > 0.3:
39
+ time.sleep(random.uniform(2, 5))
40
+ logging.info("Starting to explore reels with media_pk '%d'", last_pk)
41
+ media = self.scrapler.download_hndlr(self.scrapler.cl.explore_reels, amount=random.randint(4, 10), last_media_pk=last_pk)
42
+ self.operations_count += 1
43
+ self.watch_content(media)
44
+
19
45
  def simulate_activity(self) -> None:
20
46
  now = datetime.now()
21
47
  hour = now.hour
@@ -147,6 +147,13 @@ class AsyncDownloader(object):
147
147
  # job retry loop
148
148
  while self.allow_loop.value == 1:
149
149
  try:
150
+ if job.scroll_content and job.last_pk and job.job_origin is Origin.INSTAGRAM:
151
+ logging.info("Scrolling relative content with pk '%s'", job.last_pk)
152
+ operations = actor.scroll_content(last_pk=job.last_pk)
153
+ if operations:
154
+ selector.inc_ig_request_count(amount=operations)
155
+ logging.info("Scrolling done")
156
+ break
150
157
  if job.session_validation and job.job_origin in (Origin.INSTAGRAM, Origin.YOUTUBE):
151
158
  if job.job_origin is Origin.INSTAGRAM:
152
159
  if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="10")):
@@ -361,6 +368,12 @@ class AsyncDownloader(object):
361
368
  )
362
369
  else:
363
370
  self.uploader.queue_task(upload_job)
371
+ # watch related reels to simulate human
372
+ if item.get("last_pk", 0) and "reel/" in job.url:
373
+ self.queue_task(DownloadJob.build(
374
+ scroll_content=True,
375
+ last_pk=int(item.get("last_pk", 0))
376
+ ))
364
377
  else:
365
378
  logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
366
379
  self.uploader.queue_task(job.to_upload_job())
@@ -384,9 +397,6 @@ class AsyncDownloader(object):
384
397
  logging.error("Error inside download worker!")
385
398
  logging.exception(e)
386
399
  self.notify_task_failed(job)
387
- finally:
388
- if actor:
389
- actor.restore_gai()
390
400
  except Empty:
391
401
  pass
392
402
  except Exception as e:
@@ -3,6 +3,7 @@ import pathlib
3
3
 
4
4
  import socket
5
5
  import requests.packages.urllib3.util.connection as urllib3_cn
6
+ import multiprocessing
6
7
 
7
8
  from abc import ABC, abstractmethod
8
9
  from typing import Callable, Union
@@ -22,16 +23,19 @@ class ScraperAbstract(ABC):
22
23
  proxy = None
23
24
 
24
25
  def __init__(self, account: tuple, proxy: dict=None) -> None:
26
+ self._gai_lock = multiprocessing.Lock()
25
27
  self.account_index = account[0]
26
28
  self.account = account[1]
27
29
  self.proxy = proxy
28
30
  if self.account.get("force_ipv6", False):
29
31
  self.force_ipv6()
30
-
31
- def __del__(self) -> None:
32
- if self.account.get("force_ipv6", False):
32
+ else:
33
33
  self.restore_gai()
34
34
 
35
+ #def __del__(self) -> None:
36
+ # if self.account.get("force_ipv6", False):
37
+ # self.restore_gai()
38
+
35
39
  @abstractmethod
36
40
  def download(self, url: str) -> bool:
37
41
  raise NotImplementedError
@@ -86,13 +90,15 @@ class ScraperAbstract(ABC):
86
90
  if urllib3_cn.HAS_IPV6:
87
91
  family = socket.AF_INET6 # force ipv6 only if it is available
88
92
  return family
89
- if self.original_gai_family is None:
90
- self.original_gai_family = urllib3_cn.allowed_gai_family
91
- logging.info("Forcing IPv6 ...")
92
- urllib3_cn.allowed_gai_family = allowed_gai_family
93
+ with self._gai_lock:
94
+ if self.original_gai_family is None:
95
+ self.original_gai_family = urllib3_cn.allowed_gai_family
96
+ logging.info("Forcing IPv6 ...")
97
+ urllib3_cn.allowed_gai_family = allowed_gai_family
93
98
 
94
99
  def restore_gai(self) -> None:
95
- if self.original_gai_family:
96
- logging.info("Restoring normal IP stack ...")
97
- urllib3_cn.allowed_gai_family = self.original_gai_family
98
- self.original_gai_family = None
100
+ with self._gai_lock:
101
+ if self.original_gai_family:
102
+ logging.info("Restoring normal IP stack ...")
103
+ urllib3_cn.allowed_gai_family = self.original_gai_family
104
+ self.original_gai_family = None
@@ -109,6 +109,14 @@ class InstagramScraper(ScraperAbstract):
109
109
  inst_human.simulate_activity()
110
110
  self.safe_write_session()
111
111
  return inst_human.operations_count
112
+
113
+ def scroll_content(self, last_pk: int) -> None:
114
+ from warp_beacon.scheduler.instagram_human import InstagramHuman
115
+ self.load_session()
116
+ inst_human = InstagramHuman(self)
117
+ inst_human.scroll_content(last_pk)
118
+ self.safe_write_session()
119
+ return inst_human.operations_count
112
120
 
113
121
  def scrap(self, url: str) -> tuple[str]:
114
122
  self.load_session()
@@ -208,7 +216,8 @@ class InstagramScraper(ScraperAbstract):
208
216
  self.cl.request_timeout = int(os.environ.get("IG_REQUEST_TIMEOUT", default=60))
209
217
  path = self.download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
210
218
  return {"local_media_path": str(path), "canonical_name": self.extract_canonical_name(media_info), \
211
- "media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration)}}
219
+ "media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration), \
220
+ "last_pk": media_info.pk}}
212
221
 
213
222
  def download_photo(self, url: str, media_info: Media) -> dict:
214
223
  path = str(self.download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp'))
@@ -217,7 +226,7 @@ class InstagramScraper(ScraperAbstract):
217
226
  path = InstagramScraper.convert_webp_to_png(path)
218
227
  if ".heic" in path_lowered:
219
228
  path = InstagramScraper.convert_heic_to_png(path)
220
- return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE}
229
+ return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE, "last_pk": media_info.pk}
221
230
 
222
231
  def download_story(self, story_info: Story) -> dict:
223
232
  path, media_type, media_info = "", JobType.UNKNOWN, {}
@@ -259,7 +268,7 @@ class InstagramScraper(ScraperAbstract):
259
268
  for media_chunk in Utils.chunker(media_info.resources, 10):
260
269
  chunk = []
261
270
  for media in media_chunk:
262
- _media_info = self.download_hndlr(self.cl.media_info, media.pk)
271
+ _media_info = self.download_hndlr(self.cl.media_info, media.pk, use_cache=False)
263
272
  if media.media_type == 1: # photo
264
273
  chunk.append(self.download_photo(url=_media_info.thumbnail_url, media_info=_media_info))
265
274
  elif media.media_type == 2: # video
@@ -287,7 +296,7 @@ class InstagramScraper(ScraperAbstract):
287
296
  try:
288
297
  scrap_type, media_id = self.scrap(job.url)
289
298
  if scrap_type == "media":
290
- media_info = self.download_hndlr(self.cl.media_info, media_id)
299
+ media_info = self.download_hndlr(self.cl.media_info, media_id, use_cache=False)
291
300
  logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
292
301
  if media_info.media_type == 2 and media_info.product_type == "clips": # Reels
293
302
  res.append(self.download_video(url=media_info.video_url, media_info=media_info))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warp_beacon
3
- Version: 2.6.17
3
+ Version: 2.6.18
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -4,13 +4,13 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
4
4
  var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
5
5
  var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
6
6
  warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
7
- warp_beacon/__version__.py,sha256=a1aXAI5iMpUmb-fkAT_xmK2FejqLr_zPFFJYf9nelUY,24
7
+ warp_beacon/__version__.py,sha256=Kin5y99GED8zaOzHXCgrFE-8X5r_9lqAz-PnEwlbvZ8,24
8
8
  warp_beacon/warp_beacon.py,sha256=ED43vNzdjDUJ_9qLCbri0bjWLWEJ69BENGj9i7G6AvM,342
9
9
  warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
10
10
  warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  warp_beacon/compress/video.py,sha256=_PDMVYCyzLYxHv1uZmmzGcG_8rjaZr7BTXsXTTy_oS4,2846
12
12
  warp_beacon/jobs/__init__.py,sha256=ED8_tPle4iL4kqNW0apAVkgNQtRRTnYfAJwBjO1g0JY,180
13
- warp_beacon/jobs/abstract.py,sha256=IF2bKx2DuuH_AP07YAvFxxqnuxjIwg2Glsr8TpWX6Z0,3179
13
+ warp_beacon/jobs/abstract.py,sha256=x8shgG1So1g-Yqu_uzij7yuqherJTIhVhdslOrq69Z4,3263
14
14
  warp_beacon/jobs/download_job.py,sha256=tz_4rKEqnJ9yrtHeDwPGjxoDrePrGhhjCTG5CK5rr-Q,817
15
15
  warp_beacon/jobs/types.py,sha256=Ae8zINgbs7cOcYkYoOCOACA7duyhnIGMQAJ_SJB1QRQ,176
16
16
  warp_beacon/jobs/upload_job.py,sha256=_ul4psPej1jLEs-BMcMR80GbXDSmm38jE9yoZtecclY,741
@@ -20,17 +20,17 @@ warp_beacon/mediainfo/audio.py,sha256=ous88kwQj4bDIChN5wnGil5LqTs0IQHH0d-nyrL0-Z
20
20
  warp_beacon/mediainfo/silencer.py,sha256=qxMuViOoVwUYb60uCVvqHiGrqByR1_4_rqMT-XdMkwc,1813
21
21
  warp_beacon/mediainfo/video.py,sha256=UBZrhTN5IDI-aYu6tsJEILo9nFkjHhkldGVFmvV7tEI,2480
22
22
  warp_beacon/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- warp_beacon/scheduler/instagram_human.py,sha256=buF6rJzp8gVZvHBAXEkAL-uJjm5iQkEWnfPgwM2oH8g,6276
23
+ warp_beacon/scheduler/instagram_human.py,sha256=yoOcWlQQVfvJn3nD5u_VnO9jn0e_OWRvIGY9kLjPFUk,7369
24
24
  warp_beacon/scheduler/scheduler.py,sha256=9OCh7Ta4wY_aTHGAOOZmaKXg56Ftx1N_aV1g6E3ZLKA,4941
25
- warp_beacon/scraper/__init__.py,sha256=x5z9NrK_CPXITOR9Uus3U6DvytNThvGt2yDRGlyakJo,18566
26
- warp_beacon/scraper/abstract.py,sha256=CiOyKCxVYWhPnOUpLAVIRNuHBftN6gmxqATdhjzkaS4,2852
25
+ warp_beacon/scraper/__init__.py,sha256=Pcgg6Dft77_4UM4f4jUQcmfkuf1-MdLhz6tD1swErFo,19127
26
+ warp_beacon/scraper/abstract.py,sha256=HZCglECO13XN3Yjvgt6n9JirxZ1A-8tsE8RudihC4HE,3004
27
27
  warp_beacon/scraper/account_selector.py,sha256=wm8XO020QSvJO43TG7G06a17G0lFtNdx17V8mC8vKbs,7783
28
28
  warp_beacon/scraper/exceptions.py,sha256=EKwoF0oH2xZWbNU-v8DOaWK5skKwa3s1yTIBdlcfMpc,1452
29
29
  warp_beacon/scraper/fail_handler.py,sha256=zcPK3ZVEsu6JmHYcWP7L3naTRK3gWFVRkpP84VBOtJs,964
30
30
  warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
31
31
  warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
33
- warp_beacon/scraper/instagram/instagram.py,sha256=Q08kaTYa1rdQehAdgGftDZog0LAm_nRzErjgIne6KDo,14677
33
+ warp_beacon/scraper/instagram/instagram.py,sha256=Ri0FrwnBQDayRORSF25awtv9Pxdt57MaWvTAXS5QUEo,15046
34
34
  warp_beacon/scraper/youtube/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  warp_beacon/scraper/youtube/abstract.py,sha256=QUSHR3aJjIykuRlBpGnmw3tq3m82AQeeZeWQUdJRvgQ,12538
36
36
  warp_beacon/scraper/youtube/music.py,sha256=5AeSBQyUgVCJT2hoBCV2WvlyuV9US09SYJhmBG_P9F8,2755
@@ -45,9 +45,9 @@ warp_beacon/telegram/handlers.py,sha256=uvR6TPHSqdSxigp3wR-ewiE6t3TvVcbVLVcYGwkg
45
45
  warp_beacon/telegram/placeholder_message.py,sha256=wN9-BRiyrtHG-EvXtZkGJHt2CX71munQ57ITttjt0mw,6400
46
46
  warp_beacon/telegram/utils.py,sha256=1Lq67aRylVJzbwSyvAgjPAGjJZFATkICvAj3TJGuJiM,4635
47
47
  warp_beacon/uploader/__init__.py,sha256=j3qcuKhpchseZLGzSsSiogqe6WdMbkK8d3I-ConhNRs,5687
48
- warp_beacon-2.6.17.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
- warp_beacon-2.6.17.dist-info/METADATA,sha256=oWESJ8WFz9XTMXake_tV2ezRui08bIGVwi9b7m3491c,22706
50
- warp_beacon-2.6.17.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
51
- warp_beacon-2.6.17.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
52
- warp_beacon-2.6.17.dist-info/top_level.txt,sha256=4ML0-mXsezLtRXyxQUntL_ktc5HX9npTeQWzvV8kFvA,1161
53
- warp_beacon-2.6.17.dist-info/RECORD,,
48
+ warp_beacon-2.6.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
49
+ warp_beacon-2.6.18.dist-info/METADATA,sha256=NNHhNDqvlyumpcAPkopaZlIVn9o8LBnoHmoM9DyDnZE,22706
50
+ warp_beacon-2.6.18.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
51
+ warp_beacon-2.6.18.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
52
+ warp_beacon-2.6.18.dist-info/top_level.txt,sha256=4ML0-mXsezLtRXyxQUntL_ktc5HX9npTeQWzvV8kFvA,1161
53
+ warp_beacon-2.6.18.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5