warp-beacon 2.6.17__tar.gz → 2.6.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {warp_beacon-2.6.17/warp_beacon.egg-info → warp_beacon-2.6.19}/PKG-INFO +1 -1
  2. warp_beacon-2.6.19/warp_beacon/__version__.py +2 -0
  3. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/jobs/abstract.py +4 -0
  4. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scheduler/instagram_human.py +26 -0
  5. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/__init__.py +13 -3
  6. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/abstract.py +17 -11
  7. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/instagram/instagram.py +13 -4
  8. {warp_beacon-2.6.17 → warp_beacon-2.6.19/warp_beacon.egg-info}/PKG-INFO +1 -1
  9. warp_beacon-2.6.17/warp_beacon/__version__.py +0 -2
  10. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/LICENSE +0 -0
  11. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/MANIFEST.in +0 -0
  12. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/README.md +0 -0
  13. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/assets/placeholder.gif +0 -0
  14. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/etc/.gitignore +0 -0
  15. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/etc/accounts.json +0 -0
  16. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/etc/proxies.json +0 -0
  17. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/etc/warp_beacon.conf +0 -0
  18. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/etc/warp_beacon.service +0 -0
  19. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/pyproject.toml +0 -0
  20. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/setup.cfg +0 -0
  21. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/setup.py +0 -0
  22. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/__init__.py +0 -0
  23. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/compress/__init__.py +0 -0
  24. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/compress/video.py +0 -0
  25. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/jobs/__init__.py +0 -0
  26. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/jobs/download_job.py +0 -0
  27. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/jobs/types.py +0 -0
  28. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/jobs/upload_job.py +0 -0
  29. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/mediainfo/__init__.py +0 -0
  30. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/mediainfo/abstract.py +0 -0
  31. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/mediainfo/audio.py +0 -0
  32. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/mediainfo/silencer.py +0 -0
  33. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/mediainfo/video.py +0 -0
  34. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scheduler/__init__.py +0 -0
  35. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scheduler/scheduler.py +0 -0
  36. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/account_selector.py +0 -0
  37. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/exceptions.py +0 -0
  38. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/fail_handler.py +0 -0
  39. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/instagram/__init__.py +0 -0
  40. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/instagram/captcha.py +0 -0
  41. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/link_resolver.py +0 -0
  42. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/youtube/__init__.py +0 -0
  43. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/youtube/abstract.py +0 -0
  44. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/youtube/music.py +0 -0
  45. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/youtube/shorts.py +0 -0
  46. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/scraper/youtube/youtube.py +0 -0
  47. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/storage/__init__.py +0 -0
  48. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/storage/mongo.py +0 -0
  49. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/__init__.py +0 -0
  50. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/bot.py +0 -0
  51. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/caption_shortener.py +0 -0
  52. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/handlers.py +0 -0
  53. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/placeholder_message.py +0 -0
  54. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/telegram/utils.py +0 -0
  55. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/uploader/__init__.py +0 -0
  56. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/warp_beacon.py +0 -0
  57. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon/yt_auth.py +0 -0
  58. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon.egg-info/SOURCES.txt +0 -0
  59. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon.egg-info/dependency_links.txt +0 -0
  60. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon.egg-info/entry_points.txt +0 -0
  61. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon.egg-info/requires.txt +0 -0
  62. {warp_beacon-2.6.17 → warp_beacon-2.6.19}/warp_beacon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warp_beacon
3
- Version: 2.6.17
3
+ Version: 2.6.19
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -0,0 +1,2 @@
1
+ __version__ = "2.6.19"
2
+
@@ -47,6 +47,8 @@ class JobSettings(TypedDict):
47
47
  message_leftover: str
48
48
  replay: bool
49
49
  short_text: bool
50
+ scroll_content: bool
51
+ last_pk: int
50
52
 
51
53
  class AbstractJob(ABC):
52
54
  job_id: uuid.UUID = None
@@ -86,6 +88,8 @@ class AbstractJob(ABC):
86
88
  message_leftover: str = ""
87
89
  replay: bool = False
88
90
  short_text: bool = False
91
+ scroll_content: bool = False
92
+ last_pk: int = 0
89
93
 
90
94
  def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
91
95
  if kwargs:
@@ -16,6 +16,32 @@ class InstagramHuman(object):
16
16
  self.scrapler = scrapler
17
17
  self.operations_count = 0
18
18
 
19
+ def watch_content(self, media: list) -> None:
20
+ for m in media[:random.randint(2, 4)]:
21
+ try:
22
+ logging.info("Wathing content with pk '%d'", m.pk)
23
+ content = self.scrapler.cl.media_info(m.pk)
24
+ logging.info("Watched content with id '%d'", content.pk)
25
+ self.operations_count += 1
26
+ time.sleep(random.uniform(2, 5))
27
+ except Exception as e:
28
+ logging.warning("Exception while watching content")
29
+ logging.exception(e)
30
+
31
+ def scroll_content(self, last_pk: int) -> None:
32
+ if random.random() > 0.2:
33
+ logging.info("Starting to watch related reels with media_pk '%d'", last_pk)
34
+ media = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 10), last_media_pk=last_pk)
35
+ self.operations_count += 1
36
+ self.watch_content(media)
37
+
38
+ if random.random() > 0.3:
39
+ time.sleep(random.uniform(2, 5))
40
+ logging.info("Starting to explore reels with media_pk '%d'", last_pk)
41
+ media = self.scrapler.download_hndlr(self.scrapler.cl.explore_reels, amount=random.randint(4, 10), last_media_pk=last_pk)
42
+ self.operations_count += 1
43
+ self.watch_content(media)
44
+
19
45
  def simulate_activity(self) -> None:
20
46
  now = datetime.now()
21
47
  hour = now.hour
@@ -147,6 +147,13 @@ class AsyncDownloader(object):
147
147
  # job retry loop
148
148
  while self.allow_loop.value == 1:
149
149
  try:
150
+ if job.scroll_content and job.last_pk and job.job_origin is Origin.INSTAGRAM:
151
+ logging.info("Scrolling relative content with pk '%s'", job.last_pk)
152
+ operations = actor.scroll_content(last_pk=job.last_pk)
153
+ if operations:
154
+ selector.inc_ig_request_count(amount=operations)
155
+ logging.info("Scrolling done")
156
+ break
150
157
  if job.session_validation and job.job_origin in (Origin.INSTAGRAM, Origin.YOUTUBE):
151
158
  if job.job_origin is Origin.INSTAGRAM:
152
159
  if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="10")):
@@ -361,6 +368,12 @@ class AsyncDownloader(object):
361
368
  )
362
369
  else:
363
370
  self.uploader.queue_task(upload_job)
371
+ # watch related reels to simulate human
372
+ if item.get("last_pk", 0) and "reel/" in job.url:
373
+ self.queue_task(DownloadJob.build(
374
+ scroll_content=True,
375
+ last_pk=int(item.get("last_pk", 0))
376
+ ))
364
377
  else:
365
378
  logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
366
379
  self.uploader.queue_task(job.to_upload_job())
@@ -384,9 +397,6 @@ class AsyncDownloader(object):
384
397
  logging.error("Error inside download worker!")
385
398
  logging.exception(e)
386
399
  self.notify_task_failed(job)
387
- finally:
388
- if actor:
389
- actor.restore_gai()
390
400
  except Empty:
391
401
  pass
392
402
  except Exception as e:
@@ -3,6 +3,7 @@ import pathlib
3
3
 
4
4
  import socket
5
5
  import requests.packages.urllib3.util.connection as urllib3_cn
6
+ import multiprocessing
6
7
 
7
8
  from abc import ABC, abstractmethod
8
9
  from typing import Callable, Union
@@ -22,16 +23,19 @@ class ScraperAbstract(ABC):
22
23
  proxy = None
23
24
 
24
25
  def __init__(self, account: tuple, proxy: dict=None) -> None:
26
+ self._gai_lock = multiprocessing.Lock()
25
27
  self.account_index = account[0]
26
28
  self.account = account[1]
27
29
  self.proxy = proxy
28
30
  if self.account.get("force_ipv6", False):
29
31
  self.force_ipv6()
30
-
31
- def __del__(self) -> None:
32
- if self.account.get("force_ipv6", False):
32
+ else:
33
33
  self.restore_gai()
34
34
 
35
+ #def __del__(self) -> None:
36
+ # if self.account.get("force_ipv6", False):
37
+ # self.restore_gai()
38
+
35
39
  @abstractmethod
36
40
  def download(self, url: str) -> bool:
37
41
  raise NotImplementedError
@@ -86,13 +90,15 @@ class ScraperAbstract(ABC):
86
90
  if urllib3_cn.HAS_IPV6:
87
91
  family = socket.AF_INET6 # force ipv6 only if it is available
88
92
  return family
89
- if self.original_gai_family is None:
90
- self.original_gai_family = urllib3_cn.allowed_gai_family
91
- logging.info("Forcing IPv6 ...")
92
- urllib3_cn.allowed_gai_family = allowed_gai_family
93
+ with self._gai_lock:
94
+ if self.original_gai_family is None:
95
+ self.original_gai_family = urllib3_cn.allowed_gai_family
96
+ logging.info("Forcing IPv6 ...")
97
+ urllib3_cn.allowed_gai_family = allowed_gai_family
93
98
 
94
99
  def restore_gai(self) -> None:
95
- if self.original_gai_family:
96
- logging.info("Restoring normal IP stack ...")
97
- urllib3_cn.allowed_gai_family = self.original_gai_family
98
- self.original_gai_family = None
100
+ with self._gai_lock:
101
+ if self.original_gai_family:
102
+ logging.info("Restoring normal IP stack ...")
103
+ urllib3_cn.allowed_gai_family = self.original_gai_family
104
+ self.original_gai_family = None
@@ -109,6 +109,14 @@ class InstagramScraper(ScraperAbstract):
109
109
  inst_human.simulate_activity()
110
110
  self.safe_write_session()
111
111
  return inst_human.operations_count
112
+
113
+ def scroll_content(self, last_pk: int) -> None:
114
+ from warp_beacon.scheduler.instagram_human import InstagramHuman
115
+ self.load_session()
116
+ inst_human = InstagramHuman(self)
117
+ inst_human.scroll_content(last_pk)
118
+ self.safe_write_session()
119
+ return inst_human.operations_count
112
120
 
113
121
  def scrap(self, url: str) -> tuple[str]:
114
122
  self.load_session()
@@ -208,7 +216,8 @@ class InstagramScraper(ScraperAbstract):
208
216
  self.cl.request_timeout = int(os.environ.get("IG_REQUEST_TIMEOUT", default=60))
209
217
  path = self.download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
210
218
  return {"local_media_path": str(path), "canonical_name": self.extract_canonical_name(media_info), \
211
- "media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration)}}
219
+ "media_type": JobType.VIDEO, "last_pk": media_info.pk, \
220
+ "media_info": {"duration": round(media_info.video_duration)}}
212
221
 
213
222
  def download_photo(self, url: str, media_info: Media) -> dict:
214
223
  path = str(self.download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp'))
@@ -217,7 +226,7 @@ class InstagramScraper(ScraperAbstract):
217
226
  path = InstagramScraper.convert_webp_to_png(path)
218
227
  if ".heic" in path_lowered:
219
228
  path = InstagramScraper.convert_heic_to_png(path)
220
- return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE}
229
+ return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE, "last_pk": media_info.pk}
221
230
 
222
231
  def download_story(self, story_info: Story) -> dict:
223
232
  path, media_type, media_info = "", JobType.UNKNOWN, {}
@@ -259,7 +268,7 @@ class InstagramScraper(ScraperAbstract):
259
268
  for media_chunk in Utils.chunker(media_info.resources, 10):
260
269
  chunk = []
261
270
  for media in media_chunk:
262
- _media_info = self.download_hndlr(self.cl.media_info, media.pk)
271
+ _media_info = self.download_hndlr(self.cl.media_info, media.pk, use_cache=False)
263
272
  if media.media_type == 1: # photo
264
273
  chunk.append(self.download_photo(url=_media_info.thumbnail_url, media_info=_media_info))
265
274
  elif media.media_type == 2: # video
@@ -287,7 +296,7 @@ class InstagramScraper(ScraperAbstract):
287
296
  try:
288
297
  scrap_type, media_id = self.scrap(job.url)
289
298
  if scrap_type == "media":
290
- media_info = self.download_hndlr(self.cl.media_info, media_id)
299
+ media_info = self.download_hndlr(self.cl.media_info, media_id, use_cache=False)
291
300
  logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
292
301
  if media_info.media_type == 2 and media_info.product_type == "clips": # Reels
293
302
  res.append(self.download_video(url=media_info.video_url, media_info=media_info))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warp_beacon
3
- Version: 2.6.17
3
+ Version: 2.6.19
4
4
  Summary: Telegram bot for expanding external media links
5
5
  Home-page: https://github.com/sb0y/warp_beacon
6
6
  Author: Andrey Bagrintsev
@@ -1,2 +0,0 @@
1
- __version__ = "2.6.17"
2
-
File without changes
File without changes
File without changes
File without changes
File without changes