warp-beacon 2.6.17.tar.gz → 2.6.18.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.6.17/warp_beacon.egg-info → warp_beacon-2.6.18}/PKG-INFO +1 -1
- warp_beacon-2.6.18/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/abstract.py +4 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scheduler/instagram_human.py +26 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/__init__.py +13 -3
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/abstract.py +17 -11
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/instagram/instagram.py +13 -4
- {warp_beacon-2.6.17 → warp_beacon-2.6.18/warp_beacon.egg-info}/PKG-INFO +1 -1
- warp_beacon-2.6.17/warp_beacon/__version__.py +0 -2
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/LICENSE +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/MANIFEST.in +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/README.md +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/assets/placeholder.gif +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/etc/.gitignore +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/etc/accounts.json +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/etc/proxies.json +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/pyproject.toml +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/setup.cfg +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/setup.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scheduler/scheduler.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/account_selector.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/instagram/captcha.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/link_resolver.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/youtube/abstract.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/storage/mongo.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/bot.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/caption_shortener.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/handlers.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/uploader/__init__.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/yt_auth.py +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon.egg-info/requires.txt +0 -0
- {warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon.egg-info/top_level.txt +0 -0
{warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/jobs/abstract.py

```diff
@@ -47,6 +47,8 @@ class JobSettings(TypedDict):
 	message_leftover: str
 	replay: bool
 	short_text: bool
+	scroll_content: bool
+	last_pk: int
 
 class AbstractJob(ABC):
 	job_id: uuid.UUID = None
@@ -86,6 +88,8 @@ class AbstractJob(ABC):
 	message_leftover: str = ""
 	replay: bool = False
 	short_text: bool = False
+	scroll_content: bool = False
+	last_pk: int = 0
 
 	def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
 		if kwargs:
```
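The two new fields are added in lockstep: once in the `JobSettings` TypedDict, which types the `**kwargs` of `AbstractJob.__init__`, and once as class-level defaults on `AbstractJob`, so older call sites that never pass them keep working. A minimal sketch of the pattern, assuming the usual kwargs-to-attributes loop (the diff only shows the `if kwargs:` guard):

```python
from typing import TypedDict, Unpack  # Unpack needs Python 3.11+ (or typing_extensions)

class JobSettings(TypedDict, total=False):
	replay: bool
	short_text: bool
	scroll_content: bool  # new in 2.6.18
	last_pk: int          # new in 2.6.18

class AbstractJob:
	scroll_content: bool = False
	last_pk: int = 0

	def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
		if kwargs:
			for key, value in kwargs.items():  # assumed body; class defaults remain for absent keys
				setattr(self, key, value)

job = AbstractJob(scroll_content=True, last_pk=3573)
print(job.scroll_content, job.last_pk)  # True 3573
```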
{warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scheduler/instagram_human.py

```diff
@@ -16,6 +16,32 @@ class InstagramHuman(object):
 		self.scrapler = scrapler
 		self.operations_count = 0
 
+	def watch_content(self, media: list) -> None:
+		for m in media[:random.randint(2, 4)]:
+			try:
+				logging.info("Wathing content with pk '%d'", m.pk)
+				content = self.scrapler.cl.media_info(m.pk)
+				logging.info("Watched content with id '%d'", content.pk)
+				self.operations_count += 1
+				time.sleep(random.uniform(2, 5))
+			except Exception as e:
+				logging.warning("Exception while watching content")
+				logging.exception(e)
+
+	def scroll_content(self, last_pk: int) -> None:
+		if random.random() > 0.2:
+			logging.info("Starting to watch related reels with media_pk '%d'", last_pk)
+			media = self.scrapler.download_hndlr(self.scrapler.cl.reels, amount=random.randint(4, 10), last_media_pk=last_pk)
+			self.operations_count += 1
+			self.watch_content(media)
+
+		if random.random() > 0.3:
+			time.sleep(random.uniform(2, 5))
+			logging.info("Starting to explore reels with media_pk '%d'", last_pk)
+			media = self.scrapler.download_hndlr(self.scrapler.cl.explore_reels, amount=random.randint(4, 10), last_media_pk=last_pk)
+			self.operations_count += 1
+			self.watch_content(media)
+
 	def simulate_activity(self) -> None:
 		now = datetime.now()
 		hour = now.hour
```
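The new `watch_content`/`scroll_content` pair drives the instagrapi client (`media_info`, `reels`, `explore_reels`) through probabilistic branches and jittered sleeps, so a batch of fetches paces itself like a person skimming a feed rather than a burst of API calls. A standalone sketch of that pacing pattern with no real Instagram client (the lambda at the bottom stands in for `scrapler.cl.reels`/`explore_reels`):

```python
import logging
import random
import time

logging.basicConfig(level=logging.INFO)

def watch_content(media: list) -> int:
	"""View 2-4 items with human-like dwell times; return operations performed."""
	operations = 0
	for pk in media[:random.randint(2, 4)]:
		logging.info("Watching content with pk '%d'", pk)
		operations += 1
		time.sleep(random.uniform(2, 5))  # jittered pause between views
	return operations

def scroll_content(fetch, last_pk: int) -> int:
	"""Browse related media with ~80% probability, mirroring the randomized branch."""
	operations = 0
	if random.random() > 0.2:
		media = fetch(amount=random.randint(4, 10), last_media_pk=last_pk)
		operations += 1 + watch_content(media)
	return operations

# toy fetcher returning fake media pks
print(scroll_content(lambda amount, last_media_pk: list(range(amount)), last_pk=3573))
```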
{warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/__init__.py

```diff
@@ -147,6 +147,13 @@ class AsyncDownloader(object):
 				# job retry loop
 				while self.allow_loop.value == 1:
 					try:
+						if job.scroll_content and job.last_pk and job.job_origin is Origin.INSTAGRAM:
+							logging.info("Scrolling relative content with pk '%s'", job.last_pk)
+							operations = actor.scroll_content(last_pk=job.last_pk)
+							if operations:
+								selector.inc_ig_request_count(amount=operations)
+							logging.info("Scrolling done")
+							break
 						if job.session_validation and job.job_origin in (Origin.INSTAGRAM, Origin.YOUTUBE):
 							if job.job_origin is Origin.INSTAGRAM:
 								if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="10")):
@@ -361,6 +368,12 @@ class AsyncDownloader(object):
 								)
 							else:
 								self.uploader.queue_task(upload_job)
+							# watch related reels to simulate human
+							if item.get("last_pk", 0) and "reel/" in job.url:
+								self.queue_task(DownloadJob.build(
+									scroll_content=True,
+									last_pk=int(item.get("last_pk", 0))
+								))
 						else:
 							logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
 							self.uploader.queue_task(job.to_upload_job())
@@ -384,9 +397,6 @@ class AsyncDownloader(object):
 					logging.error("Error inside download worker!")
 					logging.exception(e)
 					self.notify_task_failed(job)
-				finally:
-					if actor:
-						actor.restore_gai()
 			except Empty:
 				pass
 			except Exception as e:
```
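These three hunks close a loop: when a reel download succeeds, the worker re-queues a synthetic job flagged `scroll_content=True` carrying the media's `last_pk`; on the next pass the retry loop short-circuits that job into `actor.scroll_content()`, charges the extra operations against the per-account request budget, and never enters the download/upload path. The removed `finally: actor.restore_gai()` matches the next file, where resolver restoration moves into `ScraperAbstract.__init__` instead of per-job teardown. A simplified sketch of the re-queue-and-short-circuit flow, with a plain `queue.Queue` standing in for the real scheduler:

```python
import queue
from dataclasses import dataclass

@dataclass
class Job:
	url: str = ""
	scroll_content: bool = False
	last_pk: int = 0

tasks: queue.Queue = queue.Queue()

def on_download_success(job: Job, item: dict) -> None:
	# mirrors the diff: only reels with a known last_pk spawn a follow-up job
	if item.get("last_pk", 0) and "reel/" in job.url:
		tasks.put(Job(scroll_content=True, last_pk=int(item["last_pk"])))

def worker_step() -> None:
	job = tasks.get()
	if job.scroll_content and job.last_pk:
		# the real worker calls actor.scroll_content() and bumps the request count
		print(f"simulating scroll from pk {job.last_pk}")
		return  # short-circuit: synthetic jobs never reach download/upload
	print(f"downloading {job.url}")

on_download_success(Job(url="https://www.instagram.com/reel/abc/"), {"last_pk": 3573})
worker_step()
```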
{warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/abstract.py

```diff
@@ -3,6 +3,7 @@ import pathlib
 
 import socket
 import requests.packages.urllib3.util.connection as urllib3_cn
+import multiprocessing
 
 from abc import ABC, abstractmethod
 from typing import Callable, Union
@@ -22,16 +23,19 @@ class ScraperAbstract(ABC):
 	proxy = None
 
 	def __init__(self, account: tuple, proxy: dict=None) -> None:
+		self._gai_lock = multiprocessing.Lock()
 		self.account_index = account[0]
 		self.account = account[1]
 		self.proxy = proxy
 		if self.account.get("force_ipv6", False):
 			self.force_ipv6()
-
-	def __del__(self) -> None:
-		if self.account.get("force_ipv6", False):
+		else:
 			self.restore_gai()
 
+	#def __del__(self) -> None:
+	#	if self.account.get("force_ipv6", False):
+	#		self.restore_gai()
+
 	@abstractmethod
 	def download(self, url: str) -> bool:
 		raise NotImplementedError
@@ -86,13 +90,15 @@ class ScraperAbstract(ABC):
 			if urllib3_cn.HAS_IPV6:
 				family = socket.AF_INET6 # force ipv6 only if it is available
 			return family
-
-		self.original_gai_family
-
-
+		with self._gai_lock:
+			if self.original_gai_family is None:
+				self.original_gai_family = urllib3_cn.allowed_gai_family
+				logging.info("Forcing IPv6 ...")
+			urllib3_cn.allowed_gai_family = allowed_gai_family
 
 	def restore_gai(self) -> None:
-
-
-
-
+		with self._gai_lock:
+			if self.original_gai_family:
+				logging.info("Restoring normal IP stack ...")
+				urllib3_cn.allowed_gai_family = self.original_gai_family
+				self.original_gai_family = None
```
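`force_ipv6()` works by monkey-patching `allowed_gai_family` in `urllib3.util.connection`, the hook urllib3 consults when resolving addresses for new connections. The 2.6.18 version wraps patch and restore in a lock and saves the pristine hook exactly once, so concurrent or repeated calls cannot capture an already-patched function, and it replaces the nondeterministic `__del__` teardown with an explicit restore. A self-contained, module-level sketch of the same technique (the class in the diff keeps this state on `self` instead):

```python
import multiprocessing
import socket

import requests.packages.urllib3.util.connection as urllib3_cn

_gai_lock = multiprocessing.Lock()
_original_gai_family = None

def force_ipv6() -> None:
	def allowed_gai_family() -> socket.AddressFamily:
		family = socket.AF_INET
		if urllib3_cn.HAS_IPV6:
			family = socket.AF_INET6  # force IPv6 only if the host supports it
		return family

	global _original_gai_family
	with _gai_lock:
		if _original_gai_family is None:
			# save the pristine hook once, so a second call cannot
			# overwrite it with an already-patched function
			_original_gai_family = urllib3_cn.allowed_gai_family
		urllib3_cn.allowed_gai_family = allowed_gai_family

def restore_gai() -> None:
	global _original_gai_family
	with _gai_lock:
		if _original_gai_family:
			urllib3_cn.allowed_gai_family = _original_gai_family
			_original_gai_family = None

force_ipv6()   # subsequent urllib3/requests connections resolve AF_INET6 only
restore_gai()  # back to default dual-stack behaviour
```

Note that the lock only serializes patch/restore; the patched attribute is ordinary module state, so each worker process patches its own copy regardless of the `multiprocessing.Lock`.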
{warp_beacon-2.6.17 → warp_beacon-2.6.18}/warp_beacon/scraper/instagram/instagram.py

```diff
@@ -109,6 +109,14 @@ class InstagramScraper(ScraperAbstract):
 		inst_human.simulate_activity()
 		self.safe_write_session()
 		return inst_human.operations_count
+
+	def scroll_content(self, last_pk: int) -> None:
+		from warp_beacon.scheduler.instagram_human import InstagramHuman
+		self.load_session()
+		inst_human = InstagramHuman(self)
+		inst_human.scroll_content(last_pk)
+		self.safe_write_session()
+		return inst_human.operations_count
 
 	def scrap(self, url: str) -> tuple[str]:
 		self.load_session()
@@ -208,7 +216,8 @@ class InstagramScraper(ScraperAbstract):
 		self.cl.request_timeout = int(os.environ.get("IG_REQUEST_TIMEOUT", default=60))
 		path = self.download_hndlr(self.cl.video_download_by_url, url, folder='/tmp')
 		return {"local_media_path": str(path), "canonical_name": self.extract_canonical_name(media_info), \
-			"media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration)}}
+			"media_type": JobType.VIDEO, "media_info": {"duration": round(media_info.video_duration), \
+			"last_pk": media_info.pk}}
 
 	def download_photo(self, url: str, media_info: Media) -> dict:
 		path = str(self.download_hndlr(self.cl.photo_download_by_url, url, folder='/tmp'))
@@ -217,7 +226,7 @@ class InstagramScraper(ScraperAbstract):
 			path = InstagramScraper.convert_webp_to_png(path)
 		if ".heic" in path_lowered:
 			path = InstagramScraper.convert_heic_to_png(path)
-		return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE}
+		return {"local_media_path": path, "canonical_name": self.extract_canonical_name(media_info), "media_type": JobType.IMAGE, "last_pk": media_info.pk}
 
 	def download_story(self, story_info: Story) -> dict:
 		path, media_type, media_info = "", JobType.UNKNOWN, {}
@@ -259,7 +268,7 @@ class InstagramScraper(ScraperAbstract):
 		for media_chunk in Utils.chunker(media_info.resources, 10):
 			chunk = []
 			for media in media_chunk:
-				_media_info = self.download_hndlr(self.cl.media_info, media.pk)
+				_media_info = self.download_hndlr(self.cl.media_info, media.pk, use_cache=False)
 				if media.media_type == 1: # photo
 					chunk.append(self.download_photo(url=_media_info.thumbnail_url, media_info=_media_info))
 				elif media.media_type == 2: # video
@@ -287,7 +296,7 @@ class InstagramScraper(ScraperAbstract):
 		try:
 			scrap_type, media_id = self.scrap(job.url)
 			if scrap_type == "media":
-				media_info = self.download_hndlr(self.cl.media_info, media_id)
+				media_info = self.download_hndlr(self.cl.media_info, media_id, use_cache=False)
 				logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
 				if media_info.media_type == 2 and media_info.product_type == "clips": # Reels
 					res.append(self.download_video(url=media_info.video_url, media_info=media_info))
```
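Both `media_info` call sites now pass `use_cache=False`, which tells instagrapi's `Client.media_info` to bypass its in-memory media cache, while `download_video`/`download_photo` thread each item's `last_pk` into the result dict so the worker can seed the follow-up scroll job. Plausibly the cache bypass ensures a fresh fetch (and real request traffic) rather than stale cached metadata; a toy cache illustrating the behavioural difference (the class below is a stand-in, not the real instagrapi client):

```python
class FakeClient:
	"""Toy stand-in showing cached vs. forced-fresh lookups."""
	def __init__(self) -> None:
		self._cache: dict = {}
		self._calls = 0

	def media_info(self, pk: int, use_cache: bool = True) -> dict:
		self._calls += 1  # every call counts, but only misses "hit the network"
		if use_cache and pk in self._cache:
			return self._cache[pk]
		info = {"pk": pk, "fetched_on_call": self._calls}  # simulated network fetch
		self._cache[pk] = info
		return info

cl = FakeClient()
a = cl.media_info(1)                   # fetched, then cached
b = cl.media_info(1)                   # cache hit: possibly stale data
c = cl.media_info(1, use_cache=False)  # forced refetch
print(a["fetched_on_call"], b["fetched_on_call"], c["fetched_on_call"])  # 1 1 3
```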