warp-beacon 2.7.3__tar.gz → 2.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.7.3/warp_beacon.egg-info → warp_beacon-2.7.4}/PKG-INFO +1 -1
- warp_beacon-2.7.4/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/__init__.py +4 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/instagram/instagram.py +47 -11
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/uploader/__init__.py +2 -1
- {warp_beacon-2.7.3 → warp_beacon-2.7.4/warp_beacon.egg-info}/PKG-INFO +1 -1
- warp_beacon-2.7.3/warp_beacon/__version__.py +0 -2
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/LICENSE +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/MANIFEST.in +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/README.md +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/assets/cc-group-black.png +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/assets/placeholder.gif +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/etc/.gitignore +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/etc/accounts.json +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/etc/proxies.json +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/pyproject.toml +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/setup.cfg +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/setup.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/jobs/abstract.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scheduler/instagram_human.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scheduler/scheduler.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/abstract.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/account_selector.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/instagram/captcha.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/instagram/wb_instagrapi.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/link_resolver.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/utils.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/youtube/abstract.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/storage/mongo.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/bot.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/caption_shortener.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/download_status.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/edit_message.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/handlers.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/progress_bar.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/progress_file_reader.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/types.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon/yt_auth.py +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon.egg-info/requires.txt +0 -0
- {warp_beacon-2.7.3 → warp_beacon-2.7.4}/warp_beacon.egg-info/top_level.txt +0 -0
@@ -392,6 +392,10 @@ class AsyncDownloader(object):
|
|
392
392
|
))
|
393
393
|
else:
|
394
394
|
logging.info("Scrolling in progress, ignoring request")
|
395
|
+
|
396
|
+
# report media seen
|
397
|
+
if job.job_origin is Origin.INSTAGRAM:
|
398
|
+
actor.report_seen(items)
|
395
399
|
else:
|
396
400
|
logging.info("Job already in work in parallel worker. Redirecting job to upload worker.")
|
397
401
|
self.uploader.queue_task(job.to_upload_job())
|
@@ -256,6 +256,7 @@ class InstagramScraper(ScraperAbstract):
|
|
256
256
|
"canonical_name": self.extract_canonical_name(media_info),
|
257
257
|
"media_type": JobType.VIDEO,
|
258
258
|
"last_pk": media_info.pk,
|
259
|
+
"last_id": media_info.id,
|
259
260
|
"media_info": {"duration": round(media_info.video_duration)}
|
260
261
|
}
|
261
262
|
|
@@ -270,7 +271,8 @@ class InstagramScraper(ScraperAbstract):
|
|
270
271
|
"local_media_path": self.rename_local_file(path),
|
271
272
|
"canonical_name": self.extract_canonical_name(media_info),
|
272
273
|
"media_type": JobType.IMAGE,
|
273
|
-
"last_pk": media_info.pk
|
274
|
+
"last_pk": media_info.pk,
|
275
|
+
"last_id": media_info.id
|
274
276
|
}
|
275
277
|
|
276
278
|
def download_story(self, story_info: Story) -> dict:
|
@@ -296,7 +298,14 @@ class InstagramScraper(ScraperAbstract):
|
|
296
298
|
media_type = JobType.VIDEO
|
297
299
|
media_info["duration"] = story_info.video_duration
|
298
300
|
|
299
|
-
return {
|
301
|
+
return {
|
302
|
+
"local_media_path": self.rename_local_file(path),
|
303
|
+
"media_type": media_type,
|
304
|
+
"media_info": media_info,
|
305
|
+
"effective_url": effective_url,
|
306
|
+
"last_pk": story_info.pk,
|
307
|
+
"last_id": story_info.id
|
308
|
+
}
|
300
309
|
|
301
310
|
def download_stories(self, stories: list[Story]) -> dict:
|
302
311
|
chunks = []
|
@@ -306,21 +315,30 @@ class InstagramScraper(ScraperAbstract):
|
|
306
315
|
chunk.append(self.download_story(story_info=story))
|
307
316
|
chunks.append(chunk)
|
308
317
|
|
309
|
-
return {
|
318
|
+
return {
|
319
|
+
"media_type": JobType.COLLECTION,
|
320
|
+
"save_items": True,
|
321
|
+
"items": chunks
|
322
|
+
}
|
310
323
|
|
311
324
|
def download_album(self, media_info: Media) -> dict:
|
312
325
|
chunks = []
|
313
326
|
for media_chunk in Utils.chunker(media_info.resources, 10):
|
314
327
|
chunk = []
|
315
328
|
for media in media_chunk:
|
316
|
-
_media_info = self.download_hndlr(self.cl.
|
329
|
+
_media_info = self.download_hndlr(self.cl.media_info, media.pk)
|
317
330
|
if media.media_type == 1: # photo
|
318
331
|
chunk.append(self.download_photo(url=_media_info.thumbnail_url, media_info=_media_info))
|
319
332
|
elif media.media_type == 2: # video
|
320
333
|
chunk.append(self.download_video(url=_media_info.video_url, media_info=_media_info))
|
321
334
|
chunks.append(chunk)
|
322
335
|
|
323
|
-
return {
|
336
|
+
return {
|
337
|
+
"media_type": JobType.COLLECTION,
|
338
|
+
"items": chunks,
|
339
|
+
"last_pk": media_info.pk,
|
340
|
+
"last_id": media_info.id
|
341
|
+
}
|
324
342
|
|
325
343
|
def extract_canonical_name(self, media: Media) -> str:
|
326
344
|
ret = ""
|
@@ -349,7 +367,7 @@ class InstagramScraper(ScraperAbstract):
|
|
349
367
|
# "chat_id": self.job.chat_id,
|
350
368
|
# "message_id": self.job.placeholder_message_id
|
351
369
|
#})
|
352
|
-
media_info = self.download_hndlr(self.cl.
|
370
|
+
media_info = self.download_hndlr(self.cl.media_info, media_id)
|
353
371
|
logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
|
354
372
|
if media_info.media_type == 2 and media_info.product_type in ("clips", "ad"): # Reels
|
355
373
|
res.append(self.download_video(url=media_info.video_url, media_info=media_info))
|
@@ -357,10 +375,6 @@ class InstagramScraper(ScraperAbstract):
|
|
357
375
|
res.append(self.download_photo(url=media_info.thumbnail_url, media_info=media_info))
|
358
376
|
elif media_info.media_type == 8: # Album
|
359
377
|
res.append(self.download_album(media_info=media_info))
|
360
|
-
try:
|
361
|
-
self.cl.media_seen([media_info.id])
|
362
|
-
except Exception as e:
|
363
|
-
logging.warning("Failed to mark seen with id = '%s'", media_info.id, exc_info=e)
|
364
378
|
elif scrap_type == "story":
|
365
379
|
story_info = self.cl.story_info(media_id)
|
366
380
|
logging.info("media_type for story is '%d'", story_info.media_type)
|
@@ -460,4 +474,26 @@ class InstagramScraper(ScraperAbstract):
|
|
460
474
|
"report_type": ReportType.PROGRESS
|
461
475
|
}
|
462
476
|
self.status_pipe.send(msg)
|
463
|
-
self._download_progress_threshold += 5
|
477
|
+
self._download_progress_threshold += 5
|
478
|
+
|
479
|
+
def report_seen(self, items: dict) -> None:
|
480
|
+
try:
|
481
|
+
seen = []
|
482
|
+
for item in items:
|
483
|
+
if item["media_type"] == JobType.COLLECTION:
|
484
|
+
if item.get("last_id", None):
|
485
|
+
seen.append(item["last_id"])
|
486
|
+
if item.get("items", None):
|
487
|
+
for col_item in item["items"]:
|
488
|
+
last_id = col_item.get("last_id", None)
|
489
|
+
if last_id:
|
490
|
+
seen.append(last_id)
|
491
|
+
else:
|
492
|
+
last_id = item.get("last_id", None)
|
493
|
+
if last_id:
|
494
|
+
seen.append(last_id)
|
495
|
+
|
496
|
+
if seen:
|
497
|
+
self.download_hndlr(self.cl.media_seen, seen)
|
498
|
+
except Exception as e:
|
499
|
+
logging.error("Failed to report seen media!", exc_info=e)
|
@@ -4,6 +4,7 @@ from typing import Callable
|
|
4
4
|
import asyncio
|
5
5
|
import threading
|
6
6
|
import multiprocessing
|
7
|
+
from queue import Empty
|
7
8
|
|
8
9
|
from warp_beacon.jobs.types import JobType
|
9
10
|
from warp_beacon.jobs.upload_job import UploadJob
|
@@ -179,7 +180,7 @@ class AsyncUploader(object):
|
|
179
180
|
logging.info("No callback no call!!")
|
180
181
|
except Exception as e:
|
181
182
|
logging.exception(e)
|
182
|
-
except
|
183
|
+
except Empty:
|
183
184
|
pass
|
184
185
|
except Exception as e:
|
185
186
|
logging.error("Exception occurred inside upload worker!")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|