warp-beacon 2.2.67__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.2.67/warp_beacon.egg-info → warp_beacon-2.3.0}/PKG-INFO +4 -2
- warp_beacon-2.3.0/etc/accounts.json +44 -0
- warp_beacon-2.3.0/etc/proxies.json +6 -0
- warp_beacon-2.3.0/etc/warp_beacon.conf +14 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/setup.py +4 -0
- warp_beacon-2.3.0/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/abstract.py +4 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/__init__.py +18 -11
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/abstract.py +6 -3
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/account_selector.py +20 -4
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/exceptions.py +1 -1
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/instagram/instagram.py +13 -6
- warp_beacon-2.3.0/warp_beacon/scraper/link_resolver.py +88 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/abstract.py +16 -4
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/music.py +0 -3
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/shorts.py +1 -4
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/youtube.py +3 -6
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/storage/mongo.py +1 -1
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/bot.py +3 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/handlers.py +82 -65
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/placeholder_message.py +4 -4
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/utils.py +10 -34
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/uploader/__init__.py +12 -2
- {warp_beacon-2.2.67 → warp_beacon-2.3.0/warp_beacon.egg-info}/PKG-INFO +4 -2
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/SOURCES.txt +2 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/top_level.txt +1 -0
- warp_beacon-2.2.67/etc/accounts.json +0 -72
- warp_beacon-2.2.67/etc/warp_beacon.conf +0 -15
- warp_beacon-2.2.67/warp_beacon/__version__.py +0 -2
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/LICENSE +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/MANIFEST.in +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/README.md +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/assets/placeholder.gif +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/etc/.gitignore +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/pyproject.toml +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/setup.cfg +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scheduler/scheduler.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/requires.txt +0 -0
{warp_beacon-2.2.67/warp_beacon.egg-info → warp_beacon-2.3.0}/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: warp_beacon
-Version: 2.2.67
+Version: 2.3.0
 Summary: Telegram bot for expanding external media links
 Home-page: https://github.com/sb0y/warp_beacon
 Author: Andrey Bagrintsev
@@ -236,6 +236,8 @@ Requires-Dist: urlextract
 Requires-Dist: pillow
 Requires-Dist: pymongo
 Requires-Dist: instagrapi==2.0.0
+Dynamic: author
+Dynamic: home-page
 
 # warp_beacon [](https://github.com/sb0y/warp_beacon/actions/workflows/python-publish.yml) [](https://github.com/sb0y/warp_beacon/actions/workflows/docker-image.yml) [](https://github.com/sb0y/warp_beacon/actions/workflows/build-deb.yml)
 
warp_beacon-2.3.0/etc/accounts.json (new file)
@@ -0,0 +1,44 @@
+{
+    "instagram":
+    [
+        {
+            "login": "ig_login",
+            "password": "ig_pass",
+            "imap_server": "gmail.com",
+            "imap_login": "your@mail.login",
+            "imap_password": "your_mail_pass",
+            "auth_details":
+            {
+                "delay_range": [1, 3],
+                "country_code": 7,
+                "locale": "en_US",
+                "timezone_offset": 10800,
+                "user_agent": "Barcelona 291.0.0.31.111 Android (33/13; 600dpi; 1440x3044; samsung; SM-G998B; p3s; exynos2100; en_US; 493450264)",
+                "device":
+                {
+                    "app_version": "291.0.0.31.111",
+                    "android_version": 33,
+                    "android_release": "13.0.0",
+                    "dpi": "600dpi",
+                    "resolution": "1440x3044",
+                    "manufacturer": "Samsung",
+                    "device": "p3s",
+                    "model": "SM-G998B",
+                    "cpu": "exynos2100",
+                    "version_code": "493450264"
+                }
+            },
+            "account_admins": "@BelisariusCawl",
+            "proxy_id": "ipv6_proxy",
+            "force_ipv6": true
+        }
+    ],
+    "youtube":
+    [
+        {
+            "account_admins": "@BelisariusCawl",
+            "force_ipv6": true,
+            "proxy_id": "ipv6_proxy"
+        }
+    ]
+}
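Note: the new etc/proxies.json (+6 lines) is not rendered in this capture. Judging from AccountSelector.get_account_proxy() further down, which matches an account's "proxy_id" against each entry's "id" and hands the "dsn" string to the scraper, a plausible shape is the following sketch (values illustrative, not the shipped file):

    [
        {
            "id": "ipv6_proxy",
            "dsn": "socks5://user:pass@127.0.0.1:1080"
        }
    ]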
warp_beacon-2.3.0/etc/warp_beacon.conf (new file)
@@ -0,0 +1,14 @@
+TG_TOKEN="your_tg_token"
+TG_BOT_NAME="your_bot_name"
+TG_BOT_ADMIN_USERNAME="@BelisariusCawl"
+TG_API_ID="your_api_id"
+TG_API_HASH="your_api_has"
+TG_BOT_ADMIN_USERNAME="@BelisariusCawl"
+MONGODB_HOST="mongodb"
+MONGODB_PORT="27017"
+MONGODB_USER="root"
+MONGODB_PASSWORD="changeme"
+ENABLE_DONATES=true
+SERVICE_ACCOUNTS_FILE=/var/warp_beacon/accounts.json
+PROXY_FILE=/var/warp_beacon/proxies.json
+FORCE_IPV6=true
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/setup.py
@@ -83,6 +83,7 @@ setup(
         "warp_beacon/scraper/youtube/shorts",
         "warp_beacon/scraper/youtube/music",
         "warp_beacon/scraper/fail_handler",
+        "warp_beacon/scraper/link_resolver",
         "warp_beacon/storage/mongo"
     ],
     #scripts=['scripts/wait_dc_update.py'],
@@ -98,6 +99,9 @@ setup(
         ),
         ("/var/warp_beacon/",
             ["etc/accounts.json"]
+        ),
+        ("/var/warp_beacon/",
+            ["etc/proxies.json"]
         )
     ],
 
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/jobs/abstract.py
@@ -42,6 +42,8 @@ class JobSettings(TypedDict):
     chat_type: ChatType
     account_admins: str
     job_postponed_until: int
+    message_leftover: str
+    replay: bool
 
 class AbstractJob(ABC):
     job_id: uuid.UUID = None
@@ -76,6 +78,8 @@ class AbstractJob(ABC):
     chat_type: ChatType = None
     account_admins: str = None
     job_postponed_until: int = -1
+    message_leftover: str = ""
+    replay: bool = False
 
     def __init__(self, **kwargs: Unpack[JobSettings]) -> None:
         if kwargs:
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/__init__.py
@@ -5,7 +5,9 @@ from typing import Optional
 import multiprocessing
 from queue import Empty
 
-
+import logging
+
+from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YoutubeLiveError, YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed
 from warp_beacon.mediainfo.video import VideoInfo
 from warp_beacon.mediainfo.audio import AudioInfo
 from warp_beacon.mediainfo.silencer import Silencer
@@ -18,10 +20,10 @@ from warp_beacon.jobs.types import JobType
 from warp_beacon.scraper.account_selector import AccountSelector
 from warp_beacon.storage.mongo import DBClient
 from warp_beacon.scraper.fail_handler import FailHandler
-
-import logging
+from warp_beacon.scraper.link_resolver import LinkResolver
 
 ACC_FILE = os.environ.get("SERVICE_ACCOUNTS_FILE", default="/var/warp_beacon/accounts.json")
+PROXY_FILE = os.environ.get("PROXY_FILE", default="/var/warp_beacon/proxies.json")
 
 class AsyncDownloader(object):
     __JOE_BIDEN_WAKEUP = None
@@ -38,7 +40,7 @@ class AsyncDownloader(object):
     def __init__(self, uploader: AsyncUploader, workers_count: int) -> None:
         self.manager = multiprocessing.Manager()
         self.allow_loop = self.manager.Value('i', 1)
-        self.acc_selector = AccountSelector(self.manager, ACC_FILE)
+        self.acc_selector = AccountSelector(self.manager, ACC_FILE, PROXY_FILE)
         self.uploader = uploader
         self.workers_count = workers_count
 
@@ -93,13 +95,18 @@ class AsyncDownloader(object):
             try:
                 job = self.job_queue.get()
                 if job is self.__JOE_BIDEN_WAKEUP:
-
+                    break
                 actor = None
                 try:
                     items = []
                     if job.job_origin is Origin.UNKNOWN:
                         logging.warning("Unknown task origin! Skipping.")
                         continue
+                    if LinkResolver.resolve_job(job):
+                        self.uploader.queue_task(job.to_upload_job(
+                            replay=True
+                        ))
+                        continue
                     if not job.in_process:
                         if job.job_postponed_until > 0:
                             if (job.job_postponed_until - time.time()) > 0:
@@ -111,16 +118,16 @@ class AsyncDownloader(object):
                     self.acc_selector.set_module(job.job_origin)
                     if job.job_origin is Origin.INSTAGRAM:
                         from warp_beacon.scraper.instagram.instagram import InstagramScraper
-                        actor = InstagramScraper(selector.get_current())
+                        actor = InstagramScraper(selector.get_current(), selector.get_account_proxy())
                     elif job.job_origin is Origin.YT_SHORTS:
                         from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
-                        actor = YoutubeShortsScraper(selector.get_current())
+                        actor = YoutubeShortsScraper(selector.get_current(), selector.get_account_proxy())
                     elif job.job_origin is Origin.YT_MUSIC:
                         from warp_beacon.scraper.youtube.music import YoutubeMusicScraper
-                        actor = YoutubeMusicScraper(selector.get_current())
+                        actor = YoutubeMusicScraper(selector.get_current(), selector.get_account_proxy())
                     elif job.job_origin is Origin.YOUTUBE:
                         from warp_beacon.scraper.youtube.youtube import YoutubeScraper
-                        actor = YoutubeScraper(selector.get_current())
+                        actor = YoutubeScraper(selector.get_current(), selector.get_account_proxy())
                     actor.send_message_to_admin_func = self.send_message_to_admin
                     actor.auth_event = self.auth_event
                     while True:
@@ -131,7 +138,7 @@ class AsyncDownloader(object):
                             logging.info("done")
                         else:
                             logging.info("Downloading URL '%s'", job.url)
-                            items = actor.download(job.url)
+                            items = actor.download(job)
                         break
                     except NotFound as e:
                         logging.warning("Not found error occurred!")
@@ -196,7 +203,7 @@ class AsyncDownloader(object):
                         self.try_next_account(selector, job, report_error="captcha")
                         self.job_queue.put(job)
                         break
-                    except
+                    except YoutubeLiveError as e:
                         logging.warning("Youtube Live videos are not supported. Skipping.")
                         logging.exception(e)
                         self.uploader.queue_task(job.to_upload_job(
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/abstract.py
@@ -18,15 +18,18 @@ class ScraperAbstract(ABC):
     auth_event = None
     account = None
     account_index = 0
+    proxy = None
 
-    def __init__(self, account: tuple) -> None:
+
+    def __init__(self, account: tuple, proxy: dict=None) -> None:
         self.account_index = account[0]
         self.account = account[1]
-
+        self.proxy = proxy
+        if self.account.get("force_ipv6", False):
             self.force_ipv6()
 
     def __del__(self) -> None:
-        if
+        if self.account.get("force_ipv6", False):
             self.restore_gai()
 
     @abstractmethod
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/account_selector.py
@@ -1,9 +1,10 @@
-import multiprocessing.managers
 import os
 import json
 import re
+from typing import Optional
 
 import multiprocessing
+import multiprocessing.managers
 
 from warp_beacon.jobs import Origin
 
@@ -11,6 +12,7 @@ import logging
 
 class AccountSelector(object):
     accounts = []
+    proxies = []
     current = None
     current_module_name = None
     accounts_meta_data = None
@@ -18,7 +20,7 @@ class AccountSelector(object):
     manager = None
     account_index = {}
 
-    def __init__(self, manager: multiprocessing.managers.SyncManager, acc_file_path: str) -> None:
+    def __init__(self, manager: multiprocessing.managers.SyncManager, acc_file_path: str, proxy_file_path: str=None) -> None:
         self.manager = manager
         self.accounts_meta_data = self.manager.dict()
         if os.path.exists(acc_file_path):
@@ -29,14 +31,28 @@ class AccountSelector(object):
             #self.load_yt_sessions()
             for acc_type, _ in self.accounts.items():
                 self.account_index[acc_type] = self.manager.Value('i', 0)
+            if proxy_file_path:
+                with open(proxy_file_path, 'r', encoding="utf-8") as f:
+                    self.proxies = json.loads(f.read())
         else:
             raise ValueError("Accounts file not found")
 
     def __del__(self) -> None:
         pass
 
-
-
+    def get_account_proxy(self) -> Optional[dict]:
+        if self.proxies:
+            try:
+                current_acc_pid = self.get_current()[1].get("proxy_id", "").strip()
+                for proxy in self.proxies:
+                    pid = proxy.get("id", "").strip()
+                    if pid and current_acc_pid and pid == current_acc_pid:
+                        logging.info("Account proxy matched '%s'", proxy)
+                        return proxy
+            except Exception as e:
+                logging.warning("Error on selecting account proxy!")
+                logging.exception(e)
+        return None
 
     def load_yt_sessions(self) -> None:
         if "youtube" not in self.accounts:
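A minimal standalone sketch of the lookup rule get_account_proxy() implements, wired to the example configs above (illustrative data, not package code):

    account = {"login": "ig_login", "proxy_id": "ipv6_proxy"}
    proxies = [{"id": "ipv6_proxy", "dsn": "socks5://user:pass@127.0.0.1:1080"}]

    def match_proxy(account: dict, proxies: list) -> dict:
        # Mirror of the matching loop: pair the account's proxy_id with an entry's id.
        pid = account.get("proxy_id", "").strip()
        for proxy in proxies:
            if pid and proxy.get("id", "").strip() == pid:
                return proxy
        return {}

    print(match_proxy(account, proxies))  # -> {'id': 'ipv6_proxy', 'dsn': 'socks5://user:pass@127.0.0.1:1080'}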
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/instagram/instagram.py
@@ -17,11 +17,13 @@ from instagrapi.mixins.story import Story
 #from instagrapi.types import Media
 from instagrapi import Client
 from instagrapi.mixins.challenge import ChallengeChoice
-from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, ChallengeRequired, ChallengeSelfieCaptcha
+from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, ChallengeRequired, \
+    ChallengeSelfieCaptcha, ChallengeUnknownStep, UnknownError as IGUnknownError
 
 from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, extract_exception_message
 from warp_beacon.scraper.abstract import ScraperAbstract
 from warp_beacon.jobs.types import JobType
+from warp_beacon.jobs.download_job import DownloadJob
 from warp_beacon.telegram.utils import Utils
 
 import logging
@@ -32,11 +34,16 @@ class InstagramScraper(ScraperAbstract):
     cl = None
     inst_session_file = ""
 
-    def __init__(self, account: tuple) -> None:
-        super().__init__(account)
+    def __init__(self, account: tuple, proxy: dict=None) -> None:
+        super().__init__(account, proxy)
         #
         self.inst_session_file = INST_SESSION_FILE_TPL % self.account_index
         self.cl = Client()
+        if self.proxy:
+            proxy_dsn = self.proxy.get("dsn", "")
+            if proxy_dsn:
+                self.cl.set_proxy(proxy_dsn)
+                logging.info("Using proxy DSN '%s'", proxy_dsn)
         #self.cl.logger.setLevel(logging.DEBUG)
         self.setup_device()
         self.cl.challenge_code_handler = self.challenge_code_handler
@@ -139,7 +146,7 @@ class InstagramScraper(ScraperAbstract):
             try:
                 ret_val = func(*args, **kwargs)
                 break
-            except (ChallengeRequired, ChallengeSelfieCaptcha) as e:
+            except (ChallengeRequired, ChallengeSelfieCaptcha, ChallengeUnknownStep) as e:
                 logging.warning("Instagram wants Challange!")
                 logging.exception(e)
                 raise CaptchaIssue("a captcha issue arose")
@@ -235,11 +242,11 @@ class InstagramScraper(ScraperAbstract):
 
         return {"media_type": JobType.COLLECTION, "items": chunks}
 
-    def download(self, url: str) -> Optional[list[dict]]:
+    def download(self, job: DownloadJob) -> Optional[list[dict]]:
         res = []
         while True:
             try:
-                scrap_type, media_id = self.scrap(url)
+                scrap_type, media_id = self.scrap(job.url)
                 if scrap_type == "media":
                     media_info = self._download_hndlr(self.cl.media_info, media_id)
                     logging.info("media_type is '%d', product_type is '%s'", media_info.media_type, media_info.product_type)
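For context, instagrapi's Client.set_proxy() (called in the __init__ hunk above) takes a single proxy URL string, so the "dsn" value from proxies.json is passed through unchanged. A hedged usage sketch (DSN values illustrative; SOCKS URLs additionally require the requests[socks] extra):

    from instagrapi import Client

    cl = Client()
    cl.set_proxy("http://user:pass@proxy.example.com:3128")  # plain HTTP proxy
    # cl.set_proxy("socks5://user:pass@127.0.0.1:1080")      # SOCKS5 variant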
warp_beacon-2.3.0/warp_beacon/scraper/link_resolver.py (new file)
@@ -0,0 +1,88 @@
+import os
+import re
+import logging
+import requests
+from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
+
+from warp_beacon.jobs import Origin
+from warp_beacon.jobs.download_job import DownloadJob
+
+class LinkResolver(object):
+    "Resolve short links"
+    canonical_link_resolve_re = re.compile(r'<link.*rel="canonical".*href="([^"]+)"\s*/?>')
+
+    @staticmethod
+    def extract_youtu_be_link(url: str) -> str:
+        try:
+            response = requests.get(
+                url=url,
+                allow_redirects=False,
+                timeout=int(os.environ.get("REQUESTS_TIMEOUT", default=60))
+            )
+            logging.info("Converted URL '%s' to '%s'", url, response.headers["Location"])
+            return response.headers["Location"]
+        except Exception as e:
+            logging.error("Failed to extract YouTube link!")
+            logging.exception(e)
+
+        return ''
+
+    @staticmethod
+    def extract_youtu_be_link_local(url: str) -> str:
+        try:
+            parsed = urlparse(url)
+            video_id = parsed.path.split('/')[-1] if parsed.path else ''
+            query_params = parse_qsl(parsed.query)
+            query_params.append(('v', video_id))
+            query_params.append(('feature', 'youtu.be'))
+
+            new_url = urlunparse((
+                parsed.scheme,
+                'www.youtube.com',
+                '/watch',
+                '',  # path parameters
+                urlencode(query_params),
+                ''   # anchor
+            ))
+            logging.info("Locally converted URL '%s' to '%s'", url, new_url)
+            return new_url
+        except Exception as e:
+            logging.error("Failed to extract YouTube link!")
+            logging.exception(e)
+
+        return ''
+
+    @staticmethod
+    def resolve_ig_share_link(url: str) -> str:
+        '''
+        expected url: https://www.instagram.com/share/reel/BAHtk2AamB
+        result url: https://www.instagram.com/reel/DAKjQgUNzuH/
+        '''
+        try:
+            content = requests.get(
+                url,
+                timeout=int(os.environ.get("REQUESTS_TIMEOUT", default=60))
+            ).text
+            res = re.search(LinkResolver.canonical_link_resolve_re, content)
+            new_url = res.group(1).strip()
+            logging.info("Converted IG share '%s' link to '%s'", url, new_url)
+            return new_url
+        except Exception as e:
+            logging.error("Failed to convert IG share link!")
+            logging.exception(e)
+
+        return url
+
+    @staticmethod
+    def resolve_job(job: DownloadJob) -> bool:
+        ret = False
+        if job.job_origin is Origin.YOUTU_BE:
+            job.url = LinkResolver.extract_youtu_be_link(job.url)
+            job.job_origin = Origin.YOUTUBE
+            ret = True
+        if job.job_origin is Origin.INSTAGRAM:
+            if "share/" in job.url:
+                job.url = LinkResolver.resolve_ig_share_link(job.url)
+                ret = True
+
+        return ret
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/abstract.py
@@ -13,6 +13,7 @@ import http.client
 from PIL import Image
 import numpy as np
 
+from warp_beacon.jobs.download_job import DownloadJob
 from warp_beacon.scraper.abstract import ScraperAbstract
 #from warp_beacon.mediainfo.abstract import MediaInfoAbstract
 from warp_beacon.scraper.exceptions import TimeOut, Unavailable, extract_exception_message
@@ -80,11 +81,11 @@ class YoutubeAbstract(ScraperAbstract):
     DOWNLOAD_DIR = "/tmp"
     YT_SESSION_FILE = '/var/warp_beacon/yt_session_%d.json'
 
-    def __init__(self, account: tuple) -> None:
-
+    #def __init__(self, account: tuple, proxy: dict=None) -> None:
+    #    super().__init__(account, proxy)
 
-    def __del__(self) -> None:
-
+    #def __del__(self) -> None:
+    #    pass
 
     def rename_local_file(self, filename: str) -> str:
         if not os.path.exists(filename):
@@ -235,4 +236,15 @@ class YoutubeAbstract(ScraperAbstract):
         yt_opts["use_oauth"] = True
         yt_opts["allow_oauth_cache"] = True
         yt_opts["token_file"] = self.YT_SESSION_FILE % self.account_index
+        if self.proxy:
+            proxy_dsn = self.proxy.get("dsn", "")
+            if proxy_dsn:
+                logging.info("Using proxy DSN '%s'", proxy_dsn)
+                yt_opts["proxies"] = {"http": proxy_dsn, "https": proxy_dsn}
         return YouTube(**yt_opts)
+
+    def _download(self, url: str) -> list:
+        raise NotImplementedError("Implement _download method")
+
+    def download(self, job: DownloadJob) -> list:
+        return self._download_hndlr(self._download, job.url)
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/scraper/youtube/youtube.py
@@ -1,6 +1,6 @@
 from warp_beacon.jobs.types import JobType
 from warp_beacon.scraper.youtube.abstract import YoutubeAbstract
-from warp_beacon.scraper.exceptions import
+from warp_beacon.scraper.exceptions import YoutubeLiveError, NotFound, YotubeAgeRestrictedError
 
 from pytubefix.exceptions import AgeRestrictedError
 
@@ -35,7 +35,7 @@ class YoutubeScraper(YoutubeAbstract):
             yt = self.build_yt(url)
 
             if self.is_live(yt.initial_data):
-                raise
+                raise YoutubeLiveError("Youtube Live is not supported")
 
             if yt:
                 thumbnail = self._download_hndlr(self.download_thumbnail, yt.video_id)
@@ -65,7 +65,4 @@ class YoutubeScraper(YoutubeAbstract):
         except AgeRestrictedError as e:
             raise YotubeAgeRestrictedError("Youtube Age Restricted error")
 
-        return res
-
-    def download(self, url: str) -> list:
-        return self._download_hndlr(self._download, url)
+        return res
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/storage/mongo.py
@@ -7,7 +7,7 @@ class DBClient(object):
 
     def __init__(self) -> None:
         self.client = MongoClient(
-            host=os.environ.get("MONGODB_HOST", default='127.0.0.1'),
+            host=os.environ.get("MONGODB_HOST", default='127.0.0.1'),
             port=int(os.environ.get("MONGODB_PORT", default=27017)),
             username=os.environ.get("MONGODB_USER", default='root'),
             password=os.environ.get("MONGODB_PASSWORD", default="changeme")
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/bot.py
@@ -163,6 +163,9 @@ class Bot(object):
         if job.chat_type in (ChatType.GROUP, ChatType.SUPERGROUP):
             if job.canonical_name:
                 caption += "\n—\n"
+            if job.message_leftover:
+                caption += job.message_leftover
+                caption += "\n—\n"
             if job.source_username:
                 caption += f"Requested by <b>@{job.source_username}</b>"
             if job.source_username and job.url:
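With assumed values, the new caption block composes like this in a group chat (sketch only; the separator string is taken from the hunk above):

    caption = "Some canonical name"    # assumed: canonical_name already appended
    caption += "\n—\n"                 # printed because canonical_name is set
    caption += "look at this"          # job.message_leftover
    caption += "\n—\n"
    caption += "Requested by <b>@user</b>"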
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/handlers.py
@@ -11,6 +11,7 @@ from warp_beacon.jobs.download_job import DownloadJob
 from warp_beacon.jobs.upload_job import UploadJob
 from warp_beacon.jobs import Origin
 from warp_beacon.jobs.types import JobType
+from warp_beacon.scraper.link_resolver import LinkResolver
 
 import logging
 
@@ -58,6 +59,73 @@ class Handlers(object):
             "Currently supported: Instagram, YouTube Shorts and YouTube Music."
         )
 
+    async def upload_wrapper(self, job: UploadJob) -> None:
+        try:
+            if job.replay:
+                logging.info("Replaying job with URL: '%s'", job.url)
+                return await self.queue_job(job.to_download_job(replay=False))
+
+            if job.job_failed and job.job_failed_msg:
+                if job.placeholder_message_id:
+                    await self.bot.placeholder.remove(job.chat_id, job.placeholder_message_id)
+                return await self.bot.send_text(chat_id=job.chat_id, text=job.job_failed_msg, reply_id=job.message_id)
+
+            if job.job_warning and job.job_warning_msg:
+                return await self.bot.placeholder.update_text(job.chat_id, job.placeholder_message_id, job.job_warning_msg)
+
+            tg_file_ids = await self.bot.upload_job(job)
+            if tg_file_ids:
+                if job.media_type == JobType.COLLECTION and job.save_items:
+                    for chunk in job.media_collection:
+                        for i in chunk:
+                            self.storage.add_media(
+                                tg_file_ids=[i.tg_file_id],
+                                media_url=i.effective_url,
+                                media_type=i.media_type.value,
+                                origin=job.job_origin.value,
+                                canonical_name=job.canonical_name
+                            )
+                else:
+                    self.storage.add_media(
+                        tg_file_ids=[','.join(tg_file_ids)],
+                        media_url=job.url,
+                        media_type=job.media_type.value,
+                        origin=job.job_origin.value,
+                        canonical_name=job.canonical_name
+                    )
+        except Exception as e:
+            logging.error("Exception occurred while performing upload callback!")
+            logging.exception(e)
+
+    async def queue_job(self, job: DownloadJob) -> bool:
+        try:
+            # create placeholder message for long download
+            if not job.placeholder_message_id:
+                job.placeholder_message_id = await self.bot.placeholder.create(
+                    chat_id=job.chat_id,
+                    reply_id=job.message_id
+                )
+
+            if not job.placeholder_message_id:
+                return await self.bot.send_text(
+                    chat_id=job.chat_id,
+                    reply_id=job.message_id,
+                    text="Failed to create message placeholder. Please check your bot Internet connection."
+                )
+
+            self.bot.uploader.add_callback(
+                job.placeholder_message_id,
+                self.upload_wrapper
+            )
+
+            self.bot.downloader.queue_task(job)
+        except Exception as e:
+            logging.error("Failed to schedule download task!")
+            logging.exception(e)
+            return False
+
+        return True
+
     async def handler(self, client: Client, message: Message) -> None:
         if message is None:
             return
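Together with the scraper hunk earlier, the two new handler methods above close a loop: when LinkResolver rewrites a URL, the downloader short-circuits the job into the upload queue with replay=True, the uploader fires the placeholder callback, and upload_wrapper requeues it as a fresh download under the new URL. A toy model of that round trip (plain queues, not the package's classes):

    import queue

    download_q, upload_q = queue.Queue(), queue.Queue()
    download_q.put({"url": "https://youtu.be/abc123", "replay": False})

    job = download_q.get()
    job["url"] = "https://www.youtube.com/watch?v=abc123"  # LinkResolver.resolve_job rewrote it
    job["replay"] = True
    upload_q.put(job)                                      # queue_task(job.to_upload_job(replay=True))

    bounced = upload_q.get()
    if bounced["replay"]:                                  # upload_wrapper's replay branch
        bounced["replay"] = False
        download_q.put(bounced)                            # queue_job(job.to_download_job(replay=False))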
@@ -66,7 +134,12 @@ class Handlers(object):
             return
         chat = message.chat
         effective_message_id = message.id
-
+        urls_raw = self.url_extractor.find_urls(message_text)
+        urls, msg_leftover = [], ''
+        if urls_raw:
+            msg_leftover = Utils.compute_leftover(urls_raw, message_text)
+            # remove duplicates
+            urls = list(set(urls_raw))
 
         reply_text = "Wut?"
         if not urls:
@@ -74,13 +147,11 @@ class Handlers(object):
         else:
             for url in urls:
                 origin = Utils.extract_origin(url)
-                if origin is Origin.INSTAGRAM:
-                    url = Utils.resolve_ig_share_link(url)
                 if origin is Origin.YOUTU_BE:
-
-                    if
-
-
+                    new_url = LinkResolver.extract_youtu_be_link_local(url)
+                    if new_url:
+                        url = new_url
+                        origin = Origin.YOUTUBE
                 if origin is Origin.UNKNOWN:
                     logging.info("Only Instagram, YouTube Shorts and YouTube Music are now supported. Skipping.")
                     continue
@@ -119,64 +190,13 @@ class Handlers(object):
                             chat_id=chat.id,
                             chat_type=message.chat.type,
                             source_username=Utils.extract_message_author(message),
-                            canonical_name=canonical_name
+                            canonical_name=canonical_name,
+                            message_leftover=msg_leftover
                         )
                     )
                 else:
-
-                    try:
-                        if job.job_failed and job.job_failed_msg:
-                            if job.placeholder_message_id:
-                                await self.bot.placeholder.remove(chat.id, job.placeholder_message_id)
-                            return await self.bot.send_text(chat_id=chat.id, text=job.job_failed_msg, reply_id=job.message_id)
-                        if job.job_warning and job.job_warning_msg:
-                            return await self.bot.placeholder.update_text(chat.id, job.placeholder_message_id, job.job_warning_msg)
-                        tg_file_ids = await self.bot.upload_job(job)
-                        if tg_file_ids:
-                            if job.media_type == JobType.COLLECTION and job.save_items:
-                                for chunk in job.media_collection:
-                                    for i in chunk:
-                                        self.storage.add_media(
-                                            tg_file_ids=[i.tg_file_id],
-                                            media_url=i.effective_url,
-                                            media_type=i.media_type.value,
-                                            origin=job.job_origin.value,
-                                            canonical_name=job.canonical_name
-                                        )
-                            else:
-                                self.storage.add_media(
-                                    tg_file_ids=[','.join(tg_file_ids)],
-                                    media_url=job.url,
-                                    media_type=job.media_type.value,
-                                    origin=job.job_origin.value,
-                                    canonical_name=job.canonical_name
-                                )
-                    except Exception as e:
-                        logging.error("Exception occurred while performing upload callback!")
-                        logging.exception(e)
-
-                    try:
-                        # create placeholder message for long download
-                        placeholder_message_id = await self.bot.placeholder.create(
-                            chat_id=chat.id,
-                            reply_id=effective_message_id
-                        )
-
-                        if not placeholder_message_id:
-                            await self.bot.send_text(
-                                chat_id=chat.id,
-                                reply_id=effective_message_id,
-                                text="Failed to create message placeholder. Please check your bot Internet connection.")
-                            return
-
-                        self.bot.uploader.add_callback(
-                            placeholder_message_id,
-                            upload_wrapper
-                        )
-
-                        self.bot.downloader.queue_task(DownloadJob.build(
+                    if await self.queue_job(DownloadJob.build(
                         url=url,
-                        placeholder_message_id=placeholder_message_id,
                         message_id=effective_message_id,
                         chat_id=chat.id,
                         in_process=self.bot.uploader.is_inprocess(uniq_id),
@@ -184,11 +204,8 @@ class Handlers(object):
                         job_origin=origin,
                         source_username=Utils.extract_message_author(message),
                         chat_type=chat.type
-                    ))
+                    )):
                         self.bot.uploader.set_inprocess(uniq_id)
-                except Exception as e:
-                    logging.error("Failed to schedule download task!")
-                    logging.exception(e)
 
         if chat.type not in (ChatType.GROUP, ChatType.SUPERGROUP) and not urls:
             await self.bot.send_text(text=reply_text, reply_id=effective_message_id, chat_id=chat.id)
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/placeholder_message.py
@@ -3,13 +3,13 @@ import time
 from enum import Enum
 from typing import Optional
 
-from pyrogram.types import Message
+#from pyrogram.types import Message
 from pyrogram.errors import RPCError, FloodWait
 from pyrogram.enums import ParseMode
 
 import warp_beacon
 from warp_beacon.telegram.utils import Utils
-from warp_beacon.mediainfo.video import VideoInfo
+#from warp_beacon.mediainfo.video import VideoInfo
 
 import logging
 
@@ -131,7 +131,7 @@ class PlaceholderMessage(object):
                 reply = await self.reuse_ph_animation(chat_id, reply_id, text)
             except ValueError as e:
                 logging.warning("Failed to reuse tg_file_id!")
-                logging.exception(e)
+                #logging.exception(e)
                 reply = await self.reuse_ph_document(chat_id, reply_id, text)
                 self.placeholder.pl_type = PlaceholderType.DOCUMENT
         elif self.placeholder.pl_type == PlaceholderType.DOCUMENT:
@@ -139,7 +139,7 @@ class PlaceholderMessage(object):
                 reply = await self.reuse_ph_document(chat_id, reply_id, text)
             except ValueError as e:
                 logging.warning("Failed to reuse tg_file_id!")
-                logging.exception(e)
+                #logging.exception(e)
                 reply = await self.reuse_ph_animation(chat_id, reply_id, text)
                 self.placeholder.pl_type = PlaceholderType.ANIMATION
         else:
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/telegram/utils.py
@@ -1,7 +1,6 @@
 from typing import Union
 
 import re
-import requests
 
 from pyrogram.types import Message
 
@@ -12,7 +11,6 @@ import logging
 
 class Utils(object):
     expected_patronum_compiled_re = re.compile(r'Expected ([A-Z]+), got ([A-Z]+) file id instead')
-    canonical_link_resolve_re = re.compile(r'<link.*rel="canonical".*href="([^"]+)"\s*/?>')
 
     @staticmethod
     def extract_file_id(message: Message) -> Union[None, str]:
@@ -45,37 +43,6 @@ class Utils(object):
 
         return Origin.UNKNOWN
 
-    @staticmethod
-    def extract_youtu_be_link(url: str) -> str:
-        try:
-            response = requests.get(
-                url=url,
-                allow_redirects=False
-            )
-            return response.headers["Location"]
-        except Exception as e:
-            logging.error("Failed to extract YouTube link!")
-            logging.exception(e)
-
-        return ''
-
-    @staticmethod
-    def resolve_ig_share_link(url: str) -> str:
-        # expected url: https://www.instagram.com/share/reel/BAHtk2AamB
-        # result url: https://www.instagram.com/reel/DAKjQgUNzuH/
-        try:
-            if "instagram.com/" in url and "share/" in url:
-                content = requests.get(url).text
-                res = re.search(Utils.canonical_link_resolve_re, content)
-                new_url = res.group(1).strip()
-                logging.info("Converted IG share link to '%s'", new_url)
-                return new_url
-        except Exception as e:
-            logging.error("Failed to convert IG share link!")
-            logging.exception(e)
-
-        return url
-
     @staticmethod
     def parse_expected_patronum_error(err_text: str) -> tuple:
         '''
@@ -111,4 +78,13 @@ class Utils(object):
             return message.sender_chat.username
         if message.sender_chat.title:
             return message.sender_chat.title
-        return ''
+        return ''
+
+    @staticmethod
+    def compute_leftover(urls: list, message: str) -> str:
+        msg_leftover = ""
+        if len(message) > sum(len(u) for u in urls):
+            msg_leftover = message
+            for u in urls:
+                msg_leftover = msg_leftover.replace(u, '')
+        return msg_leftover.strip()
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon/uploader/__init__.py
@@ -81,10 +81,11 @@ class AsyncUploader(object):
             try:
                 job = self.job_queue.get()
                 if job is self.__JOE_BIDEN_WAKEUP:
-
+                    break
                 if job.is_message_to_admin and job.message_text and self.admin_message_callback:
                     asyncio.ensure_future(self.admin_message_callback(job.message_text, job.account_admins, job.yt_auth), loop=self.loop)
                     continue
+
                 path = ""
                 if job.media_type == JobType.COLLECTION:
                     for i in job.media_collection:
@@ -92,11 +93,14 @@ class AsyncUploader(object):
                         path += "%s; " % j.local_media_path
                 else:
                     path = job.local_media_path
+
                 in_process = job.in_process
                 uniq_id = job.uniq_id
                 message_id = job.placeholder_message_id
-
+
+                if not in_process and not job.job_failed and not job.job_warning and not job.replay:
                     logging.info("Accepted upload job, file(s): '%s'", path)
+
                 try:
                     if message_id in self.callbacks:
                         if job.job_failed:
@@ -106,6 +110,12 @@ class AsyncUploader(object):
                         self.process_done(uniq_id)
                         self.remove_callback(message_id)
                         continue
+
+                    if job.replay:
+                        asyncio.ensure_future(self.callbacks[message_id]["callback"](job), loop=self.loop)
+                        self.remove_callback(message_id)
+                        continue
+
                     if job.job_warning:
                         logging.info("Job warning occurred ...")
                         if job.job_warning_msg:
{warp_beacon-2.2.67 → warp_beacon-2.3.0/warp_beacon.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: warp_beacon
-Version: 2.2.67
+Version: 2.3.0
 Summary: Telegram bot for expanding external media links
 Home-page: https://github.com/sb0y/warp_beacon
 Author: Andrey Bagrintsev
@@ -236,6 +236,8 @@ Requires-Dist: urlextract
 Requires-Dist: pillow
 Requires-Dist: pymongo
 Requires-Dist: instagrapi==2.0.0
+Dynamic: author
+Dynamic: home-page
 
 # warp_beacon [](https://github.com/sb0y/warp_beacon/actions/workflows/python-publish.yml) [](https://github.com/sb0y/warp_beacon/actions/workflows/docker-image.yml) [](https://github.com/sb0y/warp_beacon/actions/workflows/build-deb.yml)
 
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/SOURCES.txt
@@ -7,6 +7,7 @@ setup.py
 assets/placeholder.gif
 etc/.gitignore
 etc/accounts.json
+etc/proxies.json
 etc/warp_beacon.conf
 etc/warp_beacon.service
 warp_beacon/__init__.py
@@ -37,6 +38,7 @@ warp_beacon/scraper/abstract.py
 warp_beacon/scraper/account_selector.py
 warp_beacon/scraper/exceptions.py
 warp_beacon/scraper/fail_handler.py
+warp_beacon/scraper/link_resolver.py
 warp_beacon/scraper/instagram/__init__.py
 warp_beacon/scraper/instagram/instagram.py
 warp_beacon/scraper/youtube/__init__.py
{warp_beacon-2.2.67 → warp_beacon-2.3.0}/warp_beacon.egg-info/top_level.txt
@@ -20,6 +20,7 @@ warp_beacon/scraper/exceptions
 warp_beacon/scraper/fail_handler
 warp_beacon/scraper/instagram
 warp_beacon/scraper/instagram/instagram
+warp_beacon/scraper/link_resolver
 warp_beacon/scraper/types
 warp_beacon/scraper/youtube
 warp_beacon/scraper/youtube/abstract
warp_beacon-2.2.67/etc/accounts.json (removed)
@@ -1,72 +0,0 @@
-{
-    "instagram":
-    [
-        {
-            "login": "ig_account0",
-            "password": "ig_password",
-            "imap_server": "imap.gmail.com",
-            "imap_login": "user@gmail.com",
-            "imap_password": "",
-            "auth_details":
-            {
-                "delay_range": [1, 3],
-                "country_code": 7,
-                "locale": "en_US",
-                "timezone_offset": 10800,
-                "user_agent": "Barcelona 291.0.0.31.111 Android (33/13; 600dpi; 1440x3044; samsung; SM-G998B; p3s; exynos2100; en_US; 493450264)",
-                "device":
-                {
-                    "app_version": "291.0.0.31.111",
-                    "android_version": 33,
-                    "android_release": "13.0.0",
-                    "dpi": "600dpi",
-                    "resolution": "1440x3044",
-                    "manufacturer": "Samsung",
-                    "device": "p3s",
-                    "model": "SM-G998B",
-                    "cpu": "exynos2100",
-                    "version_code": "493450264"
-                }
-            },
-            "account_admins": "@BelisariusCawl"
-        },
-        {
-            "login": "ig_account1",
-            "password": "passwd",
-            "imap_server": "imap.gmail.com",
-            "imap_login": "mail_login1",
-            "imap_password": "imap_password1",
-            "auth_details":
-            {
-                "delay_range": [1, 3],
-                "country_code": 7,
-                "locale": "en_US",
-                "timezone_offset": 10800,
-                "user_agent": "Barcelona 291.0.0.31.111 Android (33/13; 600dpi; 1440x3044; samsung; SM-G998B; p3s; exynos2100; en_US; 493450264)",
-                "device":
-                {
-                    "app_version": "291.0.0.31.111",
-                    "android_version": 33,
-                    "android_release": "13.0.0",
-                    "dpi": "600dpi",
-                    "resolution": "1440x3044",
-                    "manufacturer": "Samsung",
-                    "device": "p3s",
-                    "model": "SM-G998B",
-                    "cpu": "exynos2100",
-                    "version_code": "493450264"
-                }
-            },
-            "account_admins": "@BelisariusCawl"
-        }
-    ],
-    "youtube":
-    [
-        {
-            "account_admins": "@BelisariusCawl"
-        },
-        {
-            "account_admins": "@Lazyteh"
-        }
-    ]
-}
warp_beacon-2.2.67/etc/warp_beacon.conf (removed)
@@ -1,15 +0,0 @@
-TG_TOKEN=""
-TG_BOT_NAME=""
-TG_BOT_ADMINS_USERNAMES=""
-TG_API_ID=""
-TG_API_HASH=""
-TG_BOT_NAME=""
-IG_MAX_RETRIES=10
-IG_REQUEST_TIMEOUT=30
-MONGODB_HOST="mongodb"
-MONGODB_PORT="27017"
-MONGODB_USER="root"
-MONGODB_PASSWORD="changeme"
-ENABLE_DONATES=true
-SERVICE_ACCOUNTS_FILE=/var/warp_beacon/accounts.json
-FORCE_IPV6=true