warp-beacon 2.4.1__tar.gz → 2.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warp_beacon-2.4.1/warp_beacon.egg-info → warp_beacon-2.4.3}/PKG-INFO +1 -1
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/setup.py +1 -0
- warp_beacon-2.4.3/warp_beacon/__version__.py +2 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scheduler/instagram_human.py +4 -2
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scheduler/scheduler.py +25 -9
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/__init__.py +17 -8
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/abstract.py +2 -2
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/account_selector.py +10 -1
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/instagram/instagram.py +4 -1
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/youtube/abstract.py +42 -64
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/bot.py +29 -9
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/handlers.py +8 -4
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/uploader/__init__.py +17 -4
- {warp_beacon-2.4.1 → warp_beacon-2.4.3/warp_beacon.egg-info}/PKG-INFO +1 -1
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon.egg-info/top_level.txt +1 -0
- warp_beacon-2.4.1/warp_beacon/__version__.py +0 -2
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/LICENSE +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/MANIFEST.in +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/README.md +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/assets/placeholder.gif +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/etc/.gitignore +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/etc/accounts.json +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/etc/proxies.json +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/etc/warp_beacon.conf +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/etc/warp_beacon.service +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/pyproject.toml +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/setup.cfg +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/compress/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/compress/video.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/jobs/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/jobs/abstract.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/jobs/download_job.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/jobs/types.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/jobs/upload_job.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/mediainfo/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/mediainfo/abstract.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/mediainfo/audio.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/mediainfo/silencer.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/mediainfo/video.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scheduler/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/exceptions.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/fail_handler.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/instagram/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/link_resolver.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/youtube/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/youtube/music.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/youtube/shorts.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/scraper/youtube/youtube.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/storage/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/storage/mongo.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/__init__.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/caption_shortener.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/placeholder_message.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/telegram/utils.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/warp_beacon.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon/yt_auth.py +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon.egg-info/SOURCES.txt +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon.egg-info/dependency_links.txt +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon.egg-info/entry_points.txt +0 -0
- {warp_beacon-2.4.1 → warp_beacon-2.4.3}/warp_beacon.egg-info/requires.txt +0 -0
@@ -115,12 +115,14 @@ class InstagramHuman(object):
|
|
115
115
|
if isinstance(random_friend, UserShort):
|
116
116
|
target_user_id = random_friend.pk
|
117
117
|
logging.info("user_info with target_user_id = '%s' ...", target_user_id)
|
118
|
-
self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
118
|
+
#self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
119
|
+
self.scrapler.download_hndlr(self.scrapler.cl.user_info_v1, target_user_id)
|
119
120
|
time.sleep(random.uniform(2, 5))
|
120
121
|
elif isinstance(random_friend, str):
|
121
122
|
target_user_id = self.scrapler.download_hndlr(self.scrapler.cl.user_id_from_username, random_friend)
|
122
123
|
logging.info("user_info with target_user_id = '%s' ...", target_user_id)
|
123
|
-
self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
124
|
+
#self.scrapler.download_hndlr(self.scrapler.cl.user_info, target_user_id)
|
125
|
+
self.scrapler.download_hndlr(self.scrapler.cl.user_info_v1, target_user_id)
|
124
126
|
|
125
127
|
time.sleep(random.uniform(2, 5))
|
126
128
|
|
@@ -5,11 +5,11 @@ import datetime
|
|
5
5
|
import threading
|
6
6
|
import json
|
7
7
|
|
8
|
+
import logging
|
9
|
+
|
8
10
|
from warp_beacon.jobs import Origin
|
9
11
|
import warp_beacon
|
10
12
|
|
11
|
-
import logging
|
12
|
-
|
13
13
|
class IGScheduler(object):
|
14
14
|
state_file = "/var/warp_beacon/scheduler_state.json"
|
15
15
|
yt_sessions_dir = "/var/warp_beacon"
|
@@ -30,7 +30,6 @@ class IGScheduler(object):
|
|
30
30
|
try:
|
31
31
|
with open(self.state_file, 'w+', encoding="utf-8") as f:
|
32
32
|
f.write(json.dumps(self.state))
|
33
|
-
self.load_yt_sessions()
|
34
33
|
except Exception as e:
|
35
34
|
logging.error("Failed to save Scheduler state!")
|
36
35
|
logging.exception(e)
|
@@ -48,7 +47,13 @@ class IGScheduler(object):
|
|
48
47
|
with open(yt_sess_file, 'r', encoding="utf-8") as f:
|
49
48
|
yt_sess_data = json.loads(f.read())
|
50
49
|
exp = yt_sess_data.get("expires", "")
|
51
|
-
self.state["yt_sess_exp"].append({
|
50
|
+
self.state["yt_sess_exp"].append({
|
51
|
+
"expires": exp,
|
52
|
+
"file_path": yt_sess_file,
|
53
|
+
"access_token": yt_sess_data.get("access_token", ""),
|
54
|
+
"refresh_token": yt_sess_data.get("refresh_token", ""),
|
55
|
+
"expires_in": yt_sess_data.get("expires_in", ""),
|
56
|
+
})
|
52
57
|
except Exception as e:
|
53
58
|
logging.error("Failed to load yt sessions!")
|
54
59
|
logging.exception(e)
|
@@ -60,6 +65,7 @@ class IGScheduler(object):
|
|
60
65
|
self.state = json.loads(f.read())
|
61
66
|
if "remaining" in self.state:
|
62
67
|
logging.info("Next scheduler activity in '%d' seconds", int(self.state["remaining"]))
|
68
|
+
self.load_yt_sessions()
|
63
69
|
except Exception as e:
|
64
70
|
logging.error("Failed to load Scheduler state!")
|
65
71
|
logging.exception(e)
|
@@ -113,23 +119,33 @@ class IGScheduler(object):
|
|
113
119
|
|
114
120
|
return False
|
115
121
|
|
122
|
+
def yt_nearest_expire(self) -> int:
|
123
|
+
return int(min(self.state["yt_sess_exp"], key=lambda x: x.get("expires", 0)).get("expires", 0))
|
124
|
+
|
116
125
|
def do_work(self) -> None:
|
117
126
|
logging.info("Scheduler thread started ...")
|
118
127
|
self.load_state()
|
119
128
|
while self.running:
|
120
129
|
try:
|
130
|
+
yt_expires = self.yt_nearest_expire()
|
131
|
+
ig_sched = self.state["remaining"]
|
132
|
+
min_val = min(yt_expires, ig_sched)
|
133
|
+
#max_val = max(yt_expires, ig_sched)
|
121
134
|
now = datetime.datetime.now()
|
122
|
-
if 4 <= now.hour < 7:
|
135
|
+
if 4 <= now.hour < 7 and min_val != yt_expires:
|
123
136
|
logging.info("Scheduler is paused due to night hours (4:00 - 7:00)")
|
124
137
|
self.state["remaining"] = 10800
|
125
138
|
self.save_state()
|
126
139
|
|
127
|
-
if
|
128
|
-
self.state["remaining"] = randrange(
|
129
|
-
logging.info("Next scheduler activity in '%s' seconds",
|
140
|
+
if ig_sched <= 0:
|
141
|
+
self.state["remaining"] = randrange(9292, 26200)
|
142
|
+
logging.info("Next scheduler activity in '%s' seconds", ig_sched)
|
143
|
+
|
144
|
+
if yt_expires <= time.time() + 60:
|
145
|
+
self.validate_yt_session()
|
130
146
|
|
131
147
|
start_time = time.time()
|
132
|
-
self.event.wait(timeout=
|
148
|
+
self.event.wait(timeout=min_val)
|
133
149
|
elapsed = time.time() - start_time
|
134
150
|
self.state["remaining"] -= elapsed
|
135
151
|
|
@@ -85,6 +85,7 @@ class AsyncDownloader(object):
|
|
85
85
|
cur_acc = selector.get_current()
|
86
86
|
logging.info("Current account: '%s'", str(cur_acc))
|
87
87
|
job.account_switches += 1
|
88
|
+
selector.reset_ig_request_count()
|
88
89
|
|
89
90
|
def do_work(self, selector: AccountSelector) -> None:
|
90
91
|
logging.info("download worker started")
|
@@ -136,11 +137,17 @@ class AsyncDownloader(object):
|
|
136
137
|
from warp_beacon.scraper.youtube.youtube import YoutubeScraper
|
137
138
|
actor = YoutubeScraper(selector.get_current(), proxy)
|
138
139
|
actor.send_message_to_admin_func = self.send_message_to_admin
|
140
|
+
actor.request_yt_auth = self.request_yt_auth
|
139
141
|
actor.auth_event = self.auth_event
|
140
142
|
# job retry loop
|
141
143
|
while self.allow_loop.value == 1:
|
142
144
|
try:
|
143
145
|
if job.session_validation:
|
146
|
+
if job.job_origin is Origin.INSTAGRAM:
|
147
|
+
if selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="20")):
|
148
|
+
logging.info("The account request limit has been reached. Selecting the next account.")
|
149
|
+
selector.reset_ig_request_count()
|
150
|
+
selector.next()
|
144
151
|
logging.info("Validating '%s' session ...", job.job_origin.value)
|
145
152
|
actor.validate_session()
|
146
153
|
logging.info("done")
|
@@ -281,13 +288,13 @@ class AsyncDownloader(object):
|
|
281
288
|
break
|
282
289
|
self.send_message_to_admin(
|
283
290
|
f"Task <code>{job.job_id}</code> failed. URL: {job.url}. Reason: '<b>UnknownError</b>'."
|
284
|
-
f"Exception
|
291
|
+
f"Exception:<br><pre code=\"python\">{exception_msg}<br></pre>"
|
285
292
|
)
|
286
293
|
self.uploader.queue_task(job.to_upload_job(
|
287
294
|
job_failed=True,
|
288
|
-
job_failed_msg=
|
289
|
-
f"Task <code>{job.job_id}</code> failed. URL: {job.url}. Reason: '<b>UnknownError</b>'.\n"
|
290
|
-
f"Exception
|
295
|
+
job_failed_msg=f"Unknown error occured. Please <a href=\"https://github.com/sb0y/warp_beacon/issues\">create issue</a> with service logs.\n"
|
296
|
+
f"Task <code>{job.job_id}</code> failed. URL: {job.url}. Reason: '<b>UnknownError</b>'.\n"
|
297
|
+
f"Exception:<br><pre code=\"python\">{exception_msg}</pre>"
|
291
298
|
))
|
292
299
|
break
|
293
300
|
|
@@ -398,10 +405,12 @@ class AsyncDownloader(object):
|
|
398
405
|
def notify_task_failed(self, job: DownloadJob) -> None:
|
399
406
|
self.uploader.queue_task(job.to_upload_job(job_failed=True))
|
400
407
|
|
401
|
-
def send_message_to_admin(self, text: str, account_admins: str = None,
|
408
|
+
def send_message_to_admin(self, text: str, account_admins: str = None, _: object = None) -> None:
|
402
409
|
self.uploader.queue_task(UploadJob.build(
|
403
410
|
is_message_to_admin=True,
|
404
411
|
message_text=text,
|
405
|
-
account_admins=account_admins
|
406
|
-
|
407
|
-
|
412
|
+
account_admins=account_admins
|
413
|
+
))
|
414
|
+
|
415
|
+
def request_yt_auth(self) -> None:
|
416
|
+
self.uploader.queue_task(UploadJob.build(yt_auth=True))
|
@@ -14,13 +14,13 @@ import logging
|
|
14
14
|
|
15
15
|
class ScraperAbstract(ABC):
|
16
16
|
original_gai_family = None
|
17
|
-
send_message_to_admin_func = None
|
17
|
+
send_message_to_admin_func: Callable = lambda: None
|
18
|
+
request_yt_auth: Callable = lambda: None
|
18
19
|
auth_event = None
|
19
20
|
account = None
|
20
21
|
account_index = 0
|
21
22
|
proxy = None
|
22
23
|
|
23
|
-
|
24
24
|
def __init__(self, account: tuple, proxy: dict=None) -> None:
|
25
25
|
self.account_index = account[0]
|
26
26
|
self.account = account[1]
|
@@ -125,8 +125,12 @@ class AccountSelector(object):
|
|
125
125
|
for index, _ in enumerate(lst):
|
126
126
|
self.accounts_meta_data[module_name].insert(index, {"auth_fails": 0, "rate_limits": 0, "captcha": 0})
|
127
127
|
|
128
|
-
def
|
128
|
+
def get_module_name(self, module_origin: Origin) -> str:
|
129
129
|
module_name = 'youtube' if next((s for s in ("yt", "youtube", "youtu_be") if s in module_origin.value), None) else 'instagram'
|
130
|
+
return module_name
|
131
|
+
|
132
|
+
def set_module(self, module_origin: Origin) -> None:
|
133
|
+
module_name = self.get_module_name(module_origin)
|
130
134
|
self.current_module_name = module_name
|
131
135
|
if self.current is None:
|
132
136
|
self.current = self.accounts[self.current_module_name][self.account_index[self.current_module_name].value]
|
@@ -163,6 +167,11 @@ class AccountSelector(object):
|
|
163
167
|
def get_current(self) -> tuple:
|
164
168
|
idx = self.account_index[self.current_module_name].value
|
165
169
|
return (idx, self.accounts[self.current_module_name][idx])
|
170
|
+
|
171
|
+
def get_current_for_module(self, module_origin: Origin) -> tuple:
|
172
|
+
module_name = self.get_module_name(module_origin)
|
173
|
+
idx = self.account_index[module_name].value
|
174
|
+
return (idx, self.accounts[module_name][idx])
|
166
175
|
|
167
176
|
def get_meta_data(self) -> dict:
|
168
177
|
idx = self.account_index[self.current_module_name].value - 1
|
@@ -20,7 +20,7 @@ from instagrapi.mixins.challenge import ChallengeChoice
|
|
20
20
|
from instagrapi.exceptions import LoginRequired, PleaseWaitFewMinutes, MediaNotFound, ClientNotFoundError, UserNotFound, ChallengeRequired, \
|
21
21
|
ChallengeSelfieCaptcha, ChallengeUnknownStep, UnknownError as IGUnknownError
|
22
22
|
|
23
|
-
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, extract_exception_message
|
23
|
+
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, IGRateLimitOccurred, CaptchaIssue, BadProxy, extract_exception_message
|
24
24
|
from warp_beacon.scraper.abstract import ScraperAbstract
|
25
25
|
from warp_beacon.jobs.types import JobType
|
26
26
|
from warp_beacon.jobs.download_job import DownloadJob
|
@@ -152,6 +152,9 @@ class InstagramScraper(ScraperAbstract):
|
|
152
152
|
try:
|
153
153
|
ret_val = func(*args, **kwargs)
|
154
154
|
break
|
155
|
+
except urllib3.exceptions.ProxyError as e:
|
156
|
+
logging.warning("Proxy error!")
|
157
|
+
raise BadProxy(extract_exception_message(e.original_error))
|
155
158
|
except (ChallengeRequired, ChallengeSelfieCaptcha, ChallengeUnknownStep) as e:
|
156
159
|
logging.warning("Instagram wants Challange!")
|
157
160
|
logging.exception(e)
|
@@ -15,81 +15,49 @@ import pytubefix.exceptions
|
|
15
15
|
import requests
|
16
16
|
from PIL import Image
|
17
17
|
import numpy as np
|
18
|
+
import urllib3
|
18
19
|
from urllib.parse import urlparse, parse_qs
|
19
20
|
|
20
21
|
import pytubefix
|
21
22
|
from pytubefix import YouTube
|
22
23
|
from pytubefix.innertube import _default_clients
|
23
24
|
from pytubefix.streams import Stream
|
24
|
-
from pytubefix.innertube import InnerTube, _client_id, _client_secret
|
25
25
|
#from pytubefix.exceptions import VideoUnavailable, VideoPrivate, MaxRetriesExceeded
|
26
|
-
from pytubefix import request
|
27
26
|
import yt_dlp
|
28
27
|
|
29
28
|
from warp_beacon.jobs.download_job import DownloadJob
|
30
29
|
from warp_beacon.scraper.abstract import ScraperAbstract
|
31
|
-
|
32
|
-
from warp_beacon.scraper.exceptions import TimeOut, Unavailable, extract_exception_message
|
33
|
-
|
34
|
-
def patched_fetch_bearer_token(self) -> None:
|
35
|
-
"""Fetch an OAuth token."""
|
36
|
-
# Subtracting 30 seconds is arbitrary to avoid potential time discrepencies
|
37
|
-
start_time = int(time.time() - 30)
|
38
|
-
data = {
|
39
|
-
'client_id': _client_id,
|
40
|
-
'scope': 'https://www.googleapis.com/auth/youtube'
|
41
|
-
}
|
42
|
-
response = request._execute_request(
|
43
|
-
'https://oauth2.googleapis.com/device/code',
|
44
|
-
'POST',
|
45
|
-
headers={
|
46
|
-
'Content-Type': 'application/json'
|
47
|
-
},
|
48
|
-
data=data
|
49
|
-
)
|
50
|
-
response_data = json.loads(response.read())
|
51
|
-
verification_url = response_data['verification_url']
|
52
|
-
user_code = response_data['user_code']
|
53
|
-
|
54
|
-
logging.warning("Please open %s and input code '%s'", verification_url, user_code)
|
55
|
-
self.send_message_to_admin_func(
|
56
|
-
f"Please open {verification_url} and input code <code>{user_code}</code>.\n\n"
|
57
|
-
"Please select a Google account with verified age.\n"
|
58
|
-
"This will allow you to avoid error the <b>AgeRestrictedError</b> when accessing some content.",
|
59
|
-
account_admins=self.wb_account.get("account_admins", None),
|
60
|
-
yt_auth=True)
|
61
|
-
self.auth_event.wait()
|
62
|
-
|
63
|
-
data = {
|
64
|
-
'client_id': _client_id,
|
65
|
-
'client_secret': _client_secret,
|
66
|
-
'device_code': response_data['device_code'],
|
67
|
-
'grant_type': 'urn:ietf:params:oauth:grant-type:device_code'
|
68
|
-
}
|
69
|
-
response = request._execute_request(
|
70
|
-
'https://oauth2.googleapis.com/token',
|
71
|
-
'POST',
|
72
|
-
headers={
|
73
|
-
'Content-Type': 'application/json'
|
74
|
-
},
|
75
|
-
data=data
|
76
|
-
)
|
77
|
-
response_data = json.loads(response.read())
|
78
|
-
|
79
|
-
self.access_token = response_data['access_token']
|
80
|
-
self.refresh_token = response_data['refresh_token']
|
81
|
-
self.expires = start_time + response_data['expires_in']
|
82
|
-
self.cache_tokens()
|
30
|
+
from warp_beacon.yt_auth import YtAuth
|
31
|
+
from warp_beacon.scraper.exceptions import TimeOut, Unavailable, BadProxy, extract_exception_message
|
83
32
|
|
84
33
|
class YoutubeAbstract(ScraperAbstract):
|
85
34
|
DOWNLOAD_DIR = "/tmp"
|
86
35
|
YT_SESSION_FILE = '/var/warp_beacon/yt_session_%d.json'
|
87
36
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
37
|
+
def validate_session(self) -> None:
|
38
|
+
try:
|
39
|
+
logging.info("Validating YT session(s) ...")
|
40
|
+
session_dir = os.path.dirname(self.YT_SESSION_FILE)
|
41
|
+
for f in os.listdir(session_dir):
|
42
|
+
if f.startswith("yt_session") and f.endswith(".json"):
|
43
|
+
yt_sess_file = f"{session_dir}/{f}"
|
44
|
+
if os.path.exists(yt_sess_file):
|
45
|
+
account_index = int(f.split('_')[-1].rstrip('.json'))
|
46
|
+
logging.info("Validating YT session #%d ...", account_index)
|
47
|
+
yt_sess_data = {}, 0
|
48
|
+
with open(yt_sess_file, 'r', encoding="utf-8") as f:
|
49
|
+
yt_sess_data = json.loads(f.read())
|
50
|
+
exp = int(yt_sess_data.get("expires", 0))
|
51
|
+
if exp <= time.time():
|
52
|
+
yt_auth = YtAuth(account_index=account_index)
|
53
|
+
requests_data = yt_auth.refresh_token(refresh_token=yt_sess_data.get("refresh_token", ""))
|
54
|
+
if requests_data:
|
55
|
+
yt_sess_data.update(requests_data)
|
56
|
+
if yt_auth.safe_write_session(yt_sess_data):
|
57
|
+
logging.info("YT session #%d validated", account_index)
|
58
|
+
except Exception as e:
|
59
|
+
logging.error("Failed to refresh Youtube session!")
|
60
|
+
logging.exception(e)
|
93
61
|
|
94
62
|
def rename_local_file(self, filename: str) -> str:
|
95
63
|
if not os.path.exists(filename):
|
@@ -214,6 +182,9 @@ class YoutubeAbstract(ScraperAbstract):
|
|
214
182
|
#except http.client.IncompleteRead as e:
|
215
183
|
except KeyError:
|
216
184
|
raise Unavailable("Library failed")
|
185
|
+
except urllib3.exceptions.ProxyError as e:
|
186
|
+
logging.warning("Proxy error!")
|
187
|
+
raise BadProxy(extract_exception_message(e.original_error))
|
217
188
|
except (socket.timeout,
|
218
189
|
ssl.SSLError,
|
219
190
|
http.client.IncompleteRead,
|
@@ -256,11 +227,6 @@ class YoutubeAbstract(ScraperAbstract):
|
|
256
227
|
#logging.info("bytes: %d, bytes remaining: %d", chunk, bytes_remaining)
|
257
228
|
|
258
229
|
def build_yt(self, url: str, session: bool = True) -> YouTube:
|
259
|
-
if session:
|
260
|
-
InnerTube.send_message_to_admin_func = self.send_message_to_admin_func
|
261
|
-
InnerTube.auth_event = self.auth_event
|
262
|
-
InnerTube.wb_account = self.account
|
263
|
-
InnerTube.fetch_bearer_token = patched_fetch_bearer_token
|
264
230
|
_default_clients["ANDROID"]["innertube_context"]["context"]["client"]["clientVersion"] = "19.08.35"
|
265
231
|
_default_clients["ANDROID_MUSIC"] = _default_clients["ANDROID"]
|
266
232
|
yt_opts = {"url": url, "on_progress_callback": self.yt_on_progress}
|
@@ -269,6 +235,18 @@ class YoutubeAbstract(ScraperAbstract):
|
|
269
235
|
yt_opts["use_oauth"] = True
|
270
236
|
yt_opts["allow_oauth_cache"] = True
|
271
237
|
yt_opts["token_file"] = self.YT_SESSION_FILE % self.account_index
|
238
|
+
if not os.path.exists(yt_opts["token_file"]):
|
239
|
+
logging.warning("YT session '%s' file is not found", yt_opts["token_file"])
|
240
|
+
self.request_yt_auth()
|
241
|
+
self.auth_event.wait()
|
242
|
+
yt_auth = YtAuth(account_index=self.account_index)
|
243
|
+
device_code = yt_auth.load_device_code()
|
244
|
+
if device_code:
|
245
|
+
auth_data = yt_auth.confirm_token(device_code=device_code)
|
246
|
+
if auth_data:
|
247
|
+
yt_auth.safe_write_session(auth_data)
|
248
|
+
else:
|
249
|
+
logging.error("Failed to fetch YT auth token!")
|
272
250
|
if self.proxy:
|
273
251
|
proxy_dsn = self.proxy.get("dsn", "")
|
274
252
|
logging.info("Using proxy DSN '%s'", proxy_dsn)
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import os
|
2
2
|
import signal
|
3
|
+
from typing import Optional, Union
|
3
4
|
|
4
5
|
import logging
|
5
6
|
|
@@ -74,6 +75,7 @@ class Bot(object):
|
|
74
75
|
self.uploader = AsyncUploader(
|
75
76
|
storage=self.storage,
|
76
77
|
admin_message_callback=self.send_text_to_admin,
|
78
|
+
request_yt_auth_callback=self.request_yt_auth,
|
77
79
|
pool_size=int(os.environ.get("UPLOAD_POOL_SIZE", default=workers_amount)),
|
78
80
|
loop=self.client.loop
|
79
81
|
)
|
@@ -130,7 +132,31 @@ class Bot(object):
|
|
130
132
|
|
131
133
|
return 0
|
132
134
|
|
133
|
-
async def
|
135
|
+
async def request_yt_auth(self) -> None:
|
136
|
+
from warp_beacon.yt_auth import YtAuth
|
137
|
+
acc_index, acc = self.downloader.acc_selector.get_current_for_module(Origin.YOUTUBE)
|
138
|
+
yt_auth = YtAuth(account_index=acc_index)
|
139
|
+
data = yt_auth.fetch_token()
|
140
|
+
if all(data.values()):
|
141
|
+
if yt_auth.store_device_code(data["device_code"]):
|
142
|
+
reply_markup = InlineKeyboardMarkup(
|
143
|
+
[
|
144
|
+
[
|
145
|
+
InlineKeyboardButton("✅ Done", callback_data=f"confirm_yt_auth:{acc_index}")
|
146
|
+
]
|
147
|
+
]
|
148
|
+
)
|
149
|
+
await self.send_text_to_admin(
|
150
|
+
f"Please open {data['verification_url']} and input code <code>{data['user_code']}</code>.\n\n"
|
151
|
+
"Please authorize with a Google account with verified age.\n"
|
152
|
+
"This will allow you to avoid error the <b>AgeRestrictedError</b> when accessing some content.",
|
153
|
+
account_admins=acc.get("account_admins", None),
|
154
|
+
reply_markup=reply_markup
|
155
|
+
)
|
156
|
+
else:
|
157
|
+
logging.error("Wrong YT auth dataset: '%s'", str(data))
|
158
|
+
|
159
|
+
async def send_text_to_admin(self, text: str, account_admins: str = None, reply_markup: Optional[Union[InlineKeyboardMarkup]] = None) -> list[int]:
|
134
160
|
try:
|
135
161
|
admins = None
|
136
162
|
if account_admins:
|
@@ -145,14 +171,8 @@ class Bot(object):
|
|
145
171
|
for adm in admins_array:
|
146
172
|
adm = adm.strip()
|
147
173
|
msg_opts = {"chat_id": adm, "text": text, "parse_mode": ParseMode.HTML}
|
148
|
-
if
|
149
|
-
msg_opts["reply_markup"] =
|
150
|
-
[
|
151
|
-
[
|
152
|
-
InlineKeyboardButton("✅ Done", callback_data="auth_process_done")
|
153
|
-
]
|
154
|
-
]
|
155
|
-
)
|
174
|
+
if reply_markup:
|
175
|
+
msg_opts["reply_markup"] = reply_markup
|
156
176
|
message_reply = await self.client.send_message(**msg_opts)
|
157
177
|
msg_ids.append(message_reply.id)
|
158
178
|
return msg_ids
|
@@ -1,5 +1,6 @@
|
|
1
|
+
import os
|
1
2
|
from pyrogram import Client
|
2
|
-
from pyrogram.types import Message, CallbackQuery
|
3
|
+
from pyrogram.types import Message, CallbackQuery, InlineKeyboardButton, InlineKeyboardMarkup
|
3
4
|
from pyrogram.enums import ChatType, ParseMode
|
4
5
|
from pyrogram.types import BotCommand
|
5
6
|
|
@@ -24,12 +25,12 @@ class Handlers(object):
|
|
24
25
|
self.bot = bot
|
25
26
|
self.storage = bot.storage
|
26
27
|
|
27
|
-
async def help(self,
|
28
|
+
async def help(self, _: Client, message: Message) -> None:
|
28
29
|
"""Send a message when the command /help is issued."""
|
29
30
|
await self.bot.send_text(text="Send me a link to remote media", reply_id=message.id, chat_id=message.chat.id)
|
30
31
|
#await message.reply_text("<code>test</code>\n<b>bold</b>\n<pre code=\"python\">print('hello')</pre> @BelisariusCawl", parse_mode=ParseMode.HTML)
|
31
32
|
|
32
|
-
async def random(self,
|
33
|
+
async def random(self, _: Client, message: Message) -> None:
|
33
34
|
d = self.storage.get_random()
|
34
35
|
if not d:
|
35
36
|
await message.reply_text("No random content yet. Try to send link first.")
|
@@ -48,7 +49,10 @@ class Handlers(object):
|
|
48
49
|
)
|
49
50
|
)
|
50
51
|
|
51
|
-
async def
|
52
|
+
async def yt_auth(self, _: Client, __: Message) -> None:
|
53
|
+
await self.bot.request_yt_auth()
|
54
|
+
|
55
|
+
async def start(self, _: Client, message: Message) -> None:
|
52
56
|
bot_name = await self.bot.client.get_me()
|
53
57
|
await self.bot.client.set_bot_commands([
|
54
58
|
BotCommand("start", "Start bot"),
|
@@ -19,13 +19,21 @@ class AsyncUploader(object):
|
|
19
19
|
in_process = set()
|
20
20
|
loop = None
|
21
21
|
admin_message_callback = None
|
22
|
+
request_yt_auth_callback = None
|
22
23
|
pool_size = 1
|
23
24
|
|
24
|
-
def __init__(self,
|
25
|
+
def __init__(self,
|
26
|
+
loop: asyncio.AbstractEventLoop,
|
27
|
+
storage: Storage,
|
28
|
+
admin_message_callback: Callable,
|
29
|
+
request_yt_auth_callback: Callable,
|
30
|
+
pool_size: int=min(32, os.cpu_count() + 4)
|
31
|
+
) -> None:
|
25
32
|
self.storage = storage
|
26
33
|
self.loop = loop
|
27
34
|
self.job_queue = multiprocessing.Queue()
|
28
35
|
self.admin_message_callback = admin_message_callback
|
36
|
+
self.request_yt_auth_callback = request_yt_auth_callback
|
29
37
|
self.pool_size = pool_size
|
30
38
|
|
31
39
|
def __del__(self) -> None:
|
@@ -80,10 +88,15 @@ class AsyncUploader(object):
|
|
80
88
|
if job is self.__JOE_BIDEN_WAKEUP:
|
81
89
|
break
|
82
90
|
if job.is_message_to_admin and job.message_text and self.admin_message_callback:
|
83
|
-
#asyncio.ensure_future(self.admin_message_callback(job.message_text, job.account_admins, job.yt_auth), loop=self.loop)
|
84
91
|
self.loop.call_soon_threadsafe(
|
85
92
|
asyncio.create_task,
|
86
|
-
self.admin_message_callback(job.message_text, job.account_admins
|
93
|
+
self.admin_message_callback(job.message_text, job.account_admins)
|
94
|
+
)
|
95
|
+
continue
|
96
|
+
if job.yt_auth and self.request_yt_auth_callback:
|
97
|
+
self.loop.call_soon_threadsafe(
|
98
|
+
asyncio.create_task,
|
99
|
+
self.request_yt_auth_callback()
|
87
100
|
)
|
88
101
|
continue
|
89
102
|
|
@@ -91,7 +104,7 @@ class AsyncUploader(object):
|
|
91
104
|
if job.media_type == JobType.COLLECTION:
|
92
105
|
for i in job.media_collection:
|
93
106
|
for j in i:
|
94
|
-
path += "
|
107
|
+
path += f"{j.local_media_path}; "
|
95
108
|
else:
|
96
109
|
path = job.local_media_path
|
97
110
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|