warp-beacon 2.6.33__py3-none-any.whl → 2.6.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp_beacon/__version__.py +1 -1
- warp_beacon/scraper/__init__.py +4 -8
- warp_beacon/scraper/account_selector.py +49 -8
- warp_beacon/scraper/utils.py +2 -38
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/METADATA +1 -1
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/RECORD +10 -10
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/WHEEL +0 -0
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/entry_points.txt +0 -0
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/licenses/LICENSE +0 -0
- {warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/top_level.txt +0 -0
warp_beacon/__version__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
__version__ = "2.6.33"
|
1
|
+
__version__ = "2.6.35"
|
2
2
|
|
warp_beacon/scraper/__init__.py
CHANGED
@@ -8,7 +8,6 @@ from queue import Empty
|
|
8
8
|
|
9
9
|
import logging
|
10
10
|
|
11
|
-
from warp_beacon.scraper.utils import Utils
|
12
11
|
from warp_beacon.scraper.exceptions import NotFound, UnknownError, TimeOut, Unavailable, FileTooBig, YoutubeLiveError, \
|
13
12
|
YotubeAgeRestrictedError, IGRateLimitOccurred, CaptchaIssue, AllAccountsFailed, BadProxy
|
14
13
|
from warp_beacon.mediainfo.video import VideoInfo
|
@@ -49,7 +48,6 @@ class AsyncDownloader(object):
|
|
49
48
|
self.auth_event = multiprocessing.Event()
|
50
49
|
self.manager = multiprocessing.Manager()
|
51
50
|
self.process_context = self.manager.Namespace()
|
52
|
-
self.process_context.ig_session_client_id = Utils.get_ig_session_id()
|
53
51
|
self.allow_loop = self.manager.Value('i', 1)
|
54
52
|
self.scrolling_now = self.manager.Value('i', 0)
|
55
53
|
self.acc_selector = AccountSelector(self.manager, ACC_FILE, PROXY_FILE)
|
@@ -100,7 +98,7 @@ class AsyncDownloader(object):
|
|
100
98
|
job.account_switches += 1
|
101
99
|
selector.reset_ig_request_count()
|
102
100
|
|
103
|
-
def do_work(self, selector: AccountSelector, context: Namespace) -> None:
|
101
|
+
def do_work(self, selector: AccountSelector, _: Namespace) -> None:
|
104
102
|
logging.info("download worker started")
|
105
103
|
# pymongo is not fork-safe so new connect to DB required
|
106
104
|
fail_handler = FailHandler(DBClient())
|
@@ -141,13 +139,12 @@ class AsyncDownloader(object):
|
|
141
139
|
proxy = selector.get_current_proxy()
|
142
140
|
if job.job_origin is Origin.INSTAGRAM:
|
143
141
|
from warp_beacon.scraper.instagram.instagram import InstagramScraper
|
144
|
-
Utils.maybe_rotate_ig_client_session(context)
|
145
|
-
actor = InstagramScraper(client_session_id=context.ig_session_client_id, account=selector.get_current(), proxy=proxy)
|
146
|
-
selector.inc_ig_request_count()
|
147
142
|
if not job.scroll_content and selector.get_ig_request_count() >= int(os.environ.get("IG_REQUESTS_PER_ACCOUNT", default="10")):
|
148
143
|
logging.info("The account request limit has been reached. Selecting the next account.")
|
149
144
|
selector.reset_ig_request_count()
|
150
145
|
selector.next()
|
146
|
+
actor = InstagramScraper(client_session_id=selector.get_ig_session_id(), account=selector.get_current(), proxy=proxy)
|
147
|
+
selector.inc_ig_request_count()
|
151
148
|
elif job.job_origin is Origin.YT_SHORTS:
|
152
149
|
from warp_beacon.scraper.youtube.shorts import YoutubeShortsScraper
|
153
150
|
actor = YoutubeShortsScraper(selector.get_current(), proxy)
|
@@ -434,8 +431,7 @@ class AsyncDownloader(object):
|
|
434
431
|
|
435
432
|
def stop_all(self) -> None:
|
436
433
|
self.allow_loop.value = 0
|
437
|
-
self.acc_selector.save_ig_request_count()
|
438
|
-
Utils.save_ig_session_id(self.process_context.ig_session_client_id)
|
434
|
+
self.acc_selector.save_state()
|
439
435
|
for proc in self.workers:
|
440
436
|
if proc.is_alive():
|
441
437
|
logging.info("stopping process #%d", proc.pid)
|
warp_beacon/scraper/account_selector.py
CHANGED
@@ -3,29 +3,35 @@ import time
|
|
3
3
|
import random
|
4
4
|
import json
|
5
5
|
import re
|
6
|
+
import uuid
|
6
7
|
from typing import Optional, List
|
7
|
-
|
8
8
|
from itertools import cycle
|
9
|
+
|
10
|
+
import logging
|
11
|
+
|
9
12
|
import multiprocessing
|
10
13
|
import multiprocessing.managers
|
11
14
|
|
12
15
|
from warp_beacon.jobs import Origin
|
13
16
|
|
14
|
-
import logging
|
15
|
-
|
16
17
|
class AccountSelector(object):
|
17
|
-
accounts =
|
18
|
-
proxies =
|
18
|
+
accounts = None
|
19
|
+
proxies = None
|
19
20
|
current = None
|
20
21
|
current_module_name = None
|
21
22
|
accounts_meta_data = None
|
22
23
|
session_dir = "/var/warp_beacon"
|
23
24
|
manager = None
|
24
|
-
account_index =
|
25
|
+
account_index = None
|
25
26
|
current_proxy = None
|
26
27
|
ig_request_count = None
|
28
|
+
ig_accounts_session_id = None
|
27
29
|
|
28
30
|
def __init__(self, manager: multiprocessing.managers.SyncManager, acc_file_path: str, proxy_file_path: str=None) -> None:
|
31
|
+
self.accounts = []
|
32
|
+
self.proxies = []
|
33
|
+
self.account_index = {}
|
34
|
+
self.ig_accounts_session_id = self.load_ig_sessions_id()
|
29
35
|
self.manager = manager
|
30
36
|
self.accounts_meta_data = self.manager.dict()
|
31
37
|
if os.path.exists(acc_file_path):
|
@@ -45,6 +51,31 @@ class AccountSelector(object):
|
|
45
51
|
else:
|
46
52
|
raise ValueError("Accounts file not found")
|
47
53
|
|
54
|
+
def save_state(self) -> None:
|
55
|
+
self.save_ig_request_count()
|
56
|
+
self.save_ig_sessions_id()
|
57
|
+
|
58
|
+
def save_ig_sessions_id(self) -> None:
|
59
|
+
try:
|
60
|
+
with open(f"{self.session_dir}/ig_session_client_id", "w+", encoding="utf-8") as f:
|
61
|
+
f.write(json.dumps(self.ig_accounts_session_id))
|
62
|
+
except Exception as e:
|
63
|
+
logging.warning("Failed to save session ig_session_client_id!")
|
64
|
+
logging.exception(e)
|
65
|
+
|
66
|
+
def load_ig_sessions_id(self) -> dict:
|
67
|
+
ig_sessions_client_id = {}
|
68
|
+
try:
|
69
|
+
sess_file = f"{self.session_dir}/ig_sessions_client_id.json"
|
70
|
+
if os.path.exists(sess_file):
|
71
|
+
with open(sess_file, 'r', encoding="utf-8") as f:
|
72
|
+
ig_sessions_client_id = json.loads(f.read())
|
73
|
+
except Exception as e:
|
74
|
+
logging.warning("Failed to read session ig_session_client_id!")
|
75
|
+
logging.exception(e)
|
76
|
+
|
77
|
+
return ig_sessions_client_id
|
78
|
+
|
48
79
|
def save_ig_request_count(self) -> None:
|
49
80
|
try:
|
50
81
|
state = {"ig_count": self.ig_request_count.value}
|
@@ -209,7 +240,7 @@ class AccountSelector(object):
|
|
209
240
|
return (idx, self.accounts[module_name][idx])
|
210
241
|
|
211
242
|
def get_meta_data(self) -> dict:
|
212
|
-
idx = self.account_index[self.current_module_name].value - 1
|
243
|
+
idx = self.account_index[self.current_module_name].value# - 1
|
213
244
|
return self.accounts_meta_data[self.current_module_name][idx]
|
214
245
|
|
215
246
|
def count_service_accounts(self, mod_name: Origin) -> int:
|
@@ -225,4 +256,14 @@ class AccountSelector(object):
|
|
225
256
|
self.ig_request_count.value = 0
|
226
257
|
|
227
258
|
def get_ig_request_count(self) -> int:
|
228
|
-
return self.ig_request_count.value
|
259
|
+
return self.ig_request_count.value
|
260
|
+
|
261
|
+
def get_ig_session_id(self) -> str:
|
262
|
+
idx = self.account_index[self.current_module_name].value
|
263
|
+
if idx not in self.ig_accounts_session_id:
|
264
|
+
self.ig_accounts_session_id[idx] = str(uuid.uuid4())
|
265
|
+
else:
|
266
|
+
if random.random() > 0.95:
|
267
|
+
self.ig_accounts_session_id[idx] = str(uuid.uuid4())
|
268
|
+
logging.info("Rotated client_session_id — simulating app restart")
|
269
|
+
return self.ig_accounts_session_id[idx]
|
warp_beacon/scraper/utils.py
CHANGED
@@ -1,40 +1,4 @@
|
|
1
|
-
import os
|
2
|
-
from multiprocessing.managers import Namespace
|
3
|
-
import random
|
4
|
-
import uuid
|
5
|
-
import logging
|
1
|
+
#import logging
|
6
2
|
|
7
3
|
class Utils(object):
|
8
|
-
|
9
|
-
|
10
|
-
@staticmethod
|
11
|
-
def get_ig_session_id() -> str:
|
12
|
-
ig_session_client_id = ""
|
13
|
-
try:
|
14
|
-
sess_file = f"{Utils.session_dir}/ig_session_client_id"
|
15
|
-
if os.path.exists(sess_file):
|
16
|
-
with open(sess_file, 'r', encoding="utf-8") as f:
|
17
|
-
ig_session_client_id = f.read().strip()
|
18
|
-
except Exception as e:
|
19
|
-
logging.warning("Failed to read session ig_session_client_id!")
|
20
|
-
logging.exception(e)
|
21
|
-
|
22
|
-
if not ig_session_client_id:
|
23
|
-
ig_session_client_id = str(uuid.uuid4())
|
24
|
-
|
25
|
-
return ig_session_client_id
|
26
|
-
|
27
|
-
@staticmethod
|
28
|
-
def save_ig_session_id(ig_session_client_id: str) -> None:
|
29
|
-
try:
|
30
|
-
with open(f"{Utils.session_dir}/ig_session_client_id", "w+", encoding="utf-8") as f:
|
31
|
-
f.write(ig_session_client_id)
|
32
|
-
except Exception as e:
|
33
|
-
logging.warning("Failed to save session ig_session_client_id!")
|
34
|
-
logging.exception(e)
|
35
|
-
|
36
|
-
@staticmethod
|
37
|
-
def maybe_rotate_ig_client_session(context: Namespace) -> None:
|
38
|
-
if random.random() > 0.95:
|
39
|
-
context.ig_session_client_id = str(uuid.uuid4())
|
40
|
-
logging.info("Rotated client_session_id — simulating app restart")
|
4
|
+
pass
|
{warp_beacon-2.6.33.dist-info → warp_beacon-2.6.35.dist-info}/RECORD
CHANGED
@@ -4,7 +4,7 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
|
|
4
4
|
var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
|
5
5
|
var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
|
6
6
|
warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
|
7
|
-
warp_beacon/__version__.py,sha256=
|
7
|
+
warp_beacon/__version__.py,sha256=GBPjTCfxnfCRE053qGQV0boaaA5UUJxjblOZowA_xx0,24
|
8
8
|
warp_beacon/warp_beacon.py,sha256=ED43vNzdjDUJ_9qLCbri0bjWLWEJ69BENGj9i7G6AvM,342
|
9
9
|
warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
|
10
10
|
warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -22,13 +22,13 @@ warp_beacon/mediainfo/video.py,sha256=UBZrhTN5IDI-aYu6tsJEILo9nFkjHhkldGVFmvV7tE
|
|
22
22
|
warp_beacon/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
warp_beacon/scheduler/instagram_human.py,sha256=0LaRUu0MBBuEOQeFzuq22HIYfJL9pTK_7udsXfef0Fk,8204
|
24
24
|
warp_beacon/scheduler/scheduler.py,sha256=9OCh7Ta4wY_aTHGAOOZmaKXg56Ftx1N_aV1g6E3ZLKA,4941
|
25
|
-
warp_beacon/scraper/__init__.py,sha256=
|
25
|
+
warp_beacon/scraper/__init__.py,sha256=AaUd30PSJi5CCqP_UQ25Hbz6W7Y0CEyza1eV4ji4xeU,20029
|
26
26
|
warp_beacon/scraper/abstract.py,sha256=yP76ae9U73LYW2lDN6XWkXDkWX1h2UVOrkqv0Sqpu2Y,2985
|
27
|
-
warp_beacon/scraper/account_selector.py,sha256=
|
27
|
+
warp_beacon/scraper/account_selector.py,sha256=dcM2rFAUICr-LYs26HFmXPZsn0yxHplH_mEZU8oNKDk,9570
|
28
28
|
warp_beacon/scraper/exceptions.py,sha256=EKwoF0oH2xZWbNU-v8DOaWK5skKwa3s1yTIBdlcfMpc,1452
|
29
29
|
warp_beacon/scraper/fail_handler.py,sha256=zcPK3ZVEsu6JmHYcWP7L3naTRK3gWFVRkpP84VBOtJs,964
|
30
30
|
warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
|
31
|
-
warp_beacon/scraper/utils.py,sha256=
|
31
|
+
warp_beacon/scraper/utils.py,sha256=xJfCVhiLjVPoFVupE10sTzX7UpgNWbx2EeYIsoYXGDk,43
|
32
32
|
warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
33
|
warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
|
34
34
|
warp_beacon/scraper/instagram/instagram.py,sha256=zbkF-1lU5lxJ_2m8i8mGRX0EQMLVSflsOafBM1FEC6s,15588
|
@@ -46,9 +46,9 @@ warp_beacon/telegram/handlers.py,sha256=uvR6TPHSqdSxigp3wR-ewiE6t3TvVcbVLVcYGwkg
|
|
46
46
|
warp_beacon/telegram/placeholder_message.py,sha256=wN9-BRiyrtHG-EvXtZkGJHt2CX71munQ57ITttjt0mw,6400
|
47
47
|
warp_beacon/telegram/utils.py,sha256=1Lq67aRylVJzbwSyvAgjPAGjJZFATkICvAj3TJGuJiM,4635
|
48
48
|
warp_beacon/uploader/__init__.py,sha256=j3qcuKhpchseZLGzSsSiogqe6WdMbkK8d3I-ConhNRs,5687
|
49
|
-
warp_beacon-2.6.
|
50
|
-
warp_beacon-2.6.
|
51
|
-
warp_beacon-2.6.
|
52
|
-
warp_beacon-2.6.
|
53
|
-
warp_beacon-2.6.
|
54
|
-
warp_beacon-2.6.
|
49
|
+
warp_beacon-2.6.35.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
50
|
+
warp_beacon-2.6.35.dist-info/METADATA,sha256=j1lOtEDstP9jLnR2PmVPO4eTu6kzFEAoqDtdBUFkQFA,22706
|
51
|
+
warp_beacon-2.6.35.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
|
52
|
+
warp_beacon-2.6.35.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
|
53
|
+
warp_beacon-2.6.35.dist-info/top_level.txt,sha256=aFsWDQBplsMOyVMGGJ8iu-auZ25z1e_IB4tM2M8kW1A,1187
|
54
|
+
warp_beacon-2.6.35.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|