warp-beacon 2.8.1__py3-none-any.whl → 2.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp_beacon/__version__.py +1 -1
- warp_beacon/scheduler/instagram_human.py +2 -2
- warp_beacon/scraper/X/X.py +32 -13
- warp_beacon/scraper/X/abstract.py +1 -1
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/METADATA +1 -1
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/RECORD +10 -10
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/WHEEL +0 -0
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/entry_points.txt +0 -0
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/licenses/LICENSE +0 -0
- {warp_beacon-2.8.1.dist-info → warp_beacon-2.8.3.dist-info}/top_level.txt +0 -0
warp_beacon/__version__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
__version__ = "2.8.1"
|
1
|
+
__version__ = "2.8.3"
|
2
2
|
|
@@ -151,7 +151,7 @@ class InstagramHuman(object):
|
|
151
151
|
|
152
152
|
def scroll_content(self, last_pk: int) -> None:
|
153
153
|
#timeline_initialized = False
|
154
|
-
if random.random() > 0.
|
154
|
+
if random.random() > 0.95:
|
155
155
|
#timeline_initialized = True
|
156
156
|
#self.browse_timeline()
|
157
157
|
logging.info("Starting to watch related reels with media_pk '%d'", last_pk)
|
@@ -159,7 +159,7 @@ class InstagramHuman(object):
|
|
159
159
|
self.operations_count += 1
|
160
160
|
self.watch_content(media)
|
161
161
|
|
162
|
-
if random.random() > 0.
|
162
|
+
if random.random() > 0.95:
|
163
163
|
time.sleep(random.uniform(2, 20))
|
164
164
|
#if not timeline_initialized:
|
165
165
|
# self.browse_timeline()
|
warp_beacon/scraper/X/X.py
CHANGED
@@ -7,6 +7,7 @@ import requests
|
|
7
7
|
import yt_dlp
|
8
8
|
from playwright.sync_api import sync_playwright
|
9
9
|
|
10
|
+
from warp_beacon.telegram.utils import Utils
|
10
11
|
from warp_beacon.scraper.utils import ScraperUtils
|
11
12
|
from warp_beacon.scraper.X.types import XMediaType
|
12
13
|
from warp_beacon.jobs.types import JobType
|
@@ -77,27 +78,45 @@ class XScraper(XAbstract):
|
|
77
78
|
job_type = JobType.VIDEO
|
78
79
|
except yt_dlp.utils.DownloadError as e:
|
79
80
|
msg = str(e).lower()
|
80
|
-
if "
|
81
|
+
if "no video could be found in this tweet" in msg:
|
81
82
|
logging.warning("[X] yt_dlp failed to extract info. Falling back to image scraping.")
|
82
83
|
media_type = XMediaType.IMAGE
|
83
84
|
else:
|
84
85
|
raise
|
85
|
-
|
86
|
+
|
87
|
+
images = []
|
86
88
|
if media_type == XMediaType.IMAGE:
|
87
89
|
job_type = JobType.IMAGE
|
88
90
|
images, post_text = self.download_images(url, timeout)
|
89
91
|
if images:
|
90
|
-
|
91
|
-
|
92
|
-
|
92
|
+
if len(images) > 1:
|
93
|
+
job_type = JobType.COLLECTION
|
94
|
+
else:
|
95
|
+
local_file = images[0]
|
96
|
+
|
97
|
+
if job_type == JobType.COLLECTION:
|
98
|
+
chunks = []
|
99
|
+
for media_chunk in Utils.chunker(images, 10):
|
100
|
+
chunk = []
|
101
|
+
for media in media_chunk:
|
102
|
+
chunk.append({
|
103
|
+
"local_media_path": media,
|
104
|
+
"canonical_name": post_text,
|
105
|
+
"media_type": JobType.IMAGE
|
106
|
+
})
|
107
|
+
chunks.append(chunk)
|
93
108
|
res.append({
|
94
|
-
"
|
95
|
-
"
|
96
|
-
'progress_hooks': [self.dlp_on_progress],
|
97
|
-
#"thumb": thumbnail,
|
98
|
-
"canonical_name": post_text,
|
99
|
-
"media_type": job_type
|
109
|
+
"media_type": JobType.COLLECTION,
|
110
|
+
"items": chunks
|
100
111
|
})
|
112
|
+
else:
|
113
|
+
if local_file:
|
114
|
+
res.append({
|
115
|
+
"local_media_path": local_file,
|
116
|
+
"performer": media_info.get("uploader", "Unknown"),
|
117
|
+
"canonical_name": post_text,
|
118
|
+
"media_type": job_type
|
119
|
+
})
|
101
120
|
|
102
121
|
return res
|
103
122
|
|
@@ -198,13 +217,13 @@ class XScraper(XAbstract):
|
|
198
217
|
|
199
218
|
with sync_playwright() as p:
|
200
219
|
with p.chromium.launch(headless=True) as browser:
|
201
|
-
with browser.new_context(proxy=proxy
|
220
|
+
with browser.new_context(proxy=proxy) as context:
|
202
221
|
page = context.new_page()
|
203
222
|
page.goto(url, wait_until="networkidle", timeout=(timeout*1000))
|
204
223
|
|
205
224
|
#page.wait_for_timeout(3000)
|
206
225
|
page.wait_for_selector("img[src*='pbs.twimg.com/media']", timeout=(timeout*1000))
|
207
|
-
text_element = page.
|
226
|
+
text_element = page.query_selector('[data-testid="tweetText"]')
|
208
227
|
text = text_element.inner_text()
|
209
228
|
|
210
229
|
image_elements = page.query_selector_all("img")
|
@@ -24,7 +24,7 @@ from warp_beacon.telegram.types import ReportType
|
|
24
24
|
|
25
25
|
class XAbstract(ScraperAbstract):
|
26
26
|
DOWNLOAD_DIR = "/tmp"
|
27
|
-
X_MAX_RETRIES_DEFAULT =
|
27
|
+
X_MAX_RETRIES_DEFAULT = 2
|
28
28
|
X_PAUSE_BEFORE_RETRY_DEFAULT = 3
|
29
29
|
X_TIMEOUT_DEFAULT = 15
|
30
30
|
X_TIMEOUT_INCREMENT_DEFAULT = 20
|
@@ -4,7 +4,7 @@ var/warp_beacon/accounts.json,sha256=OsXdncs6h88xrF_AP6_WDCK1waGBn9SR-uYdIeK37GM
|
|
4
4
|
var/warp_beacon/placeholder.gif,sha256=cE5CGJVaop4Sx21zx6j4AyoHU0ncmvQuS2o6hJfEH88,6064
|
5
5
|
var/warp_beacon/proxies.json,sha256=VnjlQDXumOEq72ZFjbh6IqHS1TEHqn8HPYAZqWCeSIA,95
|
6
6
|
warp_beacon/__init__.py,sha256=_rThNODmz0nDp_n4mWo_HKaNFE5jk1_7cRhHyYaencI,163
|
7
|
-
warp_beacon/__version__.py,sha256=
|
7
|
+
warp_beacon/__version__.py,sha256=wFes6herhJvqX5NDKqYVfqjb67DmWdV9Toohi3aNFW4,23
|
8
8
|
warp_beacon/warp_beacon.py,sha256=ADCR30uGXIsDrt9WoiI9Ghu2QtWs0qZIK6x3pQKM_B4,1109
|
9
9
|
warp_beacon/yt_auth.py,sha256=GUTKqYr_tzDC-07Lx_ahWXSag8EyLxXBUnQbDBIkEmk,6022
|
10
10
|
warp_beacon/compress/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -20,7 +20,7 @@ warp_beacon/mediainfo/audio.py,sha256=ous88kwQj4bDIChN5wnGil5LqTs0IQHH0d-nyrL0-Z
|
|
20
20
|
warp_beacon/mediainfo/silencer.py,sha256=qxMuViOoVwUYb60uCVvqHiGrqByR1_4_rqMT-XdMkwc,1813
|
21
21
|
warp_beacon/mediainfo/video.py,sha256=UBZrhTN5IDI-aYu6tsJEILo9nFkjHhkldGVFmvV7tEI,2480
|
22
22
|
warp_beacon/scheduler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
warp_beacon/scheduler/instagram_human.py,sha256=
|
23
|
+
warp_beacon/scheduler/instagram_human.py,sha256=lOytnonvqtB_8z6TVzrVJ1prqqgE4fktZZl-xpn7SOA,12951
|
24
24
|
warp_beacon/scheduler/scheduler.py,sha256=Bf4sGXjX75Dox3q-yzUHhagtzUAj3hl5GzfnZya-_io,4995
|
25
25
|
warp_beacon/scraper/__init__.py,sha256=o9-HQEf4yQVNtWuJN4NcLUovejiHhP_KkQ1Xf5EaQvU,20670
|
26
26
|
warp_beacon/scraper/abstract.py,sha256=pWbaTu-gDZgi-iFjqMR_uGzPl5KLv-4gTdJ9w6cD4sk,3802
|
@@ -29,9 +29,9 @@ warp_beacon/scraper/exceptions.py,sha256=EKwoF0oH2xZWbNU-v8DOaWK5skKwa3s1yTIBdlc
|
|
29
29
|
warp_beacon/scraper/fail_handler.py,sha256=5ODu4b8ndZWAcHIXrcUufsWFihetzNUoAi8IgAkreyQ,998
|
30
30
|
warp_beacon/scraper/link_resolver.py,sha256=Rc9ZuMyOo3iPywDHwjngy-WRQ2SXhJwxcg-5ripx7tM,2447
|
31
31
|
warp_beacon/scraper/utils.py,sha256=AOZmDki2Pbr84IG-j_wN2UghKCiWFVDYdx6HJl0JTBs,1258
|
32
|
-
warp_beacon/scraper/X/X.py,sha256=
|
32
|
+
warp_beacon/scraper/X/X.py,sha256=dAw6kb0YYQ4-Y3Ya4r4F5XBgZ4KmjVIfXFNx96Xb138,7188
|
33
33
|
warp_beacon/scraper/X/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
|
-
warp_beacon/scraper/X/abstract.py,sha256=
|
34
|
+
warp_beacon/scraper/X/abstract.py,sha256=pCzZPTCtn8pRbBx2SeuBUpMkEHqnOLtwLBAHYceL12Q,5475
|
35
35
|
warp_beacon/scraper/X/types.py,sha256=i36Nu2cHpHCkvoeobBQC3B13Ke_N40tgCCApcm_FBFY,76
|
36
36
|
warp_beacon/scraper/instagram/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
37
|
warp_beacon/scraper/instagram/captcha.py,sha256=9UYziuqB3Tsat_ET6ex-cnZDbi6yCnsXHSpmE8MuUHk,4651
|
@@ -56,9 +56,9 @@ warp_beacon/telegram/progress_file_reader.py,sha256=e3equyNKlKs764AD-iE9QRsh3YDH
|
|
56
56
|
warp_beacon/telegram/types.py,sha256=Kvdng6uCF1HRoqQgGW1ZYYPJoVuYkFb-LDvMBbW5Hjk,89
|
57
57
|
warp_beacon/telegram/utils.py,sha256=zTF8VQfAWetBSjAPbmNe_Zi_LN5fAcWptJKjLaFNHaE,5073
|
58
58
|
warp_beacon/uploader/__init__.py,sha256=1enK6qMWaTZEaK456JwaKOfvCvznHA8cjgceOsrF6Po,5732
|
59
|
-
warp_beacon-2.8.
|
60
|
-
warp_beacon-2.8.
|
61
|
-
warp_beacon-2.8.
|
62
|
-
warp_beacon-2.8.
|
63
|
-
warp_beacon-2.8.
|
64
|
-
warp_beacon-2.8.
|
59
|
+
warp_beacon-2.8.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
60
|
+
warp_beacon-2.8.3.dist-info/METADATA,sha256=TeoJgsE_VnzVPdEzsKzoBvUWetvXgK9-9O2LAUPZ8Tw,23214
|
61
|
+
warp_beacon-2.8.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
62
|
+
warp_beacon-2.8.3.dist-info/entry_points.txt,sha256=eSB61Rb89d56WY0O-vEIQwkn18J-4CMrJcLA_R_8h3g,119
|
63
|
+
warp_beacon-2.8.3.dist-info/top_level.txt,sha256=RraB0PWGvRK2zPYkuICKNgStLG1C5s7rPHHJEHJbkgA,1510
|
64
|
+
warp_beacon-2.8.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|