unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +16 -6
- unrealon-1.1.4.dist-info/METADATA +658 -0
- unrealon-1.1.4.dist-info/RECORD +54 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
- unrealon_browser/__init__.py +3 -6
- unrealon_browser/core/browser_manager.py +86 -84
- unrealon_browser/dto/models/config.py +2 -0
- unrealon_browser/managers/captcha.py +165 -185
- unrealon_browser/managers/cookies.py +57 -28
- unrealon_browser/managers/logger_bridge.py +94 -34
- unrealon_browser/managers/profile.py +186 -158
- unrealon_browser/managers/stealth.py +58 -47
- unrealon_driver/__init__.py +8 -21
- unrealon_driver/exceptions.py +5 -0
- unrealon_driver/html_analyzer/__init__.py +32 -0
- unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
- unrealon_driver/html_analyzer/config.py +64 -0
- unrealon_driver/html_analyzer/manager.py +247 -0
- unrealon_driver/html_analyzer/models.py +115 -0
- unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
- unrealon_driver/models/__init__.py +31 -0
- unrealon_driver/models/websocket.py +98 -0
- unrealon_driver/parser/__init__.py +4 -23
- unrealon_driver/parser/cli_manager.py +6 -5
- unrealon_driver/parser/daemon_manager.py +242 -66
- unrealon_driver/parser/managers/__init__.py +0 -21
- unrealon_driver/parser/managers/config.py +15 -3
- unrealon_driver/parser/parser_manager.py +225 -395
- unrealon_driver/smart_logging/__init__.py +24 -0
- unrealon_driver/smart_logging/models.py +44 -0
- unrealon_driver/smart_logging/smart_logger.py +406 -0
- unrealon_driver/smart_logging/unified_logger.py +525 -0
- unrealon_driver/websocket/__init__.py +31 -0
- unrealon_driver/websocket/client.py +249 -0
- unrealon_driver/websocket/config.py +188 -0
- unrealon_driver/websocket/manager.py +90 -0
- unrealon-1.1.1.dist-info/METADATA +0 -722
- unrealon-1.1.1.dist-info/RECORD +0 -82
- unrealon_bridge/__init__.py +0 -114
- unrealon_bridge/cli.py +0 -316
- unrealon_bridge/client/__init__.py +0 -93
- unrealon_bridge/client/base.py +0 -78
- unrealon_bridge/client/commands.py +0 -89
- unrealon_bridge/client/connection.py +0 -90
- unrealon_bridge/client/events.py +0 -65
- unrealon_bridge/client/health.py +0 -38
- unrealon_bridge/client/html_parser.py +0 -146
- unrealon_bridge/client/logging.py +0 -139
- unrealon_bridge/client/proxy.py +0 -70
- unrealon_bridge/client/scheduler.py +0 -450
- unrealon_bridge/client/session.py +0 -70
- unrealon_bridge/configs/__init__.py +0 -14
- unrealon_bridge/configs/bridge_config.py +0 -212
- unrealon_bridge/configs/bridge_config.yaml +0 -39
- unrealon_bridge/models/__init__.py +0 -138
- unrealon_bridge/models/base.py +0 -28
- unrealon_bridge/models/command.py +0 -41
- unrealon_bridge/models/events.py +0 -40
- unrealon_bridge/models/html_parser.py +0 -79
- unrealon_bridge/models/logging.py +0 -55
- unrealon_bridge/models/parser.py +0 -63
- unrealon_bridge/models/proxy.py +0 -41
- unrealon_bridge/models/requests.py +0 -95
- unrealon_bridge/models/responses.py +0 -88
- unrealon_bridge/models/scheduler.py +0 -592
- unrealon_bridge/models/session.py +0 -28
- unrealon_bridge/server/__init__.py +0 -91
- unrealon_bridge/server/base.py +0 -171
- unrealon_bridge/server/handlers/__init__.py +0 -23
- unrealon_bridge/server/handlers/command.py +0 -110
- unrealon_bridge/server/handlers/html_parser.py +0 -139
- unrealon_bridge/server/handlers/logging.py +0 -95
- unrealon_bridge/server/handlers/parser.py +0 -95
- unrealon_bridge/server/handlers/proxy.py +0 -75
- unrealon_bridge/server/handlers/scheduler.py +0 -545
- unrealon_bridge/server/handlers/session.py +0 -66
- unrealon_driver/browser/__init__.py +0 -8
- unrealon_driver/browser/config.py +0 -74
- unrealon_driver/browser/manager.py +0 -416
- unrealon_driver/parser/managers/browser.py +0 -51
- unrealon_driver/parser/managers/logging.py +0 -609
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -6,6 +6,7 @@ Adapted from reliable unrealparser implementation with browser automation integr
|
|
|
6
6
|
import asyncio
|
|
7
7
|
from typing import Optional, Dict, Any, List
|
|
8
8
|
from datetime import datetime, timezone
|
|
9
|
+
import logging
|
|
9
10
|
|
|
10
11
|
# Browser DTOs
|
|
11
12
|
from unrealon_browser.dto import (
|
|
@@ -19,11 +20,13 @@ from unrealon_browser.dto import (
|
|
|
19
20
|
# Playwright imports
|
|
20
21
|
from playwright.async_api import Page, BrowserContext, Browser
|
|
21
22
|
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
22
25
|
|
|
23
26
|
class CaptchaDetector:
|
|
24
27
|
"""
|
|
25
28
|
Captcha detection and manual resolution system
|
|
26
|
-
|
|
29
|
+
|
|
27
30
|
Key features from unrealparser:
|
|
28
31
|
- Automated captcha detection using multiple indicators
|
|
29
32
|
- Manual resolution through headful mode switching
|
|
@@ -31,9 +34,10 @@ class CaptchaDetector:
|
|
|
31
34
|
- Proxy-based captcha tracking
|
|
32
35
|
- Support for multiple captcha types (reCAPTCHA, Cloudflare, etc.)
|
|
33
36
|
"""
|
|
34
|
-
|
|
35
|
-
def __init__(self):
|
|
37
|
+
|
|
38
|
+
def __init__(self, logger_bridge=None):
|
|
36
39
|
"""Initialize captcha detection manager"""
|
|
40
|
+
self.logger_bridge = logger_bridge
|
|
37
41
|
self._captcha_indicators = {
|
|
38
42
|
# Common captcha element selectors
|
|
39
43
|
"recaptcha_selectors": [
|
|
@@ -89,91 +93,91 @@ class CaptchaDetector:
|
|
|
89
93
|
"bot detection",
|
|
90
94
|
],
|
|
91
95
|
}
|
|
92
|
-
|
|
96
|
+
|
|
93
97
|
# Statistics
|
|
94
98
|
self._captchas_detected = 0
|
|
95
99
|
self._captchas_solved = 0
|
|
96
100
|
self._detection_history: List[CaptchaDetection] = []
|
|
97
|
-
|
|
101
|
+
|
|
102
|
+
def _logger(self, message: str, level: str = "info") -> None:
|
|
103
|
+
if self.logger_bridge:
|
|
104
|
+
if level == "info":
|
|
105
|
+
self.logger_bridge.log_info(message)
|
|
106
|
+
elif level == "error":
|
|
107
|
+
self.logger_bridge.log_error(message)
|
|
108
|
+
elif level == "warning":
|
|
109
|
+
self.logger_bridge.log_warning(message)
|
|
110
|
+
else:
|
|
111
|
+
self.logger_bridge.log_info(message)
|
|
112
|
+
else:
|
|
113
|
+
if level == "info":
|
|
114
|
+
logger.info(message)
|
|
115
|
+
elif level == "error":
|
|
116
|
+
logger.error(message)
|
|
117
|
+
elif level == "warning":
|
|
118
|
+
logger.warning(message)
|
|
119
|
+
else:
|
|
120
|
+
logger.info(message)
|
|
121
|
+
|
|
98
122
|
async def detect_captcha(self, page: Page) -> CaptchaDetection:
|
|
99
123
|
"""
|
|
100
124
|
Detect if current page contains captcha challenge
|
|
101
|
-
|
|
125
|
+
|
|
102
126
|
Args:
|
|
103
127
|
page: Playwright page instance
|
|
104
|
-
|
|
128
|
+
|
|
105
129
|
Returns:
|
|
106
130
|
CaptchaDetection with detection details
|
|
107
131
|
"""
|
|
108
132
|
# Playwright is always available (required dependency)
|
|
109
133
|
try:
|
|
110
134
|
if not page:
|
|
111
|
-
return CaptchaDetection(
|
|
112
|
-
detected=False,
|
|
113
|
-
captcha_type=CaptchaType.UNKNOWN,
|
|
114
|
-
page_url="unknown",
|
|
115
|
-
page_title="Error: No page provided"
|
|
116
|
-
)
|
|
135
|
+
return CaptchaDetection(detected=False, captcha_type=CaptchaType.UNKNOWN, page_url="unknown", page_title="Error: No page provided")
|
|
117
136
|
except Exception:
|
|
118
|
-
return CaptchaDetection(
|
|
119
|
-
|
|
120
|
-
captcha_type=CaptchaType.UNKNOWN,
|
|
121
|
-
page_url="unknown",
|
|
122
|
-
page_title="Error: Page access error"
|
|
123
|
-
)
|
|
124
|
-
|
|
137
|
+
return CaptchaDetection(detected=False, captcha_type=CaptchaType.UNKNOWN, page_url="unknown", page_title="Error: Page access error")
|
|
138
|
+
|
|
125
139
|
try:
|
|
126
140
|
current_url = page.url
|
|
127
141
|
page_title = await page.title()
|
|
128
142
|
page_content = await page.content()
|
|
129
|
-
|
|
143
|
+
|
|
130
144
|
# Initialize result
|
|
131
|
-
result = CaptchaDetection(
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
print(f"🔍 Checking for captcha on: {current_url}")
|
|
139
|
-
print(f" Page title: {page_title}")
|
|
140
|
-
|
|
145
|
+
result = CaptchaDetection(detected=False, captcha_type=CaptchaType.UNKNOWN, page_url=current_url, page_title=page_title)
|
|
146
|
+
|
|
147
|
+
self._logger(f"🔍 Checking for captcha on: {current_url}", "info")
|
|
148
|
+
self._logger(f" Page title: {page_title}", "info")
|
|
149
|
+
|
|
141
150
|
# Check for reCAPTCHA
|
|
142
151
|
if await self._check_recaptcha(page, page_content, page_title):
|
|
143
152
|
result.detected = True
|
|
144
153
|
result.captcha_type = CaptchaType.RECAPTCHA
|
|
145
|
-
|
|
146
|
-
|
|
154
|
+
self._logger("🤖 reCAPTCHA detected!", "warning")
|
|
155
|
+
|
|
147
156
|
# Check for Cloudflare
|
|
148
157
|
elif await self._check_cloudflare(page, page_content, page_title):
|
|
149
158
|
result.detected = True
|
|
150
159
|
result.captcha_type = CaptchaType.CLOUDFLARE
|
|
151
|
-
|
|
152
|
-
|
|
160
|
+
self._logger("☁️ Cloudflare challenge detected!", "warning")
|
|
161
|
+
|
|
153
162
|
# Check for generic captcha
|
|
154
163
|
elif await self._check_generic_captcha(page, page_content, page_title):
|
|
155
164
|
result.detected = True
|
|
156
165
|
result.captcha_type = CaptchaType.IMAGE_CAPTCHA
|
|
157
|
-
|
|
158
|
-
|
|
166
|
+
self._logger("🖼️ Generic captcha detected!", "warning")
|
|
167
|
+
|
|
159
168
|
if result.detected:
|
|
160
169
|
self._captchas_detected += 1
|
|
161
170
|
self._detection_history.append(result)
|
|
162
|
-
|
|
171
|
+
self._logger(f"⚠️ Captcha detected: {result.captcha_type.value}", "warning")
|
|
163
172
|
else:
|
|
164
|
-
|
|
165
|
-
|
|
173
|
+
self._logger("✅ No captcha detected", "info")
|
|
174
|
+
|
|
166
175
|
return result
|
|
167
|
-
|
|
176
|
+
|
|
168
177
|
except Exception as e:
|
|
169
|
-
|
|
170
|
-
return CaptchaDetection(
|
|
171
|
-
|
|
172
|
-
captcha_type=CaptchaType.UNKNOWN,
|
|
173
|
-
page_url=current_url if 'current_url' in locals() else "unknown",
|
|
174
|
-
page_title="Error during detection"
|
|
175
|
-
)
|
|
176
|
-
|
|
178
|
+
self._logger(f"❌ Error during captcha detection: {e}", "error")
|
|
179
|
+
return CaptchaDetection(detected=False, captcha_type=CaptchaType.UNKNOWN, page_url=current_url if "current_url" in locals() else "unknown", page_title="Error during detection")
|
|
180
|
+
|
|
177
181
|
async def _check_recaptcha(self, page: Page, content: str, title: str) -> bool:
|
|
178
182
|
"""Check for reCAPTCHA indicators"""
|
|
179
183
|
try:
|
|
@@ -181,111 +185,106 @@ class CaptchaDetector:
|
|
|
181
185
|
for selector in self._captcha_indicators["recaptcha_selectors"]:
|
|
182
186
|
element = await page.query_selector(selector)
|
|
183
187
|
if element:
|
|
184
|
-
|
|
188
|
+
self._logger(f" Found reCAPTCHA element: {selector}", "info")
|
|
185
189
|
return True
|
|
186
|
-
|
|
190
|
+
|
|
187
191
|
# Check content for reCAPTCHA indicators
|
|
188
192
|
content_lower = content.lower()
|
|
189
193
|
if "recaptcha" in content_lower or "g-recaptcha" in content_lower:
|
|
190
|
-
|
|
194
|
+
self._logger(" Found reCAPTCHA in page content", "info")
|
|
191
195
|
return True
|
|
192
|
-
|
|
196
|
+
|
|
193
197
|
return False
|
|
194
|
-
|
|
198
|
+
|
|
195
199
|
except Exception as e:
|
|
196
|
-
|
|
200
|
+
self._logger(f" Error checking reCAPTCHA: {e}", "error")
|
|
197
201
|
return False
|
|
198
|
-
|
|
202
|
+
|
|
199
203
|
async def _check_cloudflare(self, page: Page, content: str, title: str) -> bool:
|
|
200
204
|
"""Check for Cloudflare challenge indicators"""
|
|
201
205
|
try:
|
|
202
206
|
title_lower = title.lower()
|
|
203
207
|
content_lower = content.lower()
|
|
204
|
-
|
|
208
|
+
|
|
205
209
|
# Check title indicators
|
|
206
210
|
for indicator in self._captcha_indicators["captcha_title_indicators"]:
|
|
207
211
|
if indicator in title_lower:
|
|
208
|
-
|
|
212
|
+
self._logger(f" Found Cloudflare title indicator: {indicator}", "info")
|
|
209
213
|
return True
|
|
210
|
-
|
|
214
|
+
|
|
211
215
|
# Check for Cloudflare elements
|
|
212
216
|
for selector in self._captcha_indicators["cloudflare_selectors"]:
|
|
213
217
|
element = await page.query_selector(selector)
|
|
214
218
|
if element:
|
|
215
|
-
|
|
219
|
+
self._logger(f" Found Cloudflare element: {selector}", "info")
|
|
216
220
|
return True
|
|
217
|
-
|
|
221
|
+
|
|
218
222
|
# Check content for Cloudflare indicators
|
|
219
223
|
if "cloudflare" in content_lower or "checking your browser" in content_lower:
|
|
220
|
-
|
|
224
|
+
self._logger(" Found Cloudflare in page content", "info")
|
|
221
225
|
return True
|
|
222
|
-
|
|
226
|
+
|
|
223
227
|
return False
|
|
224
|
-
|
|
228
|
+
|
|
225
229
|
except Exception as e:
|
|
226
|
-
|
|
230
|
+
self._logger(f" Error checking Cloudflare: {e}", "error")
|
|
227
231
|
return False
|
|
228
|
-
|
|
232
|
+
|
|
229
233
|
async def _check_generic_captcha(self, page: Page, content: str, title: str) -> bool:
|
|
230
234
|
"""Check for generic captcha indicators"""
|
|
231
235
|
try:
|
|
232
236
|
content_lower = content.lower()
|
|
233
237
|
title_lower = title.lower()
|
|
234
|
-
|
|
238
|
+
|
|
235
239
|
# Check for generic captcha elements
|
|
236
240
|
for selector in self._captcha_indicators["generic_captcha_selectors"]:
|
|
237
241
|
element = await page.query_selector(selector)
|
|
238
242
|
if element:
|
|
239
|
-
|
|
243
|
+
self._logger(f" Found generic captcha element: {selector}", "info")
|
|
240
244
|
return True
|
|
241
|
-
|
|
245
|
+
|
|
242
246
|
# Check content for captcha text
|
|
243
247
|
for indicator in self._captcha_indicators["captcha_text_indicators"]:
|
|
244
248
|
if indicator in content_lower:
|
|
245
|
-
|
|
249
|
+
self._logger(f" Found captcha text indicator: {indicator}", "info")
|
|
246
250
|
return True
|
|
247
|
-
|
|
251
|
+
|
|
248
252
|
# Check title for captcha indicators
|
|
249
253
|
for indicator in self._captcha_indicators["captcha_title_indicators"]:
|
|
250
254
|
if indicator in title_lower:
|
|
251
|
-
|
|
255
|
+
self._logger(f" Found captcha title indicator: {indicator}", "info")
|
|
252
256
|
return True
|
|
253
|
-
|
|
257
|
+
|
|
254
258
|
return False
|
|
255
|
-
|
|
259
|
+
|
|
256
260
|
except Exception as e:
|
|
257
|
-
|
|
261
|
+
self._logger(f" Error checking generic captcha: {e}", "error")
|
|
258
262
|
return False
|
|
259
|
-
|
|
260
|
-
async def handle_captcha_interactive(
|
|
261
|
-
self,
|
|
262
|
-
browser_manager,
|
|
263
|
-
detection_result: CaptchaDetection,
|
|
264
|
-
timeout_seconds: int = 300
|
|
265
|
-
) -> Dict[str, Any]:
|
|
263
|
+
|
|
264
|
+
async def handle_captcha_interactive(self, browser_manager, detection_result: CaptchaDetection, timeout_seconds: int = 300) -> Dict[str, Any]:
|
|
266
265
|
"""
|
|
267
266
|
Handle captcha through interactive manual resolution
|
|
268
|
-
|
|
267
|
+
|
|
269
268
|
Args:
|
|
270
269
|
browser_manager: BrowserManager instance for context switching
|
|
271
270
|
detection_result: Captcha detection result
|
|
272
271
|
timeout_seconds: Maximum time to wait for manual resolution
|
|
273
|
-
|
|
272
|
+
|
|
274
273
|
Returns:
|
|
275
274
|
Resolution result with success status and details
|
|
276
275
|
"""
|
|
277
276
|
try:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
277
|
+
self._logger(f"\n🤖 Manual captcha resolution required!", "info")
|
|
278
|
+
self._logger(f" Captcha type: {detection_result.captcha_type.value}", "info")
|
|
279
|
+
self._logger(f" Page URL: {detection_result.page_url}", "info")
|
|
280
|
+
self._logger(f" Timeout: {timeout_seconds} seconds", "info")
|
|
281
|
+
|
|
283
282
|
# Check if browser is already in headed mode
|
|
284
|
-
current_mode = getattr(browser_manager.config,
|
|
285
|
-
|
|
283
|
+
current_mode = getattr(browser_manager.config, "mode", BrowserMode.HEADLESS)
|
|
284
|
+
|
|
286
285
|
if current_mode == BrowserMode.HEADLESS:
|
|
287
|
-
|
|
288
|
-
|
|
286
|
+
self._logger("🔄 Switching to headed mode for manual captcha resolution...", "info")
|
|
287
|
+
|
|
289
288
|
# Create new headed browser context
|
|
290
289
|
headed_browser_manager = await self._create_headed_context(browser_manager)
|
|
291
290
|
if not headed_browser_manager:
|
|
@@ -294,120 +293,111 @@ class CaptchaDetector:
|
|
|
294
293
|
"error": "Failed to create headed browser context",
|
|
295
294
|
"captcha_type": detection_result.captcha_type.value,
|
|
296
295
|
}
|
|
297
|
-
|
|
296
|
+
|
|
298
297
|
# Use headed browser for resolution
|
|
299
298
|
target_browser = headed_browser_manager
|
|
300
299
|
else:
|
|
301
|
-
|
|
300
|
+
self._logger("👀 Browser already in headed mode", "info")
|
|
302
301
|
target_browser = browser_manager
|
|
303
|
-
|
|
302
|
+
|
|
304
303
|
# Wait for manual captcha resolution
|
|
305
|
-
resolution_result = await self._wait_for_captcha_solution(
|
|
306
|
-
|
|
307
|
-
)
|
|
308
|
-
|
|
304
|
+
resolution_result = await self._wait_for_captcha_solution(target_browser, detection_result, timeout_seconds)
|
|
305
|
+
|
|
309
306
|
# Save cookies after successful resolution
|
|
310
307
|
if resolution_result["success"]:
|
|
311
308
|
cookies_saved = await target_browser.save_cookies_for_current_proxy_async()
|
|
312
309
|
resolution_result["cookies_saved"] = cookies_saved
|
|
313
|
-
|
|
310
|
+
|
|
314
311
|
if cookies_saved:
|
|
315
|
-
|
|
312
|
+
self._logger("💾 Cookies saved after captcha resolution", "info")
|
|
316
313
|
self._captchas_solved += 1
|
|
317
314
|
else:
|
|
318
|
-
|
|
319
|
-
|
|
315
|
+
self._logger("⚠️ Failed to save cookies after captcha resolution", "warning")
|
|
316
|
+
|
|
320
317
|
# Clean up headed browser if created
|
|
321
|
-
if current_mode == BrowserMode.HEADLESS and
|
|
318
|
+
if current_mode == BrowserMode.HEADLESS and "headed_browser_manager" in locals():
|
|
322
319
|
await self._cleanup_headed_context(headed_browser_manager)
|
|
323
|
-
|
|
320
|
+
|
|
324
321
|
return resolution_result
|
|
325
|
-
|
|
322
|
+
|
|
326
323
|
except Exception as e:
|
|
327
|
-
|
|
324
|
+
self._logger(f"❌ Error during interactive captcha handling: {e}", "error")
|
|
328
325
|
return {
|
|
329
326
|
"success": False,
|
|
330
327
|
"error": str(e),
|
|
331
328
|
"captcha_type": detection_result.captcha_type.value,
|
|
332
329
|
}
|
|
333
|
-
|
|
330
|
+
|
|
334
331
|
async def _create_headed_context(self, browser_manager) -> Optional[Any]:
|
|
335
332
|
"""Create a new headed browser context for manual interaction"""
|
|
336
333
|
try:
|
|
337
|
-
|
|
338
|
-
|
|
334
|
+
self._logger("🔄 Creating headed browser context...", "info")
|
|
335
|
+
|
|
339
336
|
# This is a simplified approach - in practice you'd create a new
|
|
340
337
|
# BrowserManager instance with headed configuration
|
|
341
338
|
# For now, we'll assume the existing browser can be used
|
|
342
|
-
|
|
339
|
+
|
|
343
340
|
return browser_manager
|
|
344
|
-
|
|
341
|
+
|
|
345
342
|
except Exception as e:
|
|
346
|
-
|
|
343
|
+
self._logger(f"❌ Error creating headed context: {e}", "error")
|
|
347
344
|
return None
|
|
348
|
-
|
|
345
|
+
|
|
349
346
|
async def _cleanup_headed_context(self, browser_manager) -> None:
|
|
350
347
|
"""Clean up headed browser context"""
|
|
351
348
|
try:
|
|
352
|
-
|
|
349
|
+
self._logger("🧹 Cleaning up headed browser context...", "info")
|
|
353
350
|
# Implementation depends on how headed context was created
|
|
354
351
|
pass
|
|
355
|
-
|
|
352
|
+
|
|
356
353
|
except Exception as e:
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
async def _wait_for_captcha_solution(
|
|
360
|
-
self,
|
|
361
|
-
browser_manager,
|
|
362
|
-
detection_result: CaptchaDetection,
|
|
363
|
-
timeout_seconds: int
|
|
364
|
-
) -> Dict[str, Any]:
|
|
354
|
+
self._logger(f"❌ Error cleaning up headed context: {e}", "error")
|
|
355
|
+
|
|
356
|
+
async def _wait_for_captcha_solution(self, browser_manager, detection_result: CaptchaDetection, timeout_seconds: int) -> Dict[str, Any]:
|
|
365
357
|
"""
|
|
366
358
|
Wait for manual captcha solution with periodic checks
|
|
367
|
-
|
|
359
|
+
|
|
368
360
|
Args:
|
|
369
361
|
browser_manager: Browser manager instance
|
|
370
362
|
detection_result: Original captcha detection result
|
|
371
363
|
timeout_seconds: Maximum wait time
|
|
372
|
-
|
|
364
|
+
|
|
373
365
|
Returns:
|
|
374
366
|
Resolution result
|
|
375
367
|
"""
|
|
376
368
|
try:
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
369
|
+
self._logger(f"\n⏳ Waiting for manual captcha resolution...", "info")
|
|
370
|
+
self._logger(" Please solve the captcha in the browser window", "info")
|
|
371
|
+
self._logger(" The system will automatically detect when it's solved", "info")
|
|
372
|
+
self._logger(f" Timeout: {timeout_seconds} seconds", "info")
|
|
373
|
+
|
|
382
374
|
start_time = datetime.now(timezone.utc)
|
|
383
375
|
check_interval = 5 # Check every 5 seconds
|
|
384
|
-
|
|
376
|
+
|
|
385
377
|
while True:
|
|
386
378
|
# Check if timeout exceeded
|
|
387
379
|
elapsed = (datetime.now(timezone.utc) - start_time).total_seconds()
|
|
388
380
|
if elapsed > timeout_seconds:
|
|
389
|
-
|
|
381
|
+
self._logger(f"⏰ Captcha resolution timeout ({timeout_seconds}s)", "warning")
|
|
390
382
|
return {
|
|
391
383
|
"success": False,
|
|
392
384
|
"error": f"Timeout after {timeout_seconds} seconds",
|
|
393
385
|
"captcha_type": detection_result.captcha_type.value,
|
|
394
386
|
"elapsed_seconds": elapsed,
|
|
395
387
|
}
|
|
396
|
-
|
|
388
|
+
|
|
397
389
|
# Check if captcha is still present
|
|
398
|
-
if hasattr(browser_manager,
|
|
390
|
+
if hasattr(browser_manager, "_page") and browser_manager._page:
|
|
399
391
|
current_detection = await self.detect_captcha(browser_manager._page)
|
|
400
|
-
|
|
392
|
+
|
|
401
393
|
if not current_detection.detected:
|
|
402
|
-
|
|
403
|
-
|
|
394
|
+
self._logger("✅ Captcha appears to be solved!", "info")
|
|
395
|
+
|
|
404
396
|
# Verify by checking page content/URL changes
|
|
405
|
-
verification_result = await self._verify_captcha_solution(
|
|
406
|
-
|
|
407
|
-
)
|
|
408
|
-
|
|
397
|
+
verification_result = await self._verify_captcha_solution(browser_manager, detection_result)
|
|
398
|
+
|
|
409
399
|
if verification_result["verified"]:
|
|
410
|
-
|
|
400
|
+
self._logger("🎉 Captcha solution verified!", "info")
|
|
411
401
|
return {
|
|
412
402
|
"success": True,
|
|
413
403
|
"captcha_type": detection_result.captcha_type.value,
|
|
@@ -415,59 +405,55 @@ class CaptchaDetector:
|
|
|
415
405
|
"verification": verification_result,
|
|
416
406
|
}
|
|
417
407
|
else:
|
|
418
|
-
|
|
419
|
-
|
|
408
|
+
self._logger("⚠️ Captcha solution not verified, continuing to wait...", "warning")
|
|
409
|
+
|
|
420
410
|
# Show progress
|
|
421
411
|
remaining = timeout_seconds - elapsed
|
|
422
|
-
|
|
423
|
-
|
|
412
|
+
self._logger(f" ⏳ Still waiting... {remaining:.0f}s remaining", "info")
|
|
413
|
+
|
|
424
414
|
# Wait before next check
|
|
425
415
|
await asyncio.sleep(check_interval)
|
|
426
|
-
|
|
416
|
+
|
|
427
417
|
except Exception as e:
|
|
428
|
-
|
|
418
|
+
self._logger(f"❌ Error waiting for captcha solution: {e}", "error")
|
|
429
419
|
return {
|
|
430
420
|
"success": False,
|
|
431
421
|
"error": str(e),
|
|
432
422
|
"captcha_type": detection_result.captcha_type.value,
|
|
433
423
|
}
|
|
434
|
-
|
|
435
|
-
async def _verify_captcha_solution(
|
|
436
|
-
self,
|
|
437
|
-
browser_manager,
|
|
438
|
-
original_detection: CaptchaDetection
|
|
439
|
-
) -> Dict[str, Any]:
|
|
424
|
+
|
|
425
|
+
async def _verify_captcha_solution(self, browser_manager, original_detection: CaptchaDetection) -> Dict[str, Any]:
|
|
440
426
|
"""
|
|
441
427
|
Verify that captcha was actually solved by checking page changes
|
|
442
|
-
|
|
428
|
+
|
|
443
429
|
Args:
|
|
444
430
|
browser_manager: Browser manager instance
|
|
445
431
|
original_detection: Original captcha detection result
|
|
446
|
-
|
|
432
|
+
|
|
447
433
|
Returns:
|
|
448
434
|
Verification result
|
|
449
435
|
"""
|
|
450
436
|
try:
|
|
451
|
-
if not hasattr(browser_manager,
|
|
437
|
+
if not hasattr(browser_manager, "_page") or not browser_manager._page:
|
|
452
438
|
return {"verified": False, "reason": "No page available"}
|
|
453
|
-
|
|
439
|
+
|
|
454
440
|
page = browser_manager._page
|
|
455
441
|
current_url = page.url
|
|
456
442
|
current_title = await page.title()
|
|
457
|
-
|
|
443
|
+
|
|
458
444
|
# Check if URL changed (common after successful captcha)
|
|
459
445
|
url_changed = current_url != original_detection.page_url
|
|
460
|
-
|
|
446
|
+
|
|
461
447
|
# Check if title changed
|
|
462
448
|
title_changed = True # We don't have original title, assume changed is good
|
|
463
|
-
|
|
449
|
+
|
|
464
450
|
# Check if captcha elements are gone
|
|
465
451
|
captcha_elements_gone = not (await self.detect_captcha(page)).detected
|
|
466
|
-
|
|
452
|
+
|
|
467
453
|
# Look for success indicators
|
|
468
454
|
content = await page.content()
|
|
469
455
|
content_lower = content.lower()
|
|
470
|
-
|
|
456
|
+
|
|
471
457
|
success_indicators = [
|
|
472
458
|
"welcome",
|
|
473
459
|
"dashboard",
|
|
@@ -478,15 +464,12 @@ class CaptchaDetector:
|
|
|
478
464
|
"main",
|
|
479
465
|
"success",
|
|
480
466
|
]
|
|
481
|
-
|
|
467
|
+
|
|
482
468
|
has_success_indicators = any(indicator in content_lower for indicator in success_indicators)
|
|
483
|
-
|
|
469
|
+
|
|
484
470
|
# Determine if solution is verified
|
|
485
|
-
verified = (
|
|
486
|
-
|
|
487
|
-
(url_changed or has_success_indicators)
|
|
488
|
-
)
|
|
489
|
-
|
|
471
|
+
verified = captcha_elements_gone and (url_changed or has_success_indicators)
|
|
472
|
+
|
|
490
473
|
return {
|
|
491
474
|
"verified": verified,
|
|
492
475
|
"url_changed": url_changed,
|
|
@@ -495,35 +478,32 @@ class CaptchaDetector:
|
|
|
495
478
|
"current_url": current_url,
|
|
496
479
|
"current_title": current_title,
|
|
497
480
|
}
|
|
498
|
-
|
|
481
|
+
|
|
499
482
|
except Exception as e:
|
|
500
|
-
|
|
483
|
+
self._logger(f"❌ Error verifying captcha solution: {e}", "error")
|
|
501
484
|
return {"verified": False, "reason": str(e)}
|
|
502
|
-
|
|
485
|
+
|
|
503
486
|
def get_statistics(self) -> Dict[str, Any]:
|
|
504
487
|
"""Get captcha detection statistics"""
|
|
505
488
|
return {
|
|
506
489
|
"captchas_detected": self._captchas_detected,
|
|
507
490
|
"captchas_solved": self._captchas_solved,
|
|
508
|
-
"success_rate": (
|
|
509
|
-
(self._captchas_solved / self._captchas_detected * 100)
|
|
510
|
-
if self._captchas_detected > 0 else 0
|
|
511
|
-
),
|
|
491
|
+
"success_rate": ((self._captchas_solved / self._captchas_detected * 100) if self._captchas_detected > 0 else 0),
|
|
512
492
|
"detection_history_count": len(self._detection_history),
|
|
513
493
|
"supported_types": [t.value for t in CaptchaType],
|
|
514
494
|
}
|
|
515
|
-
|
|
495
|
+
|
|
516
496
|
def print_statistics(self) -> None:
|
|
517
497
|
"""Print captcha detection statistics"""
|
|
518
498
|
stats = self.get_statistics()
|
|
519
|
-
|
|
499
|
+
|
|
520
500
|
print(f"\n🤖 Captcha Detection Statistics:")
|
|
521
501
|
print(f" Captchas detected: {stats['captchas_detected']}")
|
|
522
502
|
print(f" Captchas solved: {stats['captchas_solved']}")
|
|
523
503
|
print(f" Success rate: {stats['success_rate']:.1f}%")
|
|
524
504
|
print(f" Detection history: {stats['detection_history_count']} events")
|
|
525
505
|
print(f" Supported types: {', '.join(stats['supported_types'])}")
|
|
526
|
-
|
|
506
|
+
|
|
527
507
|
# Show recent detections
|
|
528
508
|
if self._detection_history:
|
|
529
509
|
print(" Recent detections:")
|
|
@@ -531,12 +511,12 @@ class CaptchaDetector:
|
|
|
531
511
|
print(f" {detection.detected_at.strftime('%H:%M:%S')} - {detection.captcha_type.value} on {detection.page_url}")
|
|
532
512
|
else:
|
|
533
513
|
print(" No captcha detections yet")
|
|
534
|
-
|
|
514
|
+
|
|
535
515
|
def get_detection_history(self) -> List[CaptchaDetection]:
|
|
536
516
|
"""Get captcha detection history"""
|
|
537
517
|
return self._detection_history.copy()
|
|
538
|
-
|
|
518
|
+
|
|
539
519
|
def clear_detection_history(self) -> None:
|
|
540
520
|
"""Clear captcha detection history"""
|
|
541
521
|
self._detection_history.clear()
|
|
542
|
-
|
|
522
|
+
self._logger("🧹 Cleared captcha detection history", "info")
|