unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +16 -6
- unrealon-1.1.4.dist-info/METADATA +658 -0
- unrealon-1.1.4.dist-info/RECORD +54 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
- unrealon_browser/__init__.py +3 -6
- unrealon_browser/core/browser_manager.py +86 -84
- unrealon_browser/dto/models/config.py +2 -0
- unrealon_browser/managers/captcha.py +165 -185
- unrealon_browser/managers/cookies.py +57 -28
- unrealon_browser/managers/logger_bridge.py +94 -34
- unrealon_browser/managers/profile.py +186 -158
- unrealon_browser/managers/stealth.py +58 -47
- unrealon_driver/__init__.py +8 -21
- unrealon_driver/exceptions.py +5 -0
- unrealon_driver/html_analyzer/__init__.py +32 -0
- unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
- unrealon_driver/html_analyzer/config.py +64 -0
- unrealon_driver/html_analyzer/manager.py +247 -0
- unrealon_driver/html_analyzer/models.py +115 -0
- unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
- unrealon_driver/models/__init__.py +31 -0
- unrealon_driver/models/websocket.py +98 -0
- unrealon_driver/parser/__init__.py +4 -23
- unrealon_driver/parser/cli_manager.py +6 -5
- unrealon_driver/parser/daemon_manager.py +242 -66
- unrealon_driver/parser/managers/__init__.py +0 -21
- unrealon_driver/parser/managers/config.py +15 -3
- unrealon_driver/parser/parser_manager.py +225 -395
- unrealon_driver/smart_logging/__init__.py +24 -0
- unrealon_driver/smart_logging/models.py +44 -0
- unrealon_driver/smart_logging/smart_logger.py +406 -0
- unrealon_driver/smart_logging/unified_logger.py +525 -0
- unrealon_driver/websocket/__init__.py +31 -0
- unrealon_driver/websocket/client.py +249 -0
- unrealon_driver/websocket/config.py +188 -0
- unrealon_driver/websocket/manager.py +90 -0
- unrealon-1.1.1.dist-info/METADATA +0 -722
- unrealon-1.1.1.dist-info/RECORD +0 -82
- unrealon_bridge/__init__.py +0 -114
- unrealon_bridge/cli.py +0 -316
- unrealon_bridge/client/__init__.py +0 -93
- unrealon_bridge/client/base.py +0 -78
- unrealon_bridge/client/commands.py +0 -89
- unrealon_bridge/client/connection.py +0 -90
- unrealon_bridge/client/events.py +0 -65
- unrealon_bridge/client/health.py +0 -38
- unrealon_bridge/client/html_parser.py +0 -146
- unrealon_bridge/client/logging.py +0 -139
- unrealon_bridge/client/proxy.py +0 -70
- unrealon_bridge/client/scheduler.py +0 -450
- unrealon_bridge/client/session.py +0 -70
- unrealon_bridge/configs/__init__.py +0 -14
- unrealon_bridge/configs/bridge_config.py +0 -212
- unrealon_bridge/configs/bridge_config.yaml +0 -39
- unrealon_bridge/models/__init__.py +0 -138
- unrealon_bridge/models/base.py +0 -28
- unrealon_bridge/models/command.py +0 -41
- unrealon_bridge/models/events.py +0 -40
- unrealon_bridge/models/html_parser.py +0 -79
- unrealon_bridge/models/logging.py +0 -55
- unrealon_bridge/models/parser.py +0 -63
- unrealon_bridge/models/proxy.py +0 -41
- unrealon_bridge/models/requests.py +0 -95
- unrealon_bridge/models/responses.py +0 -88
- unrealon_bridge/models/scheduler.py +0 -592
- unrealon_bridge/models/session.py +0 -28
- unrealon_bridge/server/__init__.py +0 -91
- unrealon_bridge/server/base.py +0 -171
- unrealon_bridge/server/handlers/__init__.py +0 -23
- unrealon_bridge/server/handlers/command.py +0 -110
- unrealon_bridge/server/handlers/html_parser.py +0 -139
- unrealon_bridge/server/handlers/logging.py +0 -95
- unrealon_bridge/server/handlers/parser.py +0 -95
- unrealon_bridge/server/handlers/proxy.py +0 -75
- unrealon_bridge/server/handlers/scheduler.py +0 -545
- unrealon_bridge/server/handlers/session.py +0 -66
- unrealon_driver/browser/__init__.py +0 -8
- unrealon_driver/browser/config.py +0 -74
- unrealon_driver/browser/manager.py +0 -416
- unrealon_driver/parser/managers/browser.py +0 -51
- unrealon_driver/parser/managers/logging.py +0 -609
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
unrealon/__init__.py,sha256=EEqg3RwVJZpXddYS8ANBYoOLa2hE2B-JDY8VUxHdvno,922
|
|
2
|
+
unrealon_browser/README.md,sha256=9pP6RrfMGHtdT5uDLFAUB1e4nNGzZudXViEo1940gKw,396
|
|
3
|
+
unrealon_browser/__init__.py,sha256=kvp3aZApfvP__iAnFzsHI25XKS9IWnt9znUI9YZbgAk,1428
|
|
4
|
+
unrealon_browser/cli/__init__.py,sha256=b3r88oeCYsqZF8EU8EZXP9v54Q8cIimN7UmxJsXcB84,264
|
|
5
|
+
unrealon_browser/cli/browser_cli.py,sha256=SRRCGbNXaEg1ZN04-jPo9GOWcP2b6Bkalu6PYVOOt5k,8342
|
|
6
|
+
unrealon_browser/cli/cookies_cli.py,sha256=yhZvGrg8bknlH4zlySdi8ue-25Ue-1rI_u1G06OIMg4,13304
|
|
7
|
+
unrealon_browser/cli/interactive_mode.py,sha256=iYt9PNaIBhNjZ9aUa1ZxeeneV2u1VTSkYQ6rsyio-o8,11730
|
|
8
|
+
unrealon_browser/cli/main.py,sha256=XCYcTxJUqaz320KCU_JPKizYMk6bdljb8Boyok3uO-4,1353
|
|
9
|
+
unrealon_browser/core/__init__.py,sha256=uVL_t4sZelUzflWPdgrwoXGnAkSV1WNQ98-eu0QB2eM,151
|
|
10
|
+
unrealon_browser/core/browser_manager.py,sha256=uCr0VYjLCep9Nr1b60wtUDEqLEdvqWae0sAZYzH61vo,26334
|
|
11
|
+
unrealon_browser/dto/__init__.py,sha256=p9mG2QwnXEdHUHYK67vGD6aameM8RkiVATzz8y0u5EE,1206
|
|
12
|
+
unrealon_browser/dto/models/config.py,sha256=XSUkWyam-LWRbqlx1qE9GPosaJZo1ZsW-o_ARyaQ--Q,1019
|
|
13
|
+
unrealon_browser/dto/models/core.py,sha256=HvbwYG27rmmWtp401uws7lfalN_9QPad0M6ceCiN5iQ,2741
|
|
14
|
+
unrealon_browser/dto/models/dataclasses.py,sha256=zqhJVyzp4CvtuTBsZwm6n6TodVWrZf9gkdDG-0_tgeA,2571
|
|
15
|
+
unrealon_browser/dto/models/detection.py,sha256=ma9ZNIjPR7HnjqZaAj6ZoskiewPFiSn_FgFXSkgiQc8,2715
|
|
16
|
+
unrealon_browser/dto/models/enums.py,sha256=Q4WzHdfSKf7dhKyX00i_Pvl2U8w3lBsxOYfSIoaQY3Q,1219
|
|
17
|
+
unrealon_browser/dto/models/statistics.py,sha256=aIzJNV5r23VBxjhEoja4tXwI1Z7_UCw5zOaxuPya2E8,2728
|
|
18
|
+
unrealon_browser/managers/__init__.py,sha256=JuH9FW_kTzVv71jCDp6wOT4SXT6HGSBpyNAb4tD7-ck,456
|
|
19
|
+
unrealon_browser/managers/captcha.py,sha256=KGBO7sfq9XusAlcPByUFdIg-v6rlruzS2oHx-Zx28wo,21453
|
|
20
|
+
unrealon_browser/managers/cookies.py,sha256=r4VVnKLXH82vhU7qgtY-dF7KPf0Ie3QxGD3FEi6geFA,15085
|
|
21
|
+
unrealon_browser/managers/logger_bridge.py,sha256=GSxFSE7I596NQ8zFvWtf5IF9YADexHwvBP2-5FK8wrY,11066
|
|
22
|
+
unrealon_browser/managers/profile.py,sha256=HjddlSeUry_65WPtF8CMkT7cfJ6X3Jap9kJaaZpwtAA,18956
|
|
23
|
+
unrealon_browser/managers/stealth.py,sha256=-ge67IAueO6zRa0SSffi8Fpd2mhG_2IyRvZADzxJbfk,14651
|
|
24
|
+
unrealon_driver/__init__.py,sha256=S0s4m-MpDXGVuQVfcqZ-PccFixGwKgww065LfnK-4fE,1532
|
|
25
|
+
unrealon_driver/exceptions.py,sha256=5b7ndK-UROfVvON3qf9YVXK4tq8PZzgtBSHTkiA1Oos,437
|
|
26
|
+
unrealon_driver/html_analyzer/__init__.py,sha256=sDrhKYJ9I3OdgbL60w92WxfiHD_Jhcw0vgV2XYkKQwg,1007
|
|
27
|
+
unrealon_driver/html_analyzer/cleaner.py,sha256=77umjQojIl9BMSFpe4j6x3_5TMQh7Jk5GXUhuempu3k,25198
|
|
28
|
+
unrealon_driver/html_analyzer/config.py,sha256=BnAUKIr2sLX8XtJ0CC0PxTQieR2eA4SJJ_DM22IhwrY,3021
|
|
29
|
+
unrealon_driver/html_analyzer/manager.py,sha256=0y8EBEJeW3LuWWBLt--W0czs_-UZ-4j5btLS8cigb6E,10734
|
|
30
|
+
unrealon_driver/html_analyzer/models.py,sha256=EXiGJC3i8O-rjqGH4YKVuyS0vqp0s8BmvNZ78qevt50,4854
|
|
31
|
+
unrealon_driver/html_analyzer/websocket_analyzer.py,sha256=MXqtEdV2K4CF4I1Z0zKexf1uqDdwmuguzuAkjV2tyAA,5768
|
|
32
|
+
unrealon_driver/models/__init__.py,sha256=ugsaGIJWH5QU7FduoCoV7S9AxKzYCPisAqow6oV89JQ,620
|
|
33
|
+
unrealon_driver/models/websocket.py,sha256=zmz_J8vER-Y4BCwDXCUbnc-eoUpbFcRLXJRsp_8xFbs,4271
|
|
34
|
+
unrealon_driver/parser/__init__.py,sha256=QW1u5zju71o7mtvINpdz7dO8ZltylrtXUkSm4xzWDwo,949
|
|
35
|
+
unrealon_driver/parser/cli_manager.py,sha256=l0Kk7zm1KGvnjjyydtXxcULmwFerEE03_ZZp4dkN7nc,5124
|
|
36
|
+
unrealon_driver/parser/daemon_manager.py,sha256=0GQ4_ZffJQKcEpw3kyPLXu8up1LGptLbly0BQxwXz5M,15945
|
|
37
|
+
unrealon_driver/parser/parser_manager.py,sha256=URq1xZ7Uyw5CM4TXYWVc1n3IzylyLUfmK5iO2HDYLL8,17923
|
|
38
|
+
unrealon_driver/parser/managers/__init__.py,sha256=DChR17AOEabrIuSNElEzA8kmjmc33rcB1n_RTo3QOaU,616
|
|
39
|
+
unrealon_driver/parser/managers/config.py,sha256=H0MM8FOx_2JQESY9AujrTvsslC9NCTs7sayDIGq3TBs,9897
|
|
40
|
+
unrealon_driver/parser/managers/error.py,sha256=EnJkZLlZihXeKZdHauvnqaSby05caxHOYYIuiLoelkw,14555
|
|
41
|
+
unrealon_driver/parser/managers/result.py,sha256=yIoDTx6e1YzhKmJ2yJPD8_eAAkrjtm7ofN3DFEOLdUU,10236
|
|
42
|
+
unrealon_driver/smart_logging/__init__.py,sha256=sC0Bzzvx73V2iAj7-AH3-H_WFKGRH04eajodREIuwgg,544
|
|
43
|
+
unrealon_driver/smart_logging/models.py,sha256=lGqCxjBp18mA35-VMn7GRUCJvISMw-p1cGUtD8Im3Qo,1079
|
|
44
|
+
unrealon_driver/smart_logging/smart_logger.py,sha256=BVLjA2FTZfFiY97AE3mZh0Ds8rtCj97yIKFcHP56afU,13039
|
|
45
|
+
unrealon_driver/smart_logging/unified_logger.py,sha256=4DfDFNQE1I555Gk1HAxsrd5MFyLID_FyYiqgVRslNvE,18284
|
|
46
|
+
unrealon_driver/websocket/__init__.py,sha256=qwOOIKEcwvP_0GYW6fTAmNdiUMHaotG_IL56g3wi_aU,1008
|
|
47
|
+
unrealon_driver/websocket/client.py,sha256=NvfbeDe_uDfLniS2q6Qzd5p4UnhSqvdSzUaUXNMtzLw,9179
|
|
48
|
+
unrealon_driver/websocket/config.py,sha256=ez6EjAOPjDMoQeXHEkulT90o2vpeC34eShgNFLX2g1E,6500
|
|
49
|
+
unrealon_driver/websocket/manager.py,sha256=8RAq3NuBFfV2y0ThAZVD7IhO_t7OAJV9phvnZooaLlc,2779
|
|
50
|
+
unrealon-1.1.4.dist-info/METADATA,sha256=xwBrCFv9aUhu3QQYBK5irSMuZnP--zleZQZBB3zhip4,21402
|
|
51
|
+
unrealon-1.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
52
|
+
unrealon-1.1.4.dist-info/entry_points.txt,sha256=s-tuFkQDg3TQ3U5k0yzt6oWGUhTLum0BLdCQEIDnpiU,431
|
|
53
|
+
unrealon-1.1.4.dist-info/licenses/LICENSE,sha256=uTZpktXKUsE0IzS5RdSV398HHI74ssbGKTdCbv7U9l0,1070
|
|
54
|
+
unrealon-1.1.4.dist-info/RECORD,,
|
|
@@ -4,6 +4,6 @@ browser-interactive = unrealon_browser.cli.interactive_mode:main
|
|
|
4
4
|
browser-profiles = unrealon_browser.cli.main:profiles_command
|
|
5
5
|
browser-stealth = unrealon_browser.cli.main:stealth_command
|
|
6
6
|
unrealon = unrealon_driver.cli:main
|
|
7
|
-
unrealon-bridge =
|
|
7
|
+
unrealon-bridge = unrealon_server.cli:main
|
|
8
8
|
unrealon-browser = unrealon_browser.cli.main:cli
|
|
9
9
|
unrealon-rpc = unrealon_rpc.cli.main:main
|
unrealon_browser/__init__.py
CHANGED
|
@@ -5,12 +5,8 @@ Enterprise-grade browser automation with stealth capabilities and proxy integrat
|
|
|
5
5
|
Based on proven patterns from unrealparser with modular architecture.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
try:
|
|
11
|
-
__version__ = version("unrealon")
|
|
12
|
-
except Exception:
|
|
13
|
-
__version__ = "0.1.0"
|
|
8
|
+
from unrealon import VersionInfo
|
|
9
|
+
__version__ = VersionInfo().version
|
|
14
10
|
|
|
15
11
|
# Core browser management
|
|
16
12
|
from .core import BrowserManager
|
|
@@ -47,6 +43,7 @@ from .dto import (
|
|
|
47
43
|
)
|
|
48
44
|
|
|
49
45
|
__all__ = [
|
|
46
|
+
"__version__",
|
|
50
47
|
# Core
|
|
51
48
|
"BrowserManager",
|
|
52
49
|
# Managers
|
|
@@ -10,7 +10,9 @@ import uuid
|
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
from typing import Optional, Dict, Any, Union
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
|
|
13
|
+
import subprocess
|
|
14
|
+
import platform
|
|
15
|
+
from playwright.async_api import async_playwright
|
|
14
16
|
from unrealon_browser.dto import (
|
|
15
17
|
BrowserConfig,
|
|
16
18
|
BrowserSession,
|
|
@@ -18,7 +20,6 @@ from unrealon_browser.dto import (
|
|
|
18
20
|
BrowserStatistics,
|
|
19
21
|
BrowserManagerStatistics,
|
|
20
22
|
BrowserType,
|
|
21
|
-
# 🔥 StealthLevel removed - STEALTH ALWAYS ON!
|
|
22
23
|
PageResult,
|
|
23
24
|
ProxyInfo,
|
|
24
25
|
)
|
|
@@ -42,9 +43,10 @@ class BrowserManager:
|
|
|
42
43
|
- Basic statistics
|
|
43
44
|
"""
|
|
44
45
|
|
|
45
|
-
def __init__(self, config: BrowserConfig):
|
|
46
|
+
def __init__(self, config: BrowserConfig, parser_id: Optional[str] = None):
|
|
46
47
|
"""Initialize browser manager with configuration"""
|
|
47
48
|
self.config = config
|
|
49
|
+
self.parser_id = parser_id or config.parser_id or config.parser_name
|
|
48
50
|
self.session_metadata: Optional[BrowserSession] = None
|
|
49
51
|
self._browser = None
|
|
50
52
|
self._context = None
|
|
@@ -59,9 +61,7 @@ class BrowserManager:
|
|
|
59
61
|
self.profile_manager = None
|
|
60
62
|
self.cookie_manager = None
|
|
61
63
|
self.captcha_manager = CaptchaDetector()
|
|
62
|
-
self.logger_bridge = create_browser_logger_bridge(
|
|
63
|
-
session_id=self._generate_session_id(), enable_console=True
|
|
64
|
-
)
|
|
64
|
+
self.logger_bridge = create_browser_logger_bridge(session_id=self._generate_session_id(), parser_id=self.parser_id, enable_console=True) # Use resolved parser_id
|
|
65
65
|
|
|
66
66
|
# Signal handlers for graceful shutdown
|
|
67
67
|
self._setup_signal_handlers()
|
|
@@ -70,7 +70,7 @@ class BrowserManager:
|
|
|
70
70
|
"""Setup signal handlers for graceful shutdown"""
|
|
71
71
|
|
|
72
72
|
def signal_handler(signum, frame):
|
|
73
|
-
|
|
73
|
+
self.logger_bridge.log_info(f"\n🔄 Received signal {signum}, shutting down gracefully...")
|
|
74
74
|
if self._initialized:
|
|
75
75
|
try:
|
|
76
76
|
loop = asyncio.get_event_loop()
|
|
@@ -82,8 +82,8 @@ class BrowserManager:
|
|
|
82
82
|
# If no event loop running, use asyncio.run
|
|
83
83
|
asyncio.run(self.close_async())
|
|
84
84
|
except Exception as e:
|
|
85
|
-
|
|
86
|
-
|
|
85
|
+
self.logger_bridge.log_warning(f"⚠️ Error during shutdown: {e}")
|
|
86
|
+
self.logger_bridge.log_info("🔄 Browser cleanup completed")
|
|
87
87
|
sys.exit(0)
|
|
88
88
|
|
|
89
89
|
signal.signal(signal.SIGINT, signal_handler)
|
|
@@ -95,11 +95,11 @@ class BrowserManager:
|
|
|
95
95
|
# Try graceful cleanup with timeout
|
|
96
96
|
await asyncio.wait_for(self.close_async(), timeout=3.0)
|
|
97
97
|
except asyncio.TimeoutError:
|
|
98
|
-
|
|
98
|
+
self.logger_bridge.log_warning("⚠️ Cleanup timeout - forcing exit")
|
|
99
99
|
except Exception as e:
|
|
100
|
-
|
|
100
|
+
self.logger_bridge.log_warning(f"⚠️ Cleanup error: {e}")
|
|
101
101
|
finally:
|
|
102
|
-
|
|
102
|
+
self.logger_bridge.log_info("🔄 Browser cleanup completed")
|
|
103
103
|
# Force exit from the event loop
|
|
104
104
|
try:
|
|
105
105
|
loop = asyncio.get_event_loop()
|
|
@@ -115,13 +115,33 @@ class BrowserManager:
|
|
|
115
115
|
"""Generate unique session ID"""
|
|
116
116
|
return f"browser_session_{uuid.uuid4().hex[:8]}"
|
|
117
117
|
|
|
118
|
+
async def _force_cleanup_chromium_processes(self) -> None:
|
|
119
|
+
"""Force cleanup of any remaining Chromium processes."""
|
|
120
|
+
try:
|
|
121
|
+
|
|
122
|
+
if platform.system() == "Darwin": # macOS
|
|
123
|
+
# Kill playwright chromium processes
|
|
124
|
+
subprocess.run(["pkill", "-f", "playwright.*chromium"], capture_output=True, check=False)
|
|
125
|
+
|
|
126
|
+
elif platform.system() == "Linux":
|
|
127
|
+
# Kill playwright chromium processes on Linux
|
|
128
|
+
subprocess.run(["pkill", "-f", "playwright.*chromium"], capture_output=True, check=False)
|
|
129
|
+
|
|
130
|
+
elif platform.system() == "Windows":
|
|
131
|
+
# Kill chromium processes on Windows
|
|
132
|
+
subprocess.run(["taskkill", "/F", "/IM", "chrome.exe"], capture_output=True, check=False)
|
|
133
|
+
subprocess.run(["taskkill", "/F", "/IM", "chromium.exe"], capture_output=True, check=False)
|
|
134
|
+
|
|
135
|
+
except Exception as e:
|
|
136
|
+
self.logger_bridge.log_warning(f"⚠️ Could not force cleanup Chromium processes: {e}")
|
|
137
|
+
# Don't raise - this is a best-effort cleanup
|
|
138
|
+
|
|
118
139
|
async def initialize_async(self) -> None:
|
|
119
140
|
"""Initialize browser with Playwright"""
|
|
120
141
|
if self._initialized:
|
|
121
142
|
return
|
|
122
143
|
|
|
123
144
|
try:
|
|
124
|
-
from playwright.async_api import async_playwright
|
|
125
145
|
|
|
126
146
|
# Create session metadata
|
|
127
147
|
self.session_metadata = BrowserSession(
|
|
@@ -135,10 +155,10 @@ class BrowserManager:
|
|
|
135
155
|
browser_type=self.config.browser_type.value,
|
|
136
156
|
)
|
|
137
157
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
158
|
+
self.logger_bridge.log_info(f"🚀 Initializing browser session: {self.session_metadata.session_id}")
|
|
159
|
+
self.logger_bridge.log_info(f" Parser: {self.config.parser_name}")
|
|
160
|
+
self.logger_bridge.log_info(f" Browser: {self.config.browser_type.value}")
|
|
161
|
+
self.logger_bridge.log_info(f" Stealth: ALWAYS ON")
|
|
142
162
|
|
|
143
163
|
# Log initialization through bridge
|
|
144
164
|
self.logger_bridge.log_browser_initialized(self.session_metadata)
|
|
@@ -160,7 +180,7 @@ class BrowserManager:
|
|
|
160
180
|
|
|
161
181
|
# ✅ FIX: Use launch_persistent_context for profiles, regular launch otherwise
|
|
162
182
|
if profile_path:
|
|
163
|
-
|
|
183
|
+
self.logger_bridge.log_info(f" 📂 Using profile: {profile_path}")
|
|
164
184
|
|
|
165
185
|
# Combine args for persistent context (different structure than regular launch)
|
|
166
186
|
persistent_args = {
|
|
@@ -174,23 +194,17 @@ class BrowserManager:
|
|
|
174
194
|
|
|
175
195
|
# Use persistent context with user_data_dir for profiles
|
|
176
196
|
if self.config.browser_type == BrowserType.CHROMIUM:
|
|
177
|
-
self._context = await self._playwright.chromium.launch_persistent_context(
|
|
178
|
-
**persistent_args
|
|
179
|
-
)
|
|
197
|
+
self._context = await self._playwright.chromium.launch_persistent_context(**persistent_args)
|
|
180
198
|
elif self.config.browser_type == BrowserType.FIREFOX:
|
|
181
|
-
self._context = await self._playwright.firefox.launch_persistent_context(
|
|
182
|
-
**persistent_args
|
|
183
|
-
)
|
|
199
|
+
self._context = await self._playwright.firefox.launch_persistent_context(**persistent_args)
|
|
184
200
|
elif self.config.browser_type == BrowserType.WEBKIT:
|
|
185
|
-
self._context = await self._playwright.webkit.launch_persistent_context(
|
|
186
|
-
**persistent_args
|
|
187
|
-
)
|
|
201
|
+
self._context = await self._playwright.webkit.launch_persistent_context(**persistent_args)
|
|
188
202
|
else:
|
|
189
203
|
raise ValueError(f"Unsupported browser type: {self.config.browser_type}")
|
|
190
204
|
|
|
191
205
|
# For persistent context, browser is accessed via context.browser
|
|
192
206
|
self._browser = self._context.browser
|
|
193
|
-
|
|
207
|
+
self.logger_bridge.log_info(f"✅ Persistent context created with profile: {profile_path}")
|
|
194
208
|
else:
|
|
195
209
|
# Regular browser launch without profile
|
|
196
210
|
if self.config.browser_type == BrowserType.CHROMIUM:
|
|
@@ -206,7 +220,7 @@ class BrowserManager:
|
|
|
206
220
|
self._context = await self._browser.new_context(**context_options)
|
|
207
221
|
|
|
208
222
|
# 🔥 STEALTH ALWAYS ON - NO CONFIG NEEDED!
|
|
209
|
-
self.stealth_manager.apply_webdriver_removal(self._context)
|
|
223
|
+
await self.stealth_manager.apply_webdriver_removal(self._context)
|
|
210
224
|
|
|
211
225
|
# Create page
|
|
212
226
|
self._page = await self._context.new_page()
|
|
@@ -217,7 +231,7 @@ class BrowserManager:
|
|
|
217
231
|
|
|
218
232
|
# 🔥 CRITICAL: If stealth fails, CLOSE BROWSER WITH ERROR!
|
|
219
233
|
if not stealth_success:
|
|
220
|
-
|
|
234
|
+
self.logger_bridge.log_error("❌ STEALTH FAILED - CLOSING BROWSER!")
|
|
221
235
|
await self.close_async()
|
|
222
236
|
raise RuntimeError("🔥 STEALTH MANDATORY: Browser closed due to stealth application failure")
|
|
223
237
|
|
|
@@ -226,13 +240,13 @@ class BrowserManager:
|
|
|
226
240
|
self._statistics.set_session_start()
|
|
227
241
|
self._initialized = True
|
|
228
242
|
|
|
229
|
-
|
|
230
|
-
|
|
243
|
+
self.logger_bridge.log_info(f"✅ Browser initialized successfully")
|
|
244
|
+
self.logger_bridge.log_info(f" Session ID: {self.session_metadata.session_id}")
|
|
231
245
|
|
|
232
246
|
except Exception as e:
|
|
233
247
|
if self.session_metadata:
|
|
234
248
|
self.session_metadata.current_status = BrowserSessionStatus.ERROR
|
|
235
|
-
|
|
249
|
+
self.logger_bridge.log_error(f"❌ Failed to initialize browser: {e}")
|
|
236
250
|
raise
|
|
237
251
|
|
|
238
252
|
def _get_browser_args(self) -> Dict[str, Any]:
|
|
@@ -283,7 +297,7 @@ class BrowserManager:
|
|
|
283
297
|
self._statistics.increment_total()
|
|
284
298
|
|
|
285
299
|
try:
|
|
286
|
-
|
|
300
|
+
self.logger_bridge.log_info(f"🌐 Navigating to: {url}")
|
|
287
301
|
|
|
288
302
|
# Navigate with timeout
|
|
289
303
|
response = await self._page.goto(
|
|
@@ -294,10 +308,8 @@ class BrowserManager:
|
|
|
294
308
|
|
|
295
309
|
# Wait for additional selector if specified
|
|
296
310
|
if wait_for:
|
|
297
|
-
|
|
298
|
-
await self._page.wait_for_selector(
|
|
299
|
-
wait_for, timeout=self.config.page_load_timeout_seconds * 1000
|
|
300
|
-
)
|
|
311
|
+
self.logger_bridge.log_info(f"⏳ Waiting for: {wait_for}")
|
|
312
|
+
await self._page.wait_for_selector(wait_for, timeout=self.config.page_load_timeout_seconds * 1000)
|
|
301
313
|
|
|
302
314
|
# Check response status
|
|
303
315
|
if response and response.status >= 400:
|
|
@@ -310,9 +322,7 @@ class BrowserManager:
|
|
|
310
322
|
self.profile_manager.mark_session_success(True)
|
|
311
323
|
|
|
312
324
|
# Calculate duration
|
|
313
|
-
duration_ms = (
|
|
314
|
-
datetime.now(timezone.utc) - datetime.now(timezone.utc).replace(microsecond=0)
|
|
315
|
-
).total_seconds() * 1000
|
|
325
|
+
duration_ms = (datetime.now(timezone.utc) - datetime.now(timezone.utc).replace(microsecond=0)).total_seconds() * 1000
|
|
316
326
|
|
|
317
327
|
# Log successful navigation
|
|
318
328
|
title = await self._page.title()
|
|
@@ -322,7 +332,7 @@ class BrowserManager:
|
|
|
322
332
|
captcha_result = await self.captcha_manager.detect_captcha(self._page)
|
|
323
333
|
if captcha_result.detected:
|
|
324
334
|
self.logger_bridge.log_captcha_detected(captcha_result)
|
|
325
|
-
|
|
335
|
+
self.logger_bridge.log_warning(f"⚠️ Captcha detected: {captcha_result.captcha_type.value}")
|
|
326
336
|
# Update session status to indicate captcha is required
|
|
327
337
|
self.session_metadata.current_status = BrowserSessionStatus.CAPTCHA_REQUIRED
|
|
328
338
|
|
|
@@ -336,16 +346,14 @@ class BrowserManager:
|
|
|
336
346
|
|
|
337
347
|
except Exception as e:
|
|
338
348
|
self._statistics.increment_failed()
|
|
339
|
-
|
|
349
|
+
self.logger_bridge.log_error(f"❌ Navigation failed: {e}")
|
|
340
350
|
|
|
341
351
|
# Mark profile session as failure
|
|
342
352
|
if self.profile_manager:
|
|
343
353
|
self.profile_manager.mark_session_success(False)
|
|
344
354
|
|
|
345
355
|
# Calculate duration
|
|
346
|
-
duration_ms = (
|
|
347
|
-
datetime.now(timezone.utc) - datetime.now(timezone.utc).replace(microsecond=0)
|
|
348
|
-
).total_seconds() * 1000
|
|
356
|
+
duration_ms = (datetime.now(timezone.utc) - datetime.now(timezone.utc).replace(microsecond=0)).total_seconds() * 1000
|
|
349
357
|
|
|
350
358
|
# Log failed navigation
|
|
351
359
|
self.logger_bridge.log_navigation_failed(url, str(e), duration_ms)
|
|
@@ -366,7 +374,7 @@ class BrowserManager:
|
|
|
366
374
|
try:
|
|
367
375
|
return await self._page.content()
|
|
368
376
|
except Exception as e:
|
|
369
|
-
|
|
377
|
+
self.logger_bridge.log_error(f"❌ Failed to get page content: {e}")
|
|
370
378
|
return None
|
|
371
379
|
|
|
372
380
|
async def execute_script_async(self, script: str) -> Any:
|
|
@@ -377,7 +385,7 @@ class BrowserManager:
|
|
|
377
385
|
try:
|
|
378
386
|
return await self._page.evaluate(script)
|
|
379
387
|
except Exception as e:
|
|
380
|
-
|
|
388
|
+
self.logger_bridge.log_error(f"❌ Script execution failed: {e}")
|
|
381
389
|
raise
|
|
382
390
|
|
|
383
391
|
def get_statistics(self) -> Dict[str, Any]:
|
|
@@ -387,14 +395,10 @@ class BrowserManager:
|
|
|
387
395
|
|
|
388
396
|
if self._statistics.session_start_time:
|
|
389
397
|
current_time = datetime.now(timezone.utc)
|
|
390
|
-
stats_dict["session_duration_seconds"] = (
|
|
391
|
-
current_time - self._statistics.session_start_time
|
|
392
|
-
).total_seconds()
|
|
398
|
+
stats_dict["session_duration_seconds"] = (current_time - self._statistics.session_start_time).total_seconds()
|
|
393
399
|
|
|
394
400
|
if self._statistics.total_navigations > 0:
|
|
395
|
-
stats_dict["success_rate"] =
|
|
396
|
-
self._statistics.successful_navigations / self._statistics.total_navigations
|
|
397
|
-
)
|
|
401
|
+
stats_dict["success_rate"] = self._statistics.successful_navigations / self._statistics.total_navigations
|
|
398
402
|
else:
|
|
399
403
|
stats_dict["success_rate"] = 0.0
|
|
400
404
|
|
|
@@ -404,15 +408,13 @@ class BrowserManager:
|
|
|
404
408
|
"""Print session statistics"""
|
|
405
409
|
stats = self.get_statistics()
|
|
406
410
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
)
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
print(f" Success rate: {stats['success_rate']:.1%}")
|
|
415
|
-
print(f" Duration: {stats['session_duration_seconds']:.1f}s")
|
|
411
|
+
self.logger_bridge.log_info("\n📊 Browser Session Statistics:")
|
|
412
|
+
self.logger_bridge.log_info(f" Session ID: {self.session_metadata.session_id if self.session_metadata else 'N/A'}")
|
|
413
|
+
self.logger_bridge.log_info(f" Total navigations: {stats['total_navigations']}")
|
|
414
|
+
self.logger_bridge.log_info(f" Successful: {stats['successful_navigations']}")
|
|
415
|
+
self.logger_bridge.log_info(f" Failed: {stats['failed_navigations']}")
|
|
416
|
+
self.logger_bridge.log_info(f" Success rate: {stats['success_rate']:.1%}")
|
|
417
|
+
self.logger_bridge.log_info(f" Duration: {stats['session_duration_seconds']:.1f}s")
|
|
416
418
|
|
|
417
419
|
# Print stealth status
|
|
418
420
|
self.stealth_manager.print_stealth_status()
|
|
@@ -470,12 +472,10 @@ class BrowserManager:
|
|
|
470
472
|
if not detection_result.detected:
|
|
471
473
|
return {"success": False, "error": "No captcha detected", "should_continue": True}
|
|
472
474
|
|
|
473
|
-
|
|
475
|
+
self.logger_bridge.log_info(f"\n🤖 Starting interactive captcha resolution...")
|
|
474
476
|
|
|
475
477
|
# Handle captcha interactively
|
|
476
|
-
resolution_result = await self.captcha_manager.handle_captcha_interactive(
|
|
477
|
-
self, detection_result, timeout_seconds
|
|
478
|
-
)
|
|
478
|
+
resolution_result = await self.captcha_manager.handle_captcha_interactive(self, detection_result, timeout_seconds)
|
|
479
479
|
|
|
480
480
|
if resolution_result["success"]:
|
|
481
481
|
# Log successful captcha resolution
|
|
@@ -514,7 +514,7 @@ class BrowserManager:
|
|
|
514
514
|
|
|
515
515
|
try:
|
|
516
516
|
# Step 1: Navigate to URL
|
|
517
|
-
|
|
517
|
+
self.logger_bridge.log_info(f"🚀 Starting automation workflow for: {url}")
|
|
518
518
|
navigation_result = await self.navigate_async(url)
|
|
519
519
|
workflow_result["steps_completed"].append("navigation")
|
|
520
520
|
|
|
@@ -528,7 +528,7 @@ class BrowserManager:
|
|
|
528
528
|
workflow_result["captcha_encountered"] = True
|
|
529
529
|
workflow_result["steps_completed"].append("captcha_detection")
|
|
530
530
|
|
|
531
|
-
|
|
531
|
+
self.logger_bridge.log_info(f"🤖 Captcha detected: {captcha_result['captcha_type']}")
|
|
532
532
|
|
|
533
533
|
# Step 3: Handle captcha interactively
|
|
534
534
|
resolution_result = await self.handle_captcha_interactive_async(timeout_seconds=300)
|
|
@@ -536,11 +536,9 @@ class BrowserManager:
|
|
|
536
536
|
|
|
537
537
|
if resolution_result["success"]:
|
|
538
538
|
workflow_result["captcha_resolved"] = True
|
|
539
|
-
|
|
539
|
+
self.logger_bridge.log_info("✅ Captcha resolved successfully!")
|
|
540
540
|
else:
|
|
541
|
-
workflow_result["error"] = (
|
|
542
|
-
f"Captcha resolution failed: {resolution_result.get('error')}"
|
|
543
|
-
)
|
|
541
|
+
workflow_result["error"] = f"Captcha resolution failed: {resolution_result.get('error')}"
|
|
544
542
|
return workflow_result
|
|
545
543
|
|
|
546
544
|
# Step 4: Save cookies if we have a proxy
|
|
@@ -550,25 +548,26 @@ class BrowserManager:
|
|
|
550
548
|
workflow_result["steps_completed"].append("cookie_saving")
|
|
551
549
|
|
|
552
550
|
if cookies_saved:
|
|
553
|
-
|
|
551
|
+
self.logger_bridge.log_info("💾 Cookies saved successfully!")
|
|
554
552
|
|
|
555
553
|
workflow_result["success"] = True
|
|
556
|
-
|
|
554
|
+
self.logger_bridge.log_info("🎉 Automation workflow completed successfully!")
|
|
557
555
|
|
|
558
556
|
return workflow_result
|
|
559
557
|
|
|
560
558
|
except Exception as e:
|
|
561
559
|
workflow_result["error"] = str(e)
|
|
562
|
-
|
|
560
|
+
self.logger_bridge.log_error(f"❌ Automation workflow failed: {e}")
|
|
563
561
|
return workflow_result
|
|
564
562
|
|
|
565
563
|
async def close_async(self) -> None:
|
|
566
564
|
"""Close browser and cleanup resources"""
|
|
565
|
+
|
|
567
566
|
if not self._initialized:
|
|
568
567
|
return
|
|
569
568
|
|
|
570
569
|
try:
|
|
571
|
-
|
|
570
|
+
self.logger_bridge.log_info("🔄 Closing browser session...")
|
|
572
571
|
|
|
573
572
|
if self.session_metadata:
|
|
574
573
|
self.session_metadata.current_status = BrowserSessionStatus.CLOSED
|
|
@@ -579,7 +578,7 @@ class BrowserManager:
|
|
|
579
578
|
if not self._page.is_closed():
|
|
580
579
|
await self._page.close()
|
|
581
580
|
except Exception as e:
|
|
582
|
-
|
|
581
|
+
self.logger_bridge.log_warning(f"⚠️ Page already closed: {e}")
|
|
583
582
|
finally:
|
|
584
583
|
self._page = None
|
|
585
584
|
|
|
@@ -589,7 +588,7 @@ class BrowserManager:
|
|
|
589
588
|
# Check if context is still valid before closing
|
|
590
589
|
await self._context.close()
|
|
591
590
|
except Exception as e:
|
|
592
|
-
|
|
591
|
+
self.logger_bridge.log_warning(f"⚠️ Context already closed: {e}")
|
|
593
592
|
finally:
|
|
594
593
|
self._context = None
|
|
595
594
|
|
|
@@ -599,7 +598,7 @@ class BrowserManager:
|
|
|
599
598
|
if self._browser.is_connected():
|
|
600
599
|
await self._browser.close()
|
|
601
600
|
except Exception as e:
|
|
602
|
-
|
|
601
|
+
self.logger_bridge.log_warning(f"⚠️ Browser already closed: {e}")
|
|
603
602
|
finally:
|
|
604
603
|
self._browser = None
|
|
605
604
|
|
|
@@ -608,18 +607,21 @@ class BrowserManager:
|
|
|
608
607
|
try:
|
|
609
608
|
await self._playwright.stop()
|
|
610
609
|
except Exception as e:
|
|
611
|
-
|
|
610
|
+
self.logger_bridge.log_warning(f"⚠️ Playwright already stopped: {e}")
|
|
612
611
|
finally:
|
|
613
612
|
self._playwright = None
|
|
614
613
|
|
|
614
|
+
# Force cleanup of any remaining Chromium processes
|
|
615
|
+
await self._force_cleanup_chromium_processes()
|
|
616
|
+
|
|
615
617
|
self._initialized = False
|
|
616
618
|
|
|
617
|
-
#
|
|
619
|
+
# Log final statistics
|
|
618
620
|
self.print_statistics()
|
|
619
|
-
|
|
621
|
+
self.logger_bridge.log_info("✅ Browser closed successfully")
|
|
620
622
|
|
|
621
623
|
except Exception as e:
|
|
622
|
-
|
|
624
|
+
self.logger_bridge.log_error(f"❌ Error closing browser: {e}")
|
|
623
625
|
# Don't re-raise - we want graceful shutdown even with errors
|
|
624
626
|
|
|
625
627
|
@property
|
|
@@ -4,6 +4,7 @@ Browser DTOs - Configuration Models
|
|
|
4
4
|
Configuration models for browser automation.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from typing import Optional
|
|
7
8
|
from pydantic import BaseModel, Field, ConfigDict
|
|
8
9
|
from .enums import BrowserType, BrowserMode
|
|
9
10
|
|
|
@@ -25,6 +26,7 @@ class BrowserConfig(BaseModel):
|
|
|
25
26
|
use_proxy_rotation: bool = Field(default=True)
|
|
26
27
|
realistic_ports_only: bool = Field(default=False)
|
|
27
28
|
parser_name: str = Field(default="default_parser")
|
|
29
|
+
parser_id: Optional[str] = Field(default=None, description="Optional parser ID for logging")
|
|
28
30
|
|
|
29
31
|
# Performance
|
|
30
32
|
disable_images: bool = Field(default=False)
|