unrealon 1.1.5__tar.gz → 1.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.5 → unrealon-1.1.6}/MANIFEST.in +0 -2
- {unrealon-1.1.5 → unrealon-1.1.6}/PKG-INFO +11 -7
- {unrealon-1.1.5 → unrealon-1.1.6}/README.md +6 -6
- {unrealon-1.1.5 → unrealon-1.1.6}/pyproject.toml +6 -1
- {unrealon-1.1.5 → unrealon-1.1.6}/requirements-dev.txt +5 -1
- {unrealon-1.1.5 → unrealon-1.1.6}/requirements-test.txt +5 -1
- {unrealon-1.1.5 → unrealon-1.1.6}/requirements.txt +5 -1
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/__init__.py +6 -2
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/cli/interactive_mode.py +6 -4
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/core/browser_manager.py +58 -8
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/config.py +7 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/__init__.py +3 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/logger_bridge.py +6 -6
- unrealon-1.1.6/src/unrealon_browser/managers/page_wait_manager.py +198 -0
- unrealon-1.1.6/src/unrealon_driver/README.md +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/__init__.py +5 -6
- unrealon-1.1.5/src/unrealon/__init__.py +0 -40
- {unrealon-1.1.5 → unrealon-1.1.6}/.gitignore +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/LICENSE +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/README.md +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/cli/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/cli/browser_cli.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/cli/cookies_cli.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/cli/main.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/core/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/core.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/dataclasses.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/detection.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/enums.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/dto/models/statistics.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/captcha.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/cookies.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/profile.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_browser/managers/stealth.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/exceptions.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/cleaner.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/config.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/manager.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/models.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/html_analyzer/websocket_analyzer.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/models/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/models/websocket.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/cli_manager.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/daemon_manager.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/managers/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/managers/config.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/managers/error.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/managers/result.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/parser/parser_manager.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/smart_logging/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/smart_logging/models.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/smart_logging/smart_logger.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/smart_logging/unified_logger.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/websocket/__init__.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/websocket/client.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/websocket/config.py +0 -0
- {unrealon-1.1.5 → unrealon-1.1.6}/src/unrealon_driver/websocket/manager.py +0 -0
|
@@ -3,10 +3,8 @@ include LICENSE
|
|
|
3
3
|
include CHANGELOG.md
|
|
4
4
|
include MANIFEST.in
|
|
5
5
|
include requirements*.txt
|
|
6
|
-
recursive-include src/unrealon *.py
|
|
7
6
|
recursive-include src/unrealon_driver *.py
|
|
8
7
|
recursive-include src/unrealon_browser *.py
|
|
9
|
-
recursive-include src/unrealon_driver *.json
|
|
10
8
|
# Examples and tests are EXCLUDED from public package
|
|
11
9
|
|
|
12
10
|
# Exclude cache files
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unrealon
|
|
3
|
-
Version: 1.1.5
|
|
3
|
+
Version: 1.1.6
|
|
4
4
|
Summary: 🚀 Revolutionary web scraping platform with unbreakable stealth, AI-powered extraction, and zero-config setup. Build enterprise parsers in minutes, not months!
|
|
5
5
|
Project-URL: Homepage, https://github.com/unrealos/unrealon-rpc
|
|
6
6
|
Project-URL: Documentation, https://unrealon-rpc.readthedocs.io
|
|
@@ -36,12 +36,14 @@ Classifier: Topic :: System :: Distributed Computing
|
|
|
36
36
|
Classifier: Typing :: Typed
|
|
37
37
|
Requires-Python: <4.0,>=3.10
|
|
38
38
|
Requires-Dist: aiohttp>=3.9.0
|
|
39
|
+
Requires-Dist: aioipfs<0.8.0,>=0.7.1
|
|
39
40
|
Requires-Dist: asyncio-mqtt>=0.16.0
|
|
40
41
|
Requires-Dist: beautifulsoup4>=4.13.4
|
|
41
42
|
Requires-Dist: click>=8.2.0
|
|
42
43
|
Requires-Dist: httpx>=0.26.0
|
|
43
44
|
Requires-Dist: ipfshttpclient>=0.8.0a2
|
|
44
45
|
Requires-Dist: lxml>=6.0.0
|
|
46
|
+
Requires-Dist: msgpack<2.0.0,>=1.1.1
|
|
45
47
|
Requires-Dist: playwright-stealth>=2.0.0
|
|
46
48
|
Requires-Dist: playwright>=1.54.0
|
|
47
49
|
Requires-Dist: pydantic-yaml<2.0.0,>=1.6.0
|
|
@@ -49,9 +51,11 @@ Requires-Dist: pydantic<3.0,>=2.11
|
|
|
49
51
|
Requires-Dist: python-dateutil>=2.8
|
|
50
52
|
Requires-Dist: python-dotenv>=1.0.0
|
|
51
53
|
Requires-Dist: pyyaml>=6.0
|
|
54
|
+
Requires-Dist: questionary<3.0.0,>=2.1.0
|
|
52
55
|
Requires-Dist: redis>=5.0.0
|
|
53
56
|
Requires-Dist: rich>=13.0.0
|
|
54
57
|
Requires-Dist: tomlkit>=0.13.0
|
|
58
|
+
Requires-Dist: watchdog<7.0.0,>=6.0.0
|
|
55
59
|
Requires-Dist: websockets>=12.0
|
|
56
60
|
Provides-Extra: dev
|
|
57
61
|
Requires-Dist: bandit>=1.7.0; extra == 'dev'
|
|
@@ -347,15 +351,15 @@ await parser.start_daemon(schedule="1h")
|
|
|
347
351
|
- **CAPTCHA Solving** - Automatic CAPTCHA resolution
|
|
348
352
|
- **Behavioral Patterns** - User action simulation
|
|
349
353
|
|
|
350
|
-
### Stealth
|
|
354
|
+
### Stealth Features:
|
|
351
355
|
```python
|
|
352
|
-
#
|
|
353
|
-
parser = ParserManager(
|
|
356
|
+
# Stealth is always enabled by default
|
|
357
|
+
parser = ParserManager() # 🔥 STEALTH ALWAYS ON!
|
|
354
358
|
```
|
|
355
359
|
|
|
356
|
-
- **
|
|
357
|
-
- **
|
|
358
|
-
- **
|
|
360
|
+
- **Webdriver Detection Prevention** - Hides automation signals
|
|
361
|
+
- **Browser Fingerprint Randomization** - Unique fingerprints
|
|
362
|
+
- **JavaScript API Modifications** - Prevents detection
|
|
359
363
|
|
|
360
364
|
---
|
|
361
365
|
|
|
@@ -259,15 +259,15 @@ await parser.start_daemon(schedule="1h")
|
|
|
259
259
|
- **CAPTCHA Solving** - Automatic CAPTCHA resolution
|
|
260
260
|
- **Behavioral Patterns** - User action simulation
|
|
261
261
|
|
|
262
|
-
### Stealth
|
|
262
|
+
### Stealth Features:
|
|
263
263
|
```python
|
|
264
|
-
#
|
|
265
|
-
parser = ParserManager(
|
|
264
|
+
# Stealth is always enabled by default
|
|
265
|
+
parser = ParserManager() # 🔥 STEALTH ALWAYS ON!
|
|
266
266
|
```
|
|
267
267
|
|
|
268
|
-
- **
|
|
269
|
-
- **
|
|
270
|
-
- **
|
|
268
|
+
- **Webdriver Detection Prevention** - Hides automation signals
|
|
269
|
+
- **Browser Fingerprint Randomization** - Unique fingerprints
|
|
270
|
+
- **JavaScript API Modifications** - Prevents detection
|
|
271
271
|
|
|
272
272
|
---
|
|
273
273
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "unrealon"
|
|
7
|
-
version = "1.1.5"
|
|
7
|
+
version = "1.1.6"
|
|
8
8
|
description = "🚀 Revolutionary web scraping platform with unbreakable stealth, AI-powered extraction, and zero-config setup. Build enterprise parsers in minutes, not months!"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -91,6 +91,10 @@ dependencies = [
|
|
|
91
91
|
"asyncio-mqtt>=0.16.0",
|
|
92
92
|
"python-dateutil>=2.8",
|
|
93
93
|
"pydantic-yaml (>=1.6.0,<2.0.0)",
|
|
94
|
+
"msgpack (>=1.1.1,<2.0.0)",
|
|
95
|
+
"aioipfs (>=0.7.1,<0.8.0)",
|
|
96
|
+
"questionary (>=2.1.0,<3.0.0)",
|
|
97
|
+
"watchdog (>=6.0.0,<7.0.0)",
|
|
94
98
|
]
|
|
95
99
|
|
|
96
100
|
[project.optional-dependencies]
|
|
@@ -185,6 +189,7 @@ exclude = [
|
|
|
185
189
|
"tmp/",
|
|
186
190
|
# EXCLUDE from public package
|
|
187
191
|
"src/unrealon_rpc",
|
|
192
|
+
"src/unrealon_server",
|
|
188
193
|
"tests/",
|
|
189
194
|
"examples/",
|
|
190
195
|
"src/*/tests/",
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
PyYAML>=6.0
|
|
6
6
|
aiohttp>=3.9.0
|
|
7
|
+
aioipfs (>=0.7.1,<0.8.0)
|
|
7
8
|
asyncio-mqtt>=0.16.0
|
|
8
9
|
bandit>=1.7.0
|
|
9
10
|
beautifulsoup4>=4.13.4
|
|
@@ -18,6 +19,7 @@ lxml>=6.0.0
|
|
|
18
19
|
mkdocs-material>=9.0.0
|
|
19
20
|
mkdocs>=1.5.0
|
|
20
21
|
mkdocstrings[python]>=0.22.0
|
|
22
|
+
msgpack (>=1.1.1,<2.0.0)
|
|
21
23
|
mypy>=1.5.0
|
|
22
24
|
playwright-stealth>=2.0.0
|
|
23
25
|
playwright>=1.54.0
|
|
@@ -32,11 +34,13 @@ pytest-xdist>=3.0.0
|
|
|
32
34
|
pytest>=7.0
|
|
33
35
|
python-dateutil>=2.8
|
|
34
36
|
python-dotenv>=1.0.0
|
|
37
|
+
questionary (>=2.1.0,<3.0.0)
|
|
35
38
|
questionary>=2.1.0
|
|
36
39
|
redis>=5.0.0
|
|
37
40
|
rich>=13.0.0
|
|
38
41
|
tomlkit>=0.13.0
|
|
39
42
|
twine>=4.0.0
|
|
43
|
+
watchdog (>=6.0.0,<7.0.0)
|
|
40
44
|
websockets>=12.0
|
|
41
45
|
|
|
42
|
-
# Total:
|
|
46
|
+
# Total: 40 dependencies
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
PyYAML>=6.0
|
|
6
6
|
aiohttp>=3.9.0
|
|
7
|
+
aioipfs (>=0.7.1,<0.8.0)
|
|
7
8
|
asyncio-mqtt>=0.16.0
|
|
8
9
|
beautifulsoup4>=4.13.4
|
|
9
10
|
click>=8.2.0
|
|
@@ -11,6 +12,7 @@ factory-boy>=3.2.0
|
|
|
11
12
|
httpx>=0.26.0
|
|
12
13
|
ipfshttpclient>=0.8.0a2
|
|
13
14
|
lxml>=6.0.0
|
|
15
|
+
msgpack (>=1.1.1,<2.0.0)
|
|
14
16
|
playwright-stealth>=2.0.0
|
|
15
17
|
playwright>=1.54.0
|
|
16
18
|
pydantic-yaml (>=1.6.0,<2.0.0)
|
|
@@ -22,9 +24,11 @@ pytest-xdist>=3.0.0
|
|
|
22
24
|
pytest>=7.0
|
|
23
25
|
python-dateutil>=2.8
|
|
24
26
|
python-dotenv>=1.0.0
|
|
27
|
+
questionary (>=2.1.0,<3.0.0)
|
|
25
28
|
redis>=5.0.0
|
|
26
29
|
rich>=13.0.0
|
|
27
30
|
tomlkit>=0.13.0
|
|
31
|
+
watchdog (>=6.0.0,<7.0.0)
|
|
28
32
|
websockets>=12.0
|
|
29
33
|
|
|
30
|
-
# Total:
|
|
34
|
+
# Total: 28 dependencies
|
|
@@ -4,21 +4,25 @@
|
|
|
4
4
|
|
|
5
5
|
PyYAML>=6.0
|
|
6
6
|
aiohttp>=3.9.0
|
|
7
|
+
aioipfs (>=0.7.1,<0.8.0)
|
|
7
8
|
asyncio-mqtt>=0.16.0
|
|
8
9
|
beautifulsoup4>=4.13.4
|
|
9
10
|
click>=8.2.0
|
|
10
11
|
httpx>=0.26.0
|
|
11
12
|
ipfshttpclient>=0.8.0a2
|
|
12
13
|
lxml>=6.0.0
|
|
14
|
+
msgpack (>=1.1.1,<2.0.0)
|
|
13
15
|
playwright-stealth>=2.0.0
|
|
14
16
|
playwright>=1.54.0
|
|
15
17
|
pydantic-yaml (>=1.6.0,<2.0.0)
|
|
16
18
|
pydantic>=2.11,<3.0
|
|
17
19
|
python-dateutil>=2.8
|
|
18
20
|
python-dotenv>=1.0.0
|
|
21
|
+
questionary (>=2.1.0,<3.0.0)
|
|
19
22
|
redis>=5.0.0
|
|
20
23
|
rich>=13.0.0
|
|
21
24
|
tomlkit>=0.13.0
|
|
25
|
+
watchdog (>=6.0.0,<7.0.0)
|
|
22
26
|
websockets>=12.0
|
|
23
27
|
|
|
24
|
-
# Total:
|
|
28
|
+
# Total: 22 dependencies
|
|
@@ -5,8 +5,12 @@ Enterprise-grade browser automation with stealth capabilities and proxy integrat
|
|
|
5
5
|
Based on proven patterns from unrealparser with modular architecture.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
try:
|
|
9
|
+
from importlib.metadata import version
|
|
10
|
+
|
|
11
|
+
__version__ = version("unrealon-browser")
|
|
12
|
+
except Exception:
|
|
13
|
+
__version__ = "1.0.0-dev"
|
|
10
14
|
|
|
11
15
|
# Core browser management
|
|
12
16
|
from .core import BrowserManager
|
|
@@ -97,9 +97,10 @@ async def _interactive_browser_launch(parser: str, verbose: bool) -> None:
|
|
|
97
97
|
|
|
98
98
|
headless = questionary.confirm("Run in headless mode?", default=False).ask()
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
# 🔥 STEALTH ALWAYS ON - NO CONFIG NEEDED!
|
|
101
|
+
stealth_info = questionary.select(
|
|
102
|
+
"Stealth is always enabled. Select stealth verification:",
|
|
103
|
+
choices=["None", "Test on bot.sannysoft.com", "Test on fingerprint.com"]
|
|
103
104
|
).ask()
|
|
104
105
|
|
|
105
106
|
url = questionary.text(
|
|
@@ -114,7 +115,8 @@ async def _interactive_browser_launch(parser: str, verbose: bool) -> None:
|
|
|
114
115
|
Parser: [cyan]{parser}[/cyan]
|
|
115
116
|
Browser: [green]{browser_type}[/green]
|
|
116
117
|
Mode: [yellow]{'Headless' if headless else 'GUI'}[/yellow]
|
|
117
|
-
Stealth: [magenta]
|
|
118
|
+
Stealth: [magenta]ALWAYS ON[/magenta]
|
|
119
|
+
Verification: [magenta]{stealth_info}[/magenta]
|
|
118
120
|
Target URL: [blue]{url}[/blue]
|
|
119
121
|
"""
|
|
120
122
|
|
|
@@ -29,6 +29,7 @@ from unrealon_browser.managers import (
|
|
|
29
29
|
CookieManager,
|
|
30
30
|
CaptchaDetector,
|
|
31
31
|
create_browser_logger_bridge,
|
|
32
|
+
PageWaitManager,
|
|
32
33
|
)
|
|
33
34
|
|
|
34
35
|
|
|
@@ -62,6 +63,7 @@ class BrowserManager:
|
|
|
62
63
|
self.cookie_manager = None
|
|
63
64
|
self.captcha_manager = CaptchaDetector()
|
|
64
65
|
self.logger_bridge = create_browser_logger_bridge(session_id=self._generate_session_id(), parser_id=self.parser_id, enable_console=True) # Use resolved parser_id
|
|
66
|
+
self.page_wait = PageWaitManager(None, self.logger_bridge)
|
|
65
67
|
|
|
66
68
|
# Signal handlers for graceful shutdown
|
|
67
69
|
self._setup_signal_handlers()
|
|
@@ -224,6 +226,9 @@ class BrowserManager:
|
|
|
224
226
|
|
|
225
227
|
# Create page
|
|
226
228
|
self._page = await self._context.new_page()
|
|
229
|
+
|
|
230
|
+
# Update page wait manager with new page
|
|
231
|
+
self.page_wait.update_page(self._page)
|
|
227
232
|
|
|
228
233
|
# 🔥 STEALTH ALWAYS APPLIED TO EVERY PAGE!
|
|
229
234
|
stealth_success = await self.stealth_manager.apply_stealth(self._page)
|
|
@@ -290,7 +295,47 @@ class BrowserManager:
|
|
|
290
295
|
return options
|
|
291
296
|
|
|
292
297
|
async def navigate_async(self, url: str, wait_for: Optional[str] = None) -> Dict[str, Any]:
|
|
293
|
-
"""Navigate to URL with
|
|
298
|
+
"""Navigate to URL with stealth retry logic"""
|
|
299
|
+
# Always use stealth retry logic
|
|
300
|
+
return await self._navigate_with_stealth_retry(url, wait_for)
|
|
301
|
+
|
|
302
|
+
async def _navigate_with_stealth_retry(self, url: str, wait_for: Optional[str] = None) -> Dict[str, Any]:
|
|
303
|
+
"""Navigate with stealth retry logic - universal for all sites"""
|
|
304
|
+
self.logger_bridge.log_info(f"🥷 Stealth navigation to: {url}")
|
|
305
|
+
|
|
306
|
+
# Stealth warmup if enabled
|
|
307
|
+
if self.config.stealth_warmup_enabled:
|
|
308
|
+
self.logger_bridge.log_info(f"🧪 First visiting stealth test page: {self.config.stealth_test_url}")
|
|
309
|
+
await self._navigate_basic(self.config.stealth_test_url, None)
|
|
310
|
+
self.logger_bridge.log_info(f"⏳ Waiting {self.config.stealth_warmup_delay} seconds for stealth establishment...")
|
|
311
|
+
await asyncio.sleep(self.config.stealth_warmup_delay)
|
|
312
|
+
|
|
313
|
+
# Now navigate to the actual target URL with retry logic
|
|
314
|
+
max_retries = self.config.stealth_retry_attempts
|
|
315
|
+
for attempt in range(max_retries + 1):
|
|
316
|
+
if attempt > 0:
|
|
317
|
+
self.logger_bridge.log_info(f"🔄 Stealth retry attempt {attempt}/{max_retries}")
|
|
318
|
+
await asyncio.sleep(self.config.stealth_retry_delay)
|
|
319
|
+
|
|
320
|
+
result = await self._navigate_basic(url, wait_for)
|
|
321
|
+
|
|
322
|
+
# If successful, return result
|
|
323
|
+
if result["success"]:
|
|
324
|
+
if attempt > 0:
|
|
325
|
+
self.logger_bridge.log_info(f"✅ Stealth retry successful on attempt {attempt + 1}")
|
|
326
|
+
return result
|
|
327
|
+
|
|
328
|
+
# If failed and we have retries left, continue
|
|
329
|
+
if attempt < max_retries:
|
|
330
|
+
self.logger_bridge.log_info(f"⚠️ Navigation failed, will retry in {self.config.stealth_retry_delay} seconds...")
|
|
331
|
+
continue
|
|
332
|
+
|
|
333
|
+
# All retries failed
|
|
334
|
+
self.logger_bridge.log_warning(f"❌ Stealth navigation failed after {max_retries + 1} attempts")
|
|
335
|
+
return result
|
|
336
|
+
|
|
337
|
+
async def _navigate_basic(self, url: str, wait_for: Optional[str] = None) -> Dict[str, Any]:
|
|
338
|
+
"""Basic navigation without retry logic"""
|
|
294
339
|
if not self._initialized or not self._page:
|
|
295
340
|
raise RuntimeError("Browser not initialized. Call initialize_async() first.")
|
|
296
341
|
|
|
@@ -328,13 +373,13 @@ class BrowserManager:
|
|
|
328
373
|
title = await self._page.title()
|
|
329
374
|
self.logger_bridge.log_navigation_success(self._page.url, title, duration_ms)
|
|
330
375
|
|
|
331
|
-
# Check for captcha after navigation
|
|
332
|
-
captcha_result = await self.captcha_manager.detect_captcha(self._page)
|
|
333
|
-
if captcha_result.detected:
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
376
|
+
# Check for captcha after navigation (TEMPORARILY DISABLED FOR DEBUGGING)
|
|
377
|
+
# captcha_result = await self.captcha_manager.detect_captcha(self._page)
|
|
378
|
+
# if captcha_result.detected:
|
|
379
|
+
# self.logger_bridge.log_captcha_detected(captcha_result)
|
|
380
|
+
# self.logger_bridge.log_warning(f"⚠️ Captcha detected: {captcha_result.captcha_type.value}")
|
|
381
|
+
# # Update session status to indicate captcha is required
|
|
382
|
+
# self.session_metadata.current_status = BrowserSessionStatus.CAPTCHA_REQUIRED
|
|
338
383
|
|
|
339
384
|
return {
|
|
340
385
|
"success": True,
|
|
@@ -366,6 +411,10 @@ class BrowserManager:
|
|
|
366
411
|
"error": str(e),
|
|
367
412
|
}
|
|
368
413
|
|
|
414
|
+
async def wait_for_page_ready_async(self, wait_type: str = "networkidle", timeout: int = 10000) -> bool:
|
|
415
|
+
"""Wait for page to be ready for parsing (legacy method - use page_wait.* methods instead)"""
|
|
416
|
+
return await self.page_wait.wait_custom(wait_type, timeout)
|
|
417
|
+
|
|
369
418
|
async def get_page_content_async(self) -> Optional[str]:
|
|
370
419
|
"""Get current page content"""
|
|
371
420
|
if not self._page:
|
|
@@ -581,6 +630,7 @@ class BrowserManager:
|
|
|
581
630
|
self.logger_bridge.log_warning(f"⚠️ Page already closed: {e}")
|
|
582
631
|
finally:
|
|
583
632
|
self._page = None
|
|
633
|
+
self.page_wait.update_page(None)
|
|
584
634
|
|
|
585
635
|
# Close context with safety checks
|
|
586
636
|
if self._context:
|
|
@@ -31,3 +31,10 @@ class BrowserConfig(BaseModel):
|
|
|
31
31
|
# Performance
|
|
32
32
|
disable_images: bool = Field(default=False)
|
|
33
33
|
enable_stealth_check: bool = Field(default=False)
|
|
34
|
+
|
|
35
|
+
# Stealth settings
|
|
36
|
+
stealth_warmup_enabled: bool = Field(default=True, description="Enable stealth warmup before target navigation")
|
|
37
|
+
stealth_test_url: str = Field(default="https://bot.sannysoft.com", description="URL for stealth warmup")
|
|
38
|
+
stealth_warmup_delay: float = Field(default=3.0, description="Delay in seconds after stealth warmup")
|
|
39
|
+
stealth_retry_attempts: int = Field(default=2, description="Maximum retry attempts for failed navigation")
|
|
40
|
+
stealth_retry_delay: float = Field(default=3.0, description="Delay between retry attempts")
|
|
@@ -7,6 +7,8 @@ from .profile import ProfileManager
|
|
|
7
7
|
from .logger_bridge import BrowserLoggerBridge, create_browser_logger_bridge
|
|
8
8
|
from .cookies import CookieManager
|
|
9
9
|
from .captcha import CaptchaDetector
|
|
10
|
+
from .page_wait_manager import PageWaitManager
|
|
11
|
+
|
|
10
12
|
|
|
11
13
|
__all__ = [
|
|
12
14
|
"StealthManager",
|
|
@@ -15,4 +17,5 @@ __all__ = [
|
|
|
15
17
|
"create_browser_logger_bridge",
|
|
16
18
|
"CookieManager",
|
|
17
19
|
"CaptchaDetector",
|
|
20
|
+
"PageWaitManager",
|
|
18
21
|
]
|
|
@@ -141,7 +141,7 @@ class BrowserLoggerBridge:
|
|
|
141
141
|
session_id=metadata.session_id,
|
|
142
142
|
parser_name=metadata.parser_name,
|
|
143
143
|
browser_type=metadata.browser_type or "unknown",
|
|
144
|
-
stealth_level="
|
|
144
|
+
stealth_level="ALWAYS_ON", # 🔥 STEALTH ALWAYS ON!
|
|
145
145
|
proxy_host=getattr(metadata.proxy, "host", None) if metadata.proxy else None,
|
|
146
146
|
proxy_port=getattr(metadata.proxy, "port", None) if metadata.proxy else None,
|
|
147
147
|
)
|
|
@@ -168,20 +168,20 @@ class BrowserLoggerBridge:
|
|
|
168
168
|
navigation_type="browser_navigation",
|
|
169
169
|
)
|
|
170
170
|
|
|
171
|
-
def log_stealth_applied(self, stealth_level: str, success: bool) -> None:
|
|
171
|
+
def log_stealth_applied(self, stealth_level: str = "ALWAYS_ON", success: bool = True) -> None:
|
|
172
172
|
"""Log stealth application - 🔥 STEALTH ALWAYS ON!"""
|
|
173
173
|
self._browser_events["stealth_applied"] += 1
|
|
174
174
|
|
|
175
175
|
if success:
|
|
176
176
|
self._log_info(
|
|
177
|
-
|
|
178
|
-
stealth_level=
|
|
177
|
+
"Stealth measures applied: ALWAYS_ON",
|
|
178
|
+
stealth_level="ALWAYS_ON",
|
|
179
179
|
stealth_success=True,
|
|
180
180
|
)
|
|
181
181
|
else:
|
|
182
182
|
self._log_warning(
|
|
183
|
-
|
|
184
|
-
stealth_level=
|
|
183
|
+
"Stealth application failed: ALWAYS_ON",
|
|
184
|
+
stealth_level="ALWAYS_ON",
|
|
185
185
|
stealth_success=False,
|
|
186
186
|
)
|
|
187
187
|
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Page Wait Manager - Convenient methods for different page loading scenarios
|
|
3
|
+
"""
|
|
4
|
+
import asyncio
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from playwright.async_api import Page
|
|
7
|
+
|
|
8
|
+
from .logger_bridge import BrowserLoggerBridge as LoggingBridge
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PageWaitManager:
|
|
12
|
+
"""Manager for different page waiting strategies"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, page: Optional[Page], logger_bridge: LoggingBridge):
|
|
15
|
+
self._page = page
|
|
16
|
+
self.logger_bridge = logger_bridge
|
|
17
|
+
|
|
18
|
+
def update_page(self, page: Optional[Page]):
|
|
19
|
+
"""Update the page reference"""
|
|
20
|
+
self._page = page
|
|
21
|
+
|
|
22
|
+
# Quick wait methods with fallback
|
|
23
|
+
async def wait_fast_with_fallback(self) -> bool:
|
|
24
|
+
"""Fast wait - networkidle 5s, fallback to domcontentloaded 3s"""
|
|
25
|
+
if not self._page:
|
|
26
|
+
return False
|
|
27
|
+
|
|
28
|
+
self.logger_bridge.log_info("🚀 Fast wait with fallback (networkidle 5s → domcontentloaded 3s)")
|
|
29
|
+
|
|
30
|
+
# Try networkidle first
|
|
31
|
+
if await self._wait_for_state("networkidle", 5000):
|
|
32
|
+
return True
|
|
33
|
+
|
|
34
|
+
# Fallback to domcontentloaded
|
|
35
|
+
self.logger_bridge.log_info("⏳ Networkidle timeout, trying domcontentloaded...")
|
|
36
|
+
return await self._wait_for_state("domcontentloaded", 3000)
|
|
37
|
+
|
|
38
|
+
async def wait_safe_with_fallback(self) -> bool:
|
|
39
|
+
"""Safe wait - networkidle 10s, fallback to domcontentloaded 5s"""
|
|
40
|
+
if not self._page:
|
|
41
|
+
return False
|
|
42
|
+
|
|
43
|
+
self.logger_bridge.log_info("🛡️ Safe wait with fallback (networkidle 10s → domcontentloaded 5s)")
|
|
44
|
+
|
|
45
|
+
# Try networkidle first
|
|
46
|
+
if await self._wait_for_state("networkidle", 10000):
|
|
47
|
+
return True
|
|
48
|
+
|
|
49
|
+
# Fallback to domcontentloaded
|
|
50
|
+
self.logger_bridge.log_info("⏳ Networkidle timeout, trying domcontentloaded...")
|
|
51
|
+
return await self._wait_for_state("domcontentloaded", 5000)
|
|
52
|
+
|
|
53
|
+
# Generic methods
|
|
54
|
+
async def wait_fast(self) -> bool:
|
|
55
|
+
"""Fast wait - domcontentloaded 3s"""
|
|
56
|
+
if not self._page:
|
|
57
|
+
return False
|
|
58
|
+
|
|
59
|
+
self.logger_bridge.log_info("⚡ Fast wait (domcontentloaded 3s)")
|
|
60
|
+
return await self._wait_for_state("domcontentloaded", 3000)
|
|
61
|
+
|
|
62
|
+
async def wait_standard(self) -> bool:
|
|
63
|
+
"""Standard wait - networkidle 10s"""
|
|
64
|
+
if not self._page:
|
|
65
|
+
return False
|
|
66
|
+
|
|
67
|
+
self.logger_bridge.log_info("⏳ Standard wait (networkidle 10s)")
|
|
68
|
+
return await self._wait_for_state("networkidle", 10000)
|
|
69
|
+
|
|
70
|
+
async def wait_full_load(self) -> bool:
|
|
71
|
+
"""Full page load including images - load 30s"""
|
|
72
|
+
if not self._page:
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
self.logger_bridge.log_info("🖼️ Full load wait (load 30s)")
|
|
76
|
+
return await self._wait_for_state("load", 30000)
|
|
77
|
+
|
|
78
|
+
async def wait_minimal(self) -> bool:
|
|
79
|
+
"""Minimal wait - domcontentloaded 1s"""
|
|
80
|
+
if not self._page:
|
|
81
|
+
return False
|
|
82
|
+
|
|
83
|
+
self.logger_bridge.log_info("⚡ Minimal wait (domcontentloaded 1s)")
|
|
84
|
+
return await self._wait_for_state("domcontentloaded", 1000)
|
|
85
|
+
|
|
86
|
+
# Specialized methods
|
|
87
|
+
async def wait_spa(self) -> bool:
|
|
88
|
+
"""Single Page Application wait - networkidle 15s"""
|
|
89
|
+
if not self._page:
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
self.logger_bridge.log_info("⚛️ SPA wait (networkidle 15s)")
|
|
93
|
+
return await self._wait_for_state("networkidle", 15000)
|
|
94
|
+
|
|
95
|
+
# Custom methods
|
|
96
|
+
async def wait_custom(self, wait_type: str = "networkidle", timeout: int = 10000) -> bool:
|
|
97
|
+
"""Custom wait with specified parameters"""
|
|
98
|
+
if not self._page:
|
|
99
|
+
return False
|
|
100
|
+
|
|
101
|
+
self.logger_bridge.log_info(f"🔧 Custom wait ({wait_type} {timeout}ms)")
|
|
102
|
+
return await self._wait_for_state(wait_type, timeout)
|
|
103
|
+
|
|
104
|
+
async def wait_with_fallback(self,
|
|
105
|
+
primary_type: str = "networkidle",
|
|
106
|
+
primary_timeout: int = 10000,
|
|
107
|
+
fallback_type: str = "domcontentloaded",
|
|
108
|
+
fallback_timeout: int = 5000) -> bool:
|
|
109
|
+
"""Wait with custom fallback strategy"""
|
|
110
|
+
if not self._page:
|
|
111
|
+
return False
|
|
112
|
+
|
|
113
|
+
self.logger_bridge.log_info(f"🔄 Fallback wait ({primary_type} {primary_timeout}ms → {fallback_type} {fallback_timeout}ms)")
|
|
114
|
+
|
|
115
|
+
# Try primary first
|
|
116
|
+
if await self._wait_for_state(primary_type, primary_timeout):
|
|
117
|
+
return True
|
|
118
|
+
|
|
119
|
+
# Fallback
|
|
120
|
+
self.logger_bridge.log_info(f"⏳ {primary_type} timeout, trying {fallback_type}...")
|
|
121
|
+
return await self._wait_for_state(fallback_type, fallback_timeout)
|
|
122
|
+
|
|
123
|
+
# Helper methods
|
|
124
|
+
async def _wait_for_state(self, wait_type: str, timeout: int) -> bool:
|
|
125
|
+
"""Internal method to wait for specific state"""
|
|
126
|
+
if not self._page:
|
|
127
|
+
return False
|
|
128
|
+
|
|
129
|
+
try:
|
|
130
|
+
if wait_type == "networkidle":
|
|
131
|
+
await self._page.wait_for_load_state("networkidle", timeout=timeout)
|
|
132
|
+
elif wait_type == "domcontentloaded":
|
|
133
|
+
await self._page.wait_for_load_state("domcontentloaded", timeout=timeout)
|
|
134
|
+
elif wait_type == "load":
|
|
135
|
+
await self._page.wait_for_load_state("load", timeout=timeout)
|
|
136
|
+
else:
|
|
137
|
+
# Default to networkidle
|
|
138
|
+
await self._page.wait_for_load_state("networkidle", timeout=timeout)
|
|
139
|
+
|
|
140
|
+
self.logger_bridge.log_info(f"✅ Page ready ({wait_type})")
|
|
141
|
+
return True
|
|
142
|
+
|
|
143
|
+
except Exception as e:
|
|
144
|
+
self.logger_bridge.log_warning(f"⚠️ Page ready timeout ({wait_type}): {e}")
|
|
145
|
+
return False
|
|
146
|
+
|
|
147
|
+
# Utility methods
|
|
148
|
+
async def wait_for_selector(self, selector: str, timeout: int = 10000) -> bool:
|
|
149
|
+
"""Wait for specific selector to appear"""
|
|
150
|
+
if not self._page:
|
|
151
|
+
return False
|
|
152
|
+
|
|
153
|
+
try:
|
|
154
|
+
self.logger_bridge.log_info(f"🎯 Waiting for selector: {selector}")
|
|
155
|
+
await self._page.wait_for_selector(selector, timeout=timeout)
|
|
156
|
+
self.logger_bridge.log_info(f"✅ Selector found: {selector}")
|
|
157
|
+
return True
|
|
158
|
+
except Exception as e:
|
|
159
|
+
self.logger_bridge.log_warning(f"⚠️ Selector timeout: {selector} - {e}")
|
|
160
|
+
return False
|
|
161
|
+
|
|
162
|
+
async def wait_for_text(self, text: str, timeout: int = 10000) -> bool:
|
|
163
|
+
"""Wait for specific text to appear on page"""
|
|
164
|
+
if not self._page:
|
|
165
|
+
return False
|
|
166
|
+
|
|
167
|
+
try:
|
|
168
|
+
self.logger_bridge.log_info(f"📝 Waiting for text: {text}")
|
|
169
|
+
await self._page.wait_for_function(
|
|
170
|
+
f"document.body.innerText.includes('{text}')",
|
|
171
|
+
timeout=timeout
|
|
172
|
+
)
|
|
173
|
+
self.logger_bridge.log_info(f"✅ Text found: {text}")
|
|
174
|
+
return True
|
|
175
|
+
except Exception as e:
|
|
176
|
+
self.logger_bridge.log_warning(f"⚠️ Text timeout: {text} - {e}")
|
|
177
|
+
return False
|
|
178
|
+
|
|
179
|
+
async def wait_for_url_change(self, timeout: int = 10000) -> bool:
|
|
180
|
+
"""Wait for URL to change"""
|
|
181
|
+
if not self._page:
|
|
182
|
+
return False
|
|
183
|
+
|
|
184
|
+
try:
|
|
185
|
+
current_url = self._page.url
|
|
186
|
+
self.logger_bridge.log_info(f"🔗 Waiting for URL change from: {current_url}")
|
|
187
|
+
|
|
188
|
+
await self._page.wait_for_function(
|
|
189
|
+
f"window.location.href !== '{current_url}'",
|
|
190
|
+
timeout=timeout
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
new_url = self._page.url
|
|
194
|
+
self.logger_bridge.log_info(f"✅ URL changed to: {new_url}")
|
|
195
|
+
return True
|
|
196
|
+
except Exception as e:
|
|
197
|
+
self.logger_bridge.log_warning(f"⚠️ URL change timeout: {e}")
|
|
198
|
+
return False
|
|
File without changes
|
|
@@ -14,9 +14,12 @@ Key Features:
|
|
|
14
14
|
- 🛡️ Type Safety: Full Pydantic v2 compliance
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
try:
|
|
18
|
+
from importlib.metadata import version
|
|
18
19
|
|
|
19
|
-
__version__ =
|
|
20
|
+
__version__ = version("unrealon-driver")
|
|
21
|
+
except Exception:
|
|
22
|
+
__version__ = "1.0.0-dev"
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
from .parser import (
|
|
@@ -33,8 +36,6 @@ from .parser import (
|
|
|
33
36
|
ErrorManager,
|
|
34
37
|
RetryConfig,
|
|
35
38
|
ErrorInfo,
|
|
36
|
-
|
|
37
|
-
|
|
38
39
|
)
|
|
39
40
|
from .exceptions import ParserError, BrowserError
|
|
40
41
|
|
|
@@ -55,8 +56,6 @@ __all__ = [
|
|
|
55
56
|
"ErrorManager",
|
|
56
57
|
"RetryConfig",
|
|
57
58
|
"ErrorInfo",
|
|
58
|
-
|
|
59
|
-
|
|
60
59
|
# Exceptions
|
|
61
60
|
"ParserError",
|
|
62
61
|
"BrowserError",
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
UnrealOn - Main Package
|
|
3
|
-
Enterprise browser automation framework with WebSocket bridge for distributed web scraping.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from importlib.metadata import version
|
|
7
|
-
from pydantic import BaseModel, Field, ConfigDict
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
try:
|
|
11
|
-
__version__ = version("unrealon")
|
|
12
|
-
except Exception:
|
|
13
|
-
__version__ = "1.1.5"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class VersionInfo(BaseModel):
|
|
17
|
-
"""Version information model."""
|
|
18
|
-
|
|
19
|
-
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
20
|
-
|
|
21
|
-
version: str = Field(default=__version__)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
# Re-export main modules
|
|
25
|
-
import unrealon_driver
|
|
26
|
-
import unrealon_server
|
|
27
|
-
import unrealon_browser
|
|
28
|
-
|
|
29
|
-
# Re-export all from submodules
|
|
30
|
-
from unrealon_driver import *
|
|
31
|
-
from unrealon_server import *
|
|
32
|
-
from unrealon_browser import *
|
|
33
|
-
|
|
34
|
-
__all__ = [
|
|
35
|
-
"VersionInfo",
|
|
36
|
-
# Re-export all from submodules
|
|
37
|
-
*getattr(unrealon_driver, "__all__", []),
|
|
38
|
-
*getattr(unrealon_server, "__all__", []),
|
|
39
|
-
*getattr(unrealon_browser, "__all__", []),
|
|
40
|
-
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|