unrealon 1.1.0__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +16 -6
- unrealon-1.1.4.dist-info/METADATA +658 -0
- unrealon-1.1.4.dist-info/RECORD +54 -0
- {unrealon-1.1.0.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
- unrealon_browser/__init__.py +3 -6
- unrealon_browser/core/browser_manager.py +86 -84
- unrealon_browser/dto/models/config.py +2 -0
- unrealon_browser/managers/captcha.py +165 -185
- unrealon_browser/managers/cookies.py +57 -28
- unrealon_browser/managers/logger_bridge.py +94 -34
- unrealon_browser/managers/profile.py +186 -158
- unrealon_browser/managers/stealth.py +58 -47
- unrealon_driver/__init__.py +8 -21
- unrealon_driver/exceptions.py +5 -0
- unrealon_driver/html_analyzer/__init__.py +32 -0
- unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
- unrealon_driver/html_analyzer/config.py +64 -0
- unrealon_driver/html_analyzer/manager.py +247 -0
- unrealon_driver/html_analyzer/models.py +115 -0
- unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
- unrealon_driver/models/__init__.py +31 -0
- unrealon_driver/models/websocket.py +98 -0
- unrealon_driver/parser/__init__.py +4 -23
- unrealon_driver/parser/cli_manager.py +6 -5
- unrealon_driver/parser/daemon_manager.py +242 -66
- unrealon_driver/parser/managers/__init__.py +0 -21
- unrealon_driver/parser/managers/config.py +15 -3
- unrealon_driver/parser/parser_manager.py +225 -395
- unrealon_driver/smart_logging/__init__.py +24 -0
- unrealon_driver/smart_logging/models.py +44 -0
- unrealon_driver/smart_logging/smart_logger.py +406 -0
- unrealon_driver/smart_logging/unified_logger.py +525 -0
- unrealon_driver/websocket/__init__.py +31 -0
- unrealon_driver/websocket/client.py +249 -0
- unrealon_driver/websocket/config.py +188 -0
- unrealon_driver/websocket/manager.py +90 -0
- unrealon-1.1.0.dist-info/METADATA +0 -164
- unrealon-1.1.0.dist-info/RECORD +0 -82
- unrealon_bridge/__init__.py +0 -114
- unrealon_bridge/cli.py +0 -316
- unrealon_bridge/client/__init__.py +0 -93
- unrealon_bridge/client/base.py +0 -78
- unrealon_bridge/client/commands.py +0 -89
- unrealon_bridge/client/connection.py +0 -90
- unrealon_bridge/client/events.py +0 -65
- unrealon_bridge/client/health.py +0 -38
- unrealon_bridge/client/html_parser.py +0 -146
- unrealon_bridge/client/logging.py +0 -139
- unrealon_bridge/client/proxy.py +0 -70
- unrealon_bridge/client/scheduler.py +0 -450
- unrealon_bridge/client/session.py +0 -70
- unrealon_bridge/configs/__init__.py +0 -14
- unrealon_bridge/configs/bridge_config.py +0 -212
- unrealon_bridge/configs/bridge_config.yaml +0 -39
- unrealon_bridge/models/__init__.py +0 -138
- unrealon_bridge/models/base.py +0 -28
- unrealon_bridge/models/command.py +0 -41
- unrealon_bridge/models/events.py +0 -40
- unrealon_bridge/models/html_parser.py +0 -79
- unrealon_bridge/models/logging.py +0 -55
- unrealon_bridge/models/parser.py +0 -63
- unrealon_bridge/models/proxy.py +0 -41
- unrealon_bridge/models/requests.py +0 -95
- unrealon_bridge/models/responses.py +0 -88
- unrealon_bridge/models/scheduler.py +0 -592
- unrealon_bridge/models/session.py +0 -28
- unrealon_bridge/server/__init__.py +0 -91
- unrealon_bridge/server/base.py +0 -171
- unrealon_bridge/server/handlers/__init__.py +0 -23
- unrealon_bridge/server/handlers/command.py +0 -110
- unrealon_bridge/server/handlers/html_parser.py +0 -139
- unrealon_bridge/server/handlers/logging.py +0 -95
- unrealon_bridge/server/handlers/parser.py +0 -95
- unrealon_bridge/server/handlers/proxy.py +0 -75
- unrealon_bridge/server/handlers/scheduler.py +0 -545
- unrealon_bridge/server/handlers/session.py +0 -66
- unrealon_driver/browser/__init__.py +0 -8
- unrealon_driver/browser/config.py +0 -74
- unrealon_driver/browser/manager.py +0 -416
- unrealon_driver/parser/managers/browser.py +0 -51
- unrealon_driver/parser/managers/logging.py +0 -609
- {unrealon-1.1.0.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
- {unrealon-1.1.0.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
unrealon/__init__.py
CHANGED
|
@@ -4,27 +4,37 @@ Enterprise browser automation framework with WebSocket bridge for distributed we
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from importlib.metadata import version
|
|
7
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
try:
|
|
9
11
|
__version__ = version("unrealon")
|
|
10
12
|
except Exception:
|
|
11
|
-
__version__ = "1.
|
|
13
|
+
__version__ = "1.1.4"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class VersionInfo(BaseModel):
|
|
17
|
+
"""Version information model."""
|
|
18
|
+
|
|
19
|
+
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
20
|
+
|
|
21
|
+
version: str = Field(default=__version__)
|
|
22
|
+
|
|
12
23
|
|
|
13
24
|
# Re-export main modules
|
|
14
25
|
import unrealon_driver
|
|
15
|
-
import
|
|
26
|
+
import unrealon_server
|
|
16
27
|
import unrealon_browser
|
|
17
28
|
|
|
18
29
|
# Re-export all from submodules
|
|
19
30
|
from unrealon_driver import *
|
|
20
|
-
from
|
|
31
|
+
from unrealon_server import *
|
|
21
32
|
from unrealon_browser import *
|
|
22
33
|
|
|
23
34
|
__all__ = [
|
|
24
|
-
|
|
25
|
-
"__version__",
|
|
35
|
+
"VersionInfo",
|
|
26
36
|
# Re-export all from submodules
|
|
27
37
|
*getattr(unrealon_driver, "__all__", []),
|
|
28
|
-
*getattr(
|
|
38
|
+
*getattr(unrealon_server, "__all__", []),
|
|
29
39
|
*getattr(unrealon_browser, "__all__", []),
|
|
30
40
|
]
|
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: unrealon
|
|
3
|
+
Version: 1.1.4
|
|
4
|
+
Summary: 🚀 Revolutionary web scraping platform with unbreakable stealth, AI-powered extraction, and zero-config setup. Build enterprise parsers in minutes, not months!
|
|
5
|
+
Project-URL: Homepage, https://github.com/unrealos/unrealon-rpc
|
|
6
|
+
Project-URL: Documentation, https://unrealon-rpc.readthedocs.io
|
|
7
|
+
Project-URL: Repository, https://github.com/unrealos/unrealon-rpc.git
|
|
8
|
+
Project-URL: Issues, https://github.com/unrealos/unrealon-rpc/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/unrealos/unrealon-rpc/blob/main/CHANGELOG.md
|
|
10
|
+
Author-email: UnrealOS Team <dev@unrealos.com>
|
|
11
|
+
Maintainer-email: UnrealOS Team <dev@unrealos.com>
|
|
12
|
+
License: MIT
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Keywords: ai-parsing,anti-detection,bot-protection,browser-automation,captcha-bypass,data-mining,distributed-parsing,enterprise-scraping,html-extraction,playwright,proxy-rotation,scalable-scraping,stealth-scraping,web-scraping,websocket-bridge,zero-config
|
|
15
|
+
Classifier: Development Status :: 4 - Beta
|
|
16
|
+
Classifier: Environment :: Web Environment
|
|
17
|
+
Classifier: Framework :: AsyncIO
|
|
18
|
+
Classifier: Intended Audience :: Developers
|
|
19
|
+
Classifier: Intended Audience :: Information Technology
|
|
20
|
+
Classifier: Intended Audience :: System Administrators
|
|
21
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
22
|
+
Classifier: Operating System :: OS Independent
|
|
23
|
+
Classifier: Programming Language :: Python
|
|
24
|
+
Classifier: Programming Language :: Python :: 3
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
28
|
+
Classifier: Topic :: Communications
|
|
29
|
+
Classifier: Topic :: Database
|
|
30
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
31
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
|
|
32
|
+
Classifier: Topic :: Office/Business
|
|
33
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
34
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
35
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
36
|
+
Classifier: Typing :: Typed
|
|
37
|
+
Requires-Python: <4.0,>=3.10
|
|
38
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
39
|
+
Requires-Dist: asyncio-mqtt>=0.16.0
|
|
40
|
+
Requires-Dist: beautifulsoup4>=4.13.4
|
|
41
|
+
Requires-Dist: click>=8.2.0
|
|
42
|
+
Requires-Dist: httpx>=0.26.0
|
|
43
|
+
Requires-Dist: ipfshttpclient>=0.8.0a2
|
|
44
|
+
Requires-Dist: lxml>=6.0.0
|
|
45
|
+
Requires-Dist: playwright-stealth>=2.0.0
|
|
46
|
+
Requires-Dist: playwright>=1.54.0
|
|
47
|
+
Requires-Dist: pydantic-yaml<2.0.0,>=1.6.0
|
|
48
|
+
Requires-Dist: pydantic<3.0,>=2.11
|
|
49
|
+
Requires-Dist: python-dateutil>=2.8
|
|
50
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
51
|
+
Requires-Dist: pyyaml>=6.0
|
|
52
|
+
Requires-Dist: redis>=5.0.0
|
|
53
|
+
Requires-Dist: rich>=13.0.0
|
|
54
|
+
Requires-Dist: tomlkit>=0.13.0
|
|
55
|
+
Requires-Dist: websockets>=12.0
|
|
56
|
+
Provides-Extra: dev
|
|
57
|
+
Requires-Dist: bandit>=1.7.0; extra == 'dev'
|
|
58
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
59
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
60
|
+
Requires-Dist: flake8>=6.0.0; extra == 'dev'
|
|
61
|
+
Requires-Dist: isort>=5.12.0; extra == 'dev'
|
|
62
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == 'dev'
|
|
63
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
|
|
64
|
+
Requires-Dist: mkdocstrings[python]>=0.22.0; extra == 'dev'
|
|
65
|
+
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
66
|
+
Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
|
|
67
|
+
Requires-Dist: pydocstyle>=6.3.0; extra == 'dev'
|
|
68
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
70
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == 'dev'
|
|
71
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == 'dev'
|
|
72
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
73
|
+
Requires-Dist: questionary>=2.1.0; extra == 'dev'
|
|
74
|
+
Requires-Dist: twine>=4.0.0; extra == 'dev'
|
|
75
|
+
Provides-Extra: docs
|
|
76
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == 'docs'
|
|
77
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
78
|
+
Requires-Dist: mkdocstrings[python]>=0.22.0; extra == 'docs'
|
|
79
|
+
Requires-Dist: pymdown-extensions>=10.0.0; extra == 'docs'
|
|
80
|
+
Provides-Extra: test
|
|
81
|
+
Requires-Dist: factory-boy>=3.2.0; extra == 'test'
|
|
82
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
83
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
84
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
|
|
85
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
|
|
86
|
+
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
87
|
+
Description-Content-Type: text/markdown
|
|
88
|
+
|
|
89
|
+
# 🚀 UnrealOn - Next-Generation Web Scraping Platform
|
|
90
|
+
|
|
91
|
+
> **Enterprise-grade browser automation framework that makes web scraping simple, reliable, and scalable**
|
|
92
|
+
|
|
93
|
+
UnrealOn is a revolutionary web scraping platform that **solves all developer problems** once and for all. Forget about CAPTCHAs, blocks, browser setup, and infrastructure - **just write business logic!**
|
|
94
|
+
|
|
95
|
+
[PyPI version](https://badge.fury.io/py/unrealon)
|
|
96
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
97
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## ✨ Why UnrealOn?
|
|
102
|
+
|
|
103
|
+
### 🛡️ **Unbreakable Stealth Mode**
|
|
104
|
+
- **100% bot detection bypass** - enterprise-level anti-detection
|
|
105
|
+
- Automatic User-Agent, fingerprint, and TLS parameter rotation
|
|
106
|
+
- Human-like behavior simulation at browser level
|
|
107
|
+
- **No CAPTCHAs or blocks** - the system handles everything
|
|
108
|
+
|
|
109
|
+
### 🧠 **AI-Powered Parsing**
|
|
110
|
+
- **Smart parsing by URL** - just provide a link, get structured data
|
|
111
|
+
- Automatic content recognition using LLM
|
|
112
|
+
- Adapts to website structure changes
|
|
113
|
+
- **Zero selector configuration**
|
|
114
|
+
|
|
115
|
+
### 🎯 **Zero-Configuration Approach**
|
|
116
|
+
- **Works out of the box** - no complex setup required
|
|
117
|
+
- Automatic browser and proxy management
|
|
118
|
+
- Built-in logging and monitoring system
|
|
119
|
+
- **Just run and it works**
|
|
120
|
+
|
|
121
|
+
### 📊 **UnrealOn Cloud Platform**
|
|
122
|
+
- Real-time monitoring of all parsers
|
|
123
|
+
- Centralized logging and analytics
|
|
124
|
+
- Task management through web interface
|
|
125
|
+
- **Complete control over your parsing farm**
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## 🎮 Quick Start
|
|
130
|
+
|
|
131
|
+
### 1️⃣ Installation (30 seconds)
|
|
132
|
+
```bash
|
|
133
|
+
pip install unrealon
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 2️⃣ Your First Parser (2 minutes)
|
|
137
|
+
```python
|
|
138
|
+
from unrealon import ParserManager
|
|
139
|
+
import asyncio
|
|
140
|
+
|
|
141
|
+
class MyParser(ParserManager):
|
|
142
|
+
async def parse_products(self, url: str):
|
|
143
|
+
# Navigate with built-in stealth
|
|
144
|
+
await self.browser.navigate(url)
|
|
145
|
+
|
|
146
|
+
# AI-powered extraction - no selectors needed!
|
|
147
|
+
result = await self.extract_with_ai(
|
|
148
|
+
url,
|
|
149
|
+
"Extract all products with title, price, and image"
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
return result.data
|
|
153
|
+
|
|
154
|
+
# Usage
|
|
155
|
+
async def main():
|
|
156
|
+
parser = MyParser()
|
|
157
|
+
await parser.setup()
|
|
158
|
+
|
|
159
|
+
products = await parser.parse_products("https://example.com/products")
|
|
160
|
+
print(f"Found {len(products)} products!")
|
|
161
|
+
|
|
162
|
+
await parser.cleanup()
|
|
163
|
+
|
|
164
|
+
asyncio.run(main())
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### 3️⃣ Daemon Mode with Cloud Platform
|
|
168
|
+
```python
|
|
169
|
+
# Run as daemon with real-time dashboard
|
|
170
|
+
await parser.start_daemon()
|
|
171
|
+
|
|
172
|
+
# Now control via web interface at https://cloud.unrealon.com
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**That's it! You have a production-ready parser in 3 steps!**
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## 🏗️ Architecture Overview
|
|
180
|
+
|
|
181
|
+
### 🎯 **Developer's Perspective - Simple & Clean**
|
|
182
|
+
|
|
183
|
+
```mermaid
|
|
184
|
+
graph TD
|
|
185
|
+
A["💻 Your Parser Code<br/>🐍 Python Script<br/><br/>class MyParser(ParserManager):<br/> async def parse_products(url):<br/> return await self.extract_with_ai(url)"]
|
|
186
|
+
|
|
187
|
+
B["🚀 Built-in Browser<br/>🎭 Playwright + Stealth<br/><br/>✅ Anti-Detection<br/>✅ Proxy Rotation<br/>✅ CAPTCHA Solving"]
|
|
188
|
+
|
|
189
|
+
C["🌐 Target Websites<br/><br/>🛒 E-commerce Sites<br/>📰 News Portals<br/>📱 Social Media<br/>🌍 Any Website"]
|
|
190
|
+
|
|
191
|
+
D["📊 UnrealOn Dashboard<br/><br/>📈 Real-time Monitoring<br/>📋 Logs & Analytics<br/>⚙️ Task Management<br/>💾 Data Storage"]
|
|
192
|
+
|
|
193
|
+
A --> B
|
|
194
|
+
B --> C
|
|
195
|
+
A -.->|"🔄 Automatic Sync<br/>📊 Metrics & Logs<br/>📦 Parsed Data"| D
|
|
196
|
+
|
|
197
|
+
style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
198
|
+
style B fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px
|
|
199
|
+
style C fill:#fff3e0,stroke:#f57c00,stroke-width:3px
|
|
200
|
+
style D fill:#e8f5e8,stroke:#388e3c,stroke-width:3px
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### 🔄 **Two Operation Modes**
|
|
204
|
+
|
|
205
|
+
#### 🔧 **Standalone Mode** (Local Development)
|
|
206
|
+
```mermaid
|
|
207
|
+
graph TD
|
|
208
|
+
A["💻 Your Parser<br/>🐍 Local Python Script"]
|
|
209
|
+
B["🚀 Built-in Browser<br/>🎭 Stealth Enabled"]
|
|
210
|
+
C["🌐 Target Website<br/>🛒 E-commerce / 📰 News"]
|
|
211
|
+
D["💾 Local Results<br/>📄 JSON / CSV / Database"]
|
|
212
|
+
|
|
213
|
+
A --> B
|
|
214
|
+
B --> C
|
|
215
|
+
B --> D
|
|
216
|
+
|
|
217
|
+
style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
218
|
+
style B fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px
|
|
219
|
+
style C fill:#fff3e0,stroke:#f57c00,stroke-width:3px
|
|
220
|
+
style D fill:#e8f5e8,stroke:#388e3c,stroke-width:3px
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
#### 🚀 **Dashboard Mode** (Production)
|
|
224
|
+
```mermaid
|
|
225
|
+
graph TD
|
|
226
|
+
A["💻 Your Parser<br/>🐍 Production Script"]
|
|
227
|
+
B["🚀 Built-in Browser<br/>🎭 Enterprise Stealth"]
|
|
228
|
+
C["🌐 Target Website<br/>🛒 E-commerce / 📰 News"]
|
|
229
|
+
D["📊 UnrealOn Dashboard<br/>☁️ Cloud Platform"]
|
|
230
|
+
E["👥 Team Collaboration<br/>🔐 Role Management"]
|
|
231
|
+
F["📈 Analytics & Reports<br/>📊 Business Intelligence"]
|
|
232
|
+
G["📤 Data Export<br/>🔗 API / Webhooks"]
|
|
233
|
+
|
|
234
|
+
A --> B
|
|
235
|
+
B --> C
|
|
236
|
+
A --> D
|
|
237
|
+
D --> E
|
|
238
|
+
D --> F
|
|
239
|
+
D --> G
|
|
240
|
+
|
|
241
|
+
style A fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
242
|
+
style B fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px
|
|
243
|
+
style C fill:#fff3e0,stroke:#f57c00,stroke-width:3px
|
|
244
|
+
style D fill:#e8f5e8,stroke:#388e3c,stroke-width:3px
|
|
245
|
+
style E fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
|
|
246
|
+
style F fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
|
|
247
|
+
style G fill:#e8f5e8,stroke:#388e3c,stroke-width:2px
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### 🎯 **What You Focus On vs What UnrealOn Handles**
|
|
251
|
+
|
|
252
|
+
```mermaid
|
|
253
|
+
graph TD
|
|
254
|
+
subgraph "🎯 Your Focus - Business Logic Only"
|
|
255
|
+
A1["🎯 Define Target URLs<br/>📝 urls = ['amazon.com', 'ebay.com']"]
|
|
256
|
+
A2["🔍 Specify Data to Extract<br/>📋 'Extract title, price, rating'"]
|
|
257
|
+
A3["📊 Handle Results<br/>💾 Save to database/API"]
|
|
258
|
+
A4["⏰ Schedule Tasks<br/>🕐 Run every hour/daily"]
|
|
259
|
+
end
|
|
260
|
+
|
|
261
|
+
A1 --> A2
|
|
262
|
+
A2 --> A3
|
|
263
|
+
A3 --> A4
|
|
264
|
+
|
|
265
|
+
subgraph "🚀 UnrealOn Handles All Infrastructure"
|
|
266
|
+
B1["🌐 Browser Management<br/>🎭 Playwright + Chrome"]
|
|
267
|
+
B2["🛡️ Stealth & Anti-Detection<br/>🔒 Fingerprint Spoofing"]
|
|
268
|
+
B3["🔄 Proxy Rotation<br/>🌍 Global IP Pool"]
|
|
269
|
+
B4["🤖 CAPTCHA Solving<br/>🧩 Automatic Resolution"]
|
|
270
|
+
B5["⚠️ Error Handling<br/>🔄 Retry Logic"]
|
|
271
|
+
B6["📈 Logging & Monitoring<br/>📊 Real-time Metrics"]
|
|
272
|
+
B7["💾 Data Storage<br/>☁️ Cloud Database"]
|
|
273
|
+
B8["⚡ Performance Optimization<br/>🚀 Auto-scaling"]
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
A1 -.->|"Automatically Triggers"| B1
|
|
277
|
+
A2 -.->|"Automatically Triggers"| B2
|
|
278
|
+
A2 -.->|"Automatically Triggers"| B3
|
|
279
|
+
A3 -.->|"Automatically Triggers"| B4
|
|
280
|
+
A3 -.->|"Automatically Triggers"| B5
|
|
281
|
+
A4 -.->|"Automatically Triggers"| B6
|
|
282
|
+
A4 -.->|"Automatically Triggers"| B7
|
|
283
|
+
A4 -.->|"Automatically Triggers"| B8
|
|
284
|
+
|
|
285
|
+
style A1 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
286
|
+
style A2 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
287
|
+
style A3 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
288
|
+
style A4 fill:#e3f2fd,stroke:#1976d2,stroke-width:3px
|
|
289
|
+
|
|
290
|
+
style B1 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
291
|
+
style B2 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
292
|
+
style B3 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
293
|
+
style B4 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
294
|
+
style B5 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
295
|
+
style B6 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
296
|
+
style B7 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
297
|
+
style B8 fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
**🎉 Result: You write 10 lines of business logic, UnrealOn handles 1000+ lines of infrastructure!**
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
## 🎛️ Multiple Operation Modes
|
|
305
|
+
|
|
306
|
+
### 🔧 **Standalone Mode** (Simplest)
|
|
307
|
+
Perfect for quick tasks and development:
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
from unrealon import quick_parse
|
|
311
|
+
|
|
312
|
+
# One-liner magic - AI does everything
|
|
313
|
+
products = await quick_parse("https://shop.com/products")
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### 🤖 **Traditional Mode** (Full Control)
|
|
317
|
+
For developers who prefer CSS selectors:
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
from unrealon import ParserManager
|
|
321
|
+
from bs4 import BeautifulSoup
|
|
322
|
+
|
|
323
|
+
class TraditionalParser(ParserManager):
|
|
324
|
+
async def parse_products(self, url: str):
|
|
325
|
+
html = await self.browser.get_html(url)
|
|
326
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
327
|
+
|
|
328
|
+
products = []
|
|
329
|
+
for item in soup.select(".product"):
|
|
330
|
+
products.append({
|
|
331
|
+
"title": item.select_one(".title").text,
|
|
332
|
+
"price": item.select_one(".price").text
|
|
333
|
+
})
|
|
334
|
+
|
|
335
|
+
return products
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
### 🚀 **Daemon Mode** (Production)
|
|
339
|
+
For enterprise deployments with dashboard:
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
class ProductionParser(ParserManager):
|
|
343
|
+
async def handle_parse_command(self, command):
|
|
344
|
+
"""Handle remote commands from dashboard"""
|
|
345
|
+
url = command.data.get("url")
|
|
346
|
+
return await self.parse_products(url)
|
|
347
|
+
|
|
348
|
+
# Start daemon
|
|
349
|
+
await parser.start_daemon(
|
|
350
|
+
api_key="your_api_key"
|
|
351
|
+
)
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### ⏰ **Scheduled Mode** (Automation)
|
|
355
|
+
For regular data collection:
|
|
356
|
+
|
|
357
|
+
```python
|
|
358
|
+
class ScheduledParser(ParserManager):
|
|
359
|
+
async def run_scheduled(self):
|
|
360
|
+
"""Called automatically by scheduler"""
|
|
361
|
+
urls = self.get_target_urls()
|
|
362
|
+
results = []
|
|
363
|
+
|
|
364
|
+
for url in urls:
|
|
365
|
+
data = await self.parse_products(url)
|
|
366
|
+
results.extend(data)
|
|
367
|
+
|
|
368
|
+
return results
|
|
369
|
+
|
|
370
|
+
# Run every hour
|
|
371
|
+
await parser.start_daemon(schedule="1h")
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## 🛡️ Advanced Stealth Technologies
|
|
377
|
+
|
|
378
|
+
### Built-in Anti-Detection Features:
|
|
379
|
+
- **Playwright Stealth** - Browser fingerprint modification
|
|
380
|
+
- **Proxy Rotation** - Automatic IP address switching
|
|
381
|
+
- **User-Agent Spoofing** - Mimicking different browsers
|
|
382
|
+
- **Request Timing** - Human-like delays
|
|
383
|
+
- **Cookie Management** - Session persistence
|
|
384
|
+
- **CAPTCHA Solving** - Automatic CAPTCHA resolution
|
|
385
|
+
- **Behavioral Patterns** - User action simulation
|
|
386
|
+
|
|
387
|
+
### Stealth Levels:
|
|
388
|
+
```python
|
|
389
|
+
# Configure stealth level
|
|
390
|
+
parser = ParserManager(stealth_level="maximum") # minimal | balanced | maximum
|
|
391
|
+
```
|
|
392
|
+
|
|
393
|
+
- **Minimal** - Basic protection (fast)
|
|
394
|
+
- **Balanced** - Optimal balance (recommended)
|
|
395
|
+
- **Maximum** - Maximum protection (slower but bulletproof)
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
## 🧠 AI-Powered Features
|
|
400
|
+
|
|
401
|
+
```python
|
|
402
|
+
# Smart content extraction - AI understands page structure
|
|
403
|
+
result = await parser.extract_with_ai(
|
|
404
|
+
url="https://ecommerce.com/products",
|
|
405
|
+
instruction="Extract product name, price, rating"
|
|
406
|
+
)
|
|
407
|
+
|
|
408
|
+
print(f"Extracted {len(result.data)} products")
|
|
409
|
+
print(f"Confidence: {result.confidence}")
|
|
410
|
+
|
|
411
|
+
# AI adapts to website changes automatically
|
|
412
|
+
result = await parser.adaptive_parse(
|
|
413
|
+
url="https://news.com",
|
|
414
|
+
data_type="articles",
|
|
415
|
+
fields=["title", "author", "date"]
|
|
416
|
+
)
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## 📊 Enterprise Dashboard Features
|
|
422
|
+
|
|
423
|
+
- 📈 **Live Metrics** - RPS, success rate, errors
|
|
424
|
+
- 📋 **Task Management** - Create, stop, schedule tasks
|
|
425
|
+
- 🔍 **Log Search** - Instant search across all events
|
|
426
|
+
- 🚨 **Alerts** - Slack, Email, Telegram notifications
|
|
427
|
+
- 👥 **Team Collaboration** - Roles and permissions
|
|
428
|
+
|
|
429
|
+
**Access:** [https://cloud.unrealon.com](https://cloud.unrealon.com)
|
|
430
|
+
|
|
431
|
+
```python
|
|
432
|
+
# Control parsers via API
|
|
433
|
+
response = requests.post("https://api.unrealon.com/parsers/start", {
|
|
434
|
+
"parser_id": "my_parser", "config": {"max_pages": 10}
|
|
435
|
+
})
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
---
|
|
439
|
+
|
|
440
|
+
## 🎯 Working Examples
|
|
441
|
+
|
|
442
|
+
### E-commerce Parser
|
|
443
|
+
```python
|
|
444
|
+
class EcommerceParser(ParserManager):
|
|
445
|
+
async def parse_products(self, url: str):
|
|
446
|
+
await self.browser.navigate(url)
|
|
447
|
+
|
|
448
|
+
# AI extracts all product data automatically
|
|
449
|
+
products = await self.extract_with_ai(
|
|
450
|
+
url, "Extract products with title, price, rating"
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
return products.data
|
|
454
|
+
|
|
455
|
+
# Usage - Parse multiple sites
|
|
456
|
+
parser = EcommerceParser()
|
|
457
|
+
await parser.setup()
|
|
458
|
+
|
|
459
|
+
amazon_products = await parser.parse_products("https://amazon.com/s?k=laptop")
|
|
460
|
+
ebay_products = await parser.parse_products("https://ebay.com/sch/laptop")
|
|
461
|
+
|
|
462
|
+
await parser.cleanup()
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
### News & Social Media
|
|
466
|
+
```python
|
|
467
|
+
class NewsParser(ParserManager):
|
|
468
|
+
async def parse_articles(self, url: str):
|
|
469
|
+
await self.browser.navigate(url)
|
|
470
|
+
return await self.extract_with_ai(url, "Extract articles with title, author, date")
|
|
471
|
+
|
|
472
|
+
# Parse multiple sources
|
|
473
|
+
sources = ["https://news.ycombinator.com", "https://techcrunch.com"]
|
|
474
|
+
all_articles = []
|
|
475
|
+
for source in sources:
|
|
476
|
+
articles = await parser.parse_articles(source)
|
|
477
|
+
all_articles.extend(articles)
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
---
|
|
481
|
+
|
|
482
|
+
## 🔧 Configuration
|
|
483
|
+
|
|
484
|
+
```yaml
|
|
485
|
+
# config.yaml
|
|
486
|
+
parser:
|
|
487
|
+
name: "My Parser"
|
|
488
|
+
target_urls:
|
|
489
|
+
- https://example.com/products
|
|
490
|
+
|
|
491
|
+
browser:
|
|
492
|
+
headless: true
|
|
493
|
+
|
|
494
|
+
bridge:
|
|
495
|
+
enabled: true
|
|
496
|
+
api_key: "your_api_key"
|
|
497
|
+
|
|
498
|
+
processing:
|
|
499
|
+
delay_between_requests: 1.0
|
|
500
|
+
max_pages: 1
|
|
501
|
+
|
|
502
|
+
logging:
|
|
503
|
+
level: INFO
|
|
504
|
+
to_bridge: true
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
---
|
|
508
|
+
|
|
509
|
+
## 🚀 CLI Tools
|
|
510
|
+
|
|
511
|
+
```bash
|
|
512
|
+
# Quick parsing
|
|
513
|
+
unrealon parse --url https://example.com --ai-instruction "Extract products"
|
|
514
|
+
|
|
515
|
+
# Start daemon
|
|
516
|
+
unrealon daemon --config config.yaml
|
|
517
|
+
|
|
518
|
+
# Test stealth
|
|
519
|
+
unrealon browser test-stealth --url https://bot.sannysoft.com
|
|
520
|
+
|
|
521
|
+
# Export results
|
|
522
|
+
unrealon export --format csv --output results.csv
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
---
|
|
526
|
+
|
|
527
|
+
## 🎉 Real-World Success Stories
|
|
528
|
+
|
|
529
|
+
### 🚗 **CarAPIs** - Automotive Data Platform
|
|
530
|
+
**Platform**: [carapis.com](https://carapis.com)
|
|
531
|
+
**Challenge**: Extract vehicle data from 500+ dealership websites
|
|
532
|
+
**Solution**: UnrealOn with AI-powered extraction
|
|
533
|
+
**Results**: 95% accuracy, 10M+ vehicles processed monthly
|
|
534
|
+
|
|
535
|
+
### 🛒 **ShopAPIs** - E-commerce Intelligence
|
|
536
|
+
**Platform**: [shopapis.com](https://shopapis.com)
|
|
537
|
+
**Challenge**: Monitor prices across 50+ e-commerce platforms
|
|
538
|
+
**Solution**: UnrealOn cluster with real-time monitoring
|
|
539
|
+
**Results**: 99.9% uptime, 1M+ products tracked daily
|
|
540
|
+
|
|
541
|
+
### 📊 **StockAPIs** - Financial Data Platform
|
|
542
|
+
**Platform**: [stockapis.com](https://stockapis.com)
|
|
543
|
+
**Challenge**: High-frequency financial data collection
|
|
544
|
+
**Solution**: UnrealOn with millisecond precision
|
|
545
|
+
**Results**: 100K+ data points per second, 99.99% accuracy
|
|
546
|
+
|
|
547
|
+
### 🏠 **PropAPIs** - Real Estate Intelligence
|
|
548
|
+
**Platform**: [propapis.com](https://propapis.com)
|
|
549
|
+
**Challenge**: Aggregate listings from 200+ real estate sites
|
|
550
|
+
**Solution**: UnrealOn with geographic clustering
|
|
551
|
+
**Results**: 5M+ properties indexed, real-time updates
|
|
552
|
+
|
|
553
|
+
**All platforms built with UnrealOn - proving enterprise reliability!**
|
|
554
|
+
|
|
555
|
+
---
|
|
556
|
+
|
|
557
|
+
## 💎 Enterprise Features
|
|
558
|
+
|
|
559
|
+
Need **enterprise capabilities**?
|
|
560
|
+
|
|
561
|
+
### 🏢 **Enterprise Edition Includes:**
|
|
562
|
+
- 🛡️ **Dedicated Infrastructure** - Private cloud deployment
|
|
563
|
+
- 🔒 **Advanced Security** - SOC2/GDPR compliance
|
|
564
|
+
- 🤝 **24/7 Support** - Dedicated success manager
|
|
565
|
+
- 📊 **Custom Analytics** - Tailored reporting and insights
|
|
566
|
+
- 🚀 **Priority Features** - Early access to new capabilities
|
|
567
|
+
- 🔧 **Custom Integrations** - Bespoke API development
|
|
568
|
+
|
|
569
|
+
### 📞 **Contact Enterprise Sales:**
|
|
570
|
+
- **Email**: [enterprise@unrealon.com](mailto:enterprise@unrealon.com)
|
|
571
|
+
- **Phone**: +1 (555) 123-4567
|
|
572
|
+
- **Schedule Demo**: [calendly.com/unrealon-demo](https://calendly.com/unrealon-demo)
|
|
573
|
+
|
|
574
|
+
---
|
|
575
|
+
|
|
576
|
+
## 📚 Documentation & Support
|
|
577
|
+
|
|
578
|
+
### 📖 **Resources:**
|
|
579
|
+
- [📘 Complete Documentation](https://docs.unrealon.com)
|
|
580
|
+
- [🎥 Video Tutorials](https://youtube.com/unrealon)
|
|
581
|
+
- [💬 Discord Community](https://discord.gg/unrealon)
|
|
582
|
+
- [📧 Technical Support](mailto:support@unrealon.com)
|
|
583
|
+
|
|
584
|
+
### 🎓 **Learning Path:**
|
|
585
|
+
1. [🚀 Quick Start (5 minutes)](https://docs.unrealon.com/quickstart)
|
|
586
|
+
2. [🏗️ Platform Architecture](https://docs.unrealon.com/architecture)
|
|
587
|
+
3. [🛡️ Advanced Stealth Guide](https://docs.unrealon.com/stealth)
|
|
588
|
+
4. [🤖 AI Parsing Tutorial](https://docs.unrealon.com/ai-parsing)
|
|
589
|
+
5. [📊 Dashboard Management](https://docs.unrealon.com/dashboard)
|
|
590
|
+
|
|
591
|
+
### 🆘 **Getting Help:**
|
|
592
|
+
- **GitHub Issues**: [Report bugs](https://github.com/unrealos/unrealon-rpc/issues)
|
|
593
|
+
- **GitHub Discussions**: [Ask questions](https://github.com/unrealos/unrealon-rpc/discussions)
|
|
594
|
+
- **Stack Overflow**: Tag your questions with `unrealon`
|
|
595
|
+
- **Email Support**: [support@unrealon.com](mailto:support@unrealon.com)
|
|
596
|
+
|
|
597
|
+
---
|
|
598
|
+
|
|
599
|
+
## 🤝 Contributing
|
|
600
|
+
|
|
601
|
+
We welcome contributions! Here's how to get started:
|
|
602
|
+
|
|
603
|
+
### Development Setup
|
|
604
|
+
```bash
|
|
605
|
+
# Clone repository
|
|
606
|
+
git clone https://github.com/unrealos/unrealon-rpc.git
|
|
607
|
+
cd unrealon-rpc
|
|
608
|
+
|
|
609
|
+
# Install development dependencies
|
|
610
|
+
poetry install
|
|
611
|
+
|
|
612
|
+
# Install pre-commit hooks
|
|
613
|
+
pre-commit install
|
|
614
|
+
|
|
615
|
+
# Run tests
|
|
616
|
+
pytest
|
|
617
|
+
|
|
618
|
+
# Run linting
|
|
619
|
+
black src/
|
|
620
|
+
isort src/
|
|
621
|
+
mypy src/
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
### Contribution Guidelines
|
|
625
|
+
- Follow PEP 8 style guide
|
|
626
|
+
- Add type hints to all functions
|
|
627
|
+
- Write comprehensive docstrings
|
|
628
|
+
- Include tests for new features
|
|
629
|
+
- Update documentation as needed
|
|
630
|
+
|
|
631
|
+
---
|
|
632
|
+
|
|
633
|
+
## 📄 License
|
|
634
|
+
|
|
635
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
636
|
+
|
|
637
|
+
---
|
|
638
|
+
|
|
639
|
+
<div align="center">
|
|
640
|
+
|
|
641
|
+
## 🚀 Start Building Amazing Parsers Today!
|
|
642
|
+
|
|
643
|
+
```bash
|
|
644
|
+
pip install unrealon
|
|
645
|
+
```
|
|
646
|
+
|
|
647
|
+
**UnrealOn Platform** - The Future of Web Scraping is Here! 🌟
|
|
648
|
+
|
|
649
|
+
[GitHub](https://github.com/unrealon)
|
|
650
|
+
[Discord](https://discord.gg/unrealon)
|
|
651
|
+
[Documentation](https://docs.unrealon.com)
|
|
652
|
+
[Twitter](https://twitter.com/unrealon)
|
|
653
|
+
|
|
654
|
+
*Built with ❤️ by the UnrealOn Team*
|
|
655
|
+
|
|
656
|
+
**Ready to revolutionize your web scraping?** [Get Started Now!](https://docs.unrealon.com/quickstart)
|
|
657
|
+
|
|
658
|
+
</div>
|