unrealon 1.1.1__tar.gz → 1.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.1 → unrealon-1.1.5}/MANIFEST.in +0 -3
- unrealon-1.1.5/PKG-INFO +621 -0
- unrealon-1.1.5/README.md +533 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/pyproject.toml +32 -18
- {unrealon-1.1.1 → unrealon-1.1.5}/requirements-dev.txt +6 -5
- {unrealon-1.1.1 → unrealon-1.1.5}/requirements-test.txt +6 -5
- {unrealon-1.1.1 → unrealon-1.1.5}/requirements.txt +6 -5
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon/__init__.py +16 -6
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/__init__.py +3 -6
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/core/browser_manager.py +86 -84
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/config.py +2 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/captcha.py +165 -185
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/cookies.py +57 -28
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/logger_bridge.py +94 -34
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/profile.py +186 -158
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/stealth.py +58 -47
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/__init__.py +8 -21
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/exceptions.py +5 -0
- unrealon-1.1.5/src/unrealon_driver/html_analyzer/__init__.py +32 -0
- unrealon-1.1.1/src/unrealon_driver/parser/managers/html.py → unrealon-1.1.5/src/unrealon_driver/html_analyzer/cleaner.py +330 -405
- unrealon-1.1.5/src/unrealon_driver/html_analyzer/config.py +64 -0
- unrealon-1.1.5/src/unrealon_driver/html_analyzer/manager.py +247 -0
- unrealon-1.1.5/src/unrealon_driver/html_analyzer/models.py +115 -0
- unrealon-1.1.5/src/unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
- unrealon-1.1.5/src/unrealon_driver/models/__init__.py +31 -0
- unrealon-1.1.5/src/unrealon_driver/models/websocket.py +98 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/__init__.py +4 -23
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/cli_manager.py +6 -5
- unrealon-1.1.5/src/unrealon_driver/parser/daemon_manager.py +403 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/managers/__init__.py +0 -21
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/managers/config.py +15 -3
- unrealon-1.1.5/src/unrealon_driver/parser/parser_manager.py +458 -0
- unrealon-1.1.5/src/unrealon_driver/smart_logging/__init__.py +24 -0
- unrealon-1.1.5/src/unrealon_driver/smart_logging/models.py +44 -0
- unrealon-1.1.5/src/unrealon_driver/smart_logging/smart_logger.py +406 -0
- unrealon-1.1.5/src/unrealon_driver/smart_logging/unified_logger.py +525 -0
- unrealon-1.1.5/src/unrealon_driver/websocket/__init__.py +31 -0
- unrealon-1.1.5/src/unrealon_driver/websocket/client.py +249 -0
- unrealon-1.1.5/src/unrealon_driver/websocket/config.py +188 -0
- unrealon-1.1.5/src/unrealon_driver/websocket/manager.py +90 -0
- unrealon-1.1.1/PKG-INFO +0 -722
- unrealon-1.1.1/README.md +0 -643
- unrealon-1.1.1/src/unrealon_bridge/__init__.py +0 -114
- unrealon-1.1.1/src/unrealon_bridge/cli.py +0 -316
- unrealon-1.1.1/src/unrealon_bridge/client/__init__.py +0 -93
- unrealon-1.1.1/src/unrealon_bridge/client/base.py +0 -78
- unrealon-1.1.1/src/unrealon_bridge/client/commands.py +0 -89
- unrealon-1.1.1/src/unrealon_bridge/client/connection.py +0 -90
- unrealon-1.1.1/src/unrealon_bridge/client/events.py +0 -65
- unrealon-1.1.1/src/unrealon_bridge/client/health.py +0 -38
- unrealon-1.1.1/src/unrealon_bridge/client/html_parser.py +0 -146
- unrealon-1.1.1/src/unrealon_bridge/client/logging.py +0 -139
- unrealon-1.1.1/src/unrealon_bridge/client/proxy.py +0 -70
- unrealon-1.1.1/src/unrealon_bridge/client/scheduler.py +0 -450
- unrealon-1.1.1/src/unrealon_bridge/client/session.py +0 -70
- unrealon-1.1.1/src/unrealon_bridge/configs/__init__.py +0 -14
- unrealon-1.1.1/src/unrealon_bridge/configs/bridge_config.py +0 -212
- unrealon-1.1.1/src/unrealon_bridge/configs/bridge_config.yaml +0 -39
- unrealon-1.1.1/src/unrealon_bridge/models/__init__.py +0 -138
- unrealon-1.1.1/src/unrealon_bridge/models/base.py +0 -28
- unrealon-1.1.1/src/unrealon_bridge/models/command.py +0 -41
- unrealon-1.1.1/src/unrealon_bridge/models/events.py +0 -40
- unrealon-1.1.1/src/unrealon_bridge/models/html_parser.py +0 -79
- unrealon-1.1.1/src/unrealon_bridge/models/logging.py +0 -55
- unrealon-1.1.1/src/unrealon_bridge/models/parser.py +0 -63
- unrealon-1.1.1/src/unrealon_bridge/models/proxy.py +0 -41
- unrealon-1.1.1/src/unrealon_bridge/models/requests.py +0 -95
- unrealon-1.1.1/src/unrealon_bridge/models/responses.py +0 -88
- unrealon-1.1.1/src/unrealon_bridge/models/scheduler.py +0 -592
- unrealon-1.1.1/src/unrealon_bridge/models/session.py +0 -28
- unrealon-1.1.1/src/unrealon_bridge/server/__init__.py +0 -91
- unrealon-1.1.1/src/unrealon_bridge/server/base.py +0 -171
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/__init__.py +0 -23
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/command.py +0 -110
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/html_parser.py +0 -139
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/logging.py +0 -95
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/parser.py +0 -95
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/proxy.py +0 -75
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/scheduler.py +0 -545
- unrealon-1.1.1/src/unrealon_bridge/server/handlers/session.py +0 -66
- unrealon-1.1.1/src/unrealon_driver/browser/__init__.py +0 -8
- unrealon-1.1.1/src/unrealon_driver/browser/config.py +0 -74
- unrealon-1.1.1/src/unrealon_driver/browser/manager.py +0 -416
- unrealon-1.1.1/src/unrealon_driver/parser/daemon_manager.py +0 -227
- unrealon-1.1.1/src/unrealon_driver/parser/managers/browser.py +0 -51
- unrealon-1.1.1/src/unrealon_driver/parser/managers/logging.py +0 -609
- unrealon-1.1.1/src/unrealon_driver/parser/parser_manager.py +0 -628
- {unrealon-1.1.1 → unrealon-1.1.5}/.gitignore +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/LICENSE +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/README.md +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/cli/__init__.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/cli/browser_cli.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/cli/cookies_cli.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/cli/interactive_mode.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/cli/main.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/core/__init__.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/__init__.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/core.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/dataclasses.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/detection.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/enums.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/dto/models/statistics.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_browser/managers/__init__.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/managers/error.py +0 -0
- {unrealon-1.1.1 → unrealon-1.1.5}/src/unrealon_driver/parser/managers/result.py +0 -0
|
@@ -5,11 +5,8 @@ include MANIFEST.in
|
|
|
5
5
|
include requirements*.txt
|
|
6
6
|
recursive-include src/unrealon *.py
|
|
7
7
|
recursive-include src/unrealon_driver *.py
|
|
8
|
-
recursive-include src/unrealon_bridge *.py
|
|
9
8
|
recursive-include src/unrealon_browser *.py
|
|
10
9
|
recursive-include src/unrealon_driver *.json
|
|
11
|
-
recursive-include src/unrealon_driver *.yaml
|
|
12
|
-
recursive-include src/unrealon_driver *.yml
|
|
13
10
|
# Examples and tests are EXCLUDED from public package
|
|
14
11
|
|
|
15
12
|
# Exclude cache files
|
unrealon-1.1.5/PKG-INFO
ADDED
|
@@ -0,0 +1,621 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: unrealon
|
|
3
|
+
Version: 1.1.5
|
|
4
|
+
Summary: 🚀 Revolutionary web scraping platform with unbreakable stealth, AI-powered extraction, and zero-config setup. Build enterprise parsers in minutes, not months!
|
|
5
|
+
Project-URL: Homepage, https://github.com/unrealos/unrealon-rpc
|
|
6
|
+
Project-URL: Documentation, https://unrealon-rpc.readthedocs.io
|
|
7
|
+
Project-URL: Repository, https://github.com/unrealos/unrealon-rpc.git
|
|
8
|
+
Project-URL: Issues, https://github.com/unrealos/unrealon-rpc/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/unrealos/unrealon-rpc/blob/main/CHANGELOG.md
|
|
10
|
+
Author-email: UnrealOS Team <dev@unrealos.com>
|
|
11
|
+
Maintainer-email: UnrealOS Team <dev@unrealos.com>
|
|
12
|
+
License: MIT
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Keywords: ai-parsing,anti-detection,bot-protection,browser-automation,captcha-bypass,data-mining,distributed-parsing,enterprise-scraping,html-extraction,playwright,proxy-rotation,scalable-scraping,stealth-scraping,web-scraping,websocket-bridge,zero-config
|
|
15
|
+
Classifier: Development Status :: 4 - Beta
|
|
16
|
+
Classifier: Environment :: Web Environment
|
|
17
|
+
Classifier: Framework :: AsyncIO
|
|
18
|
+
Classifier: Intended Audience :: Developers
|
|
19
|
+
Classifier: Intended Audience :: Information Technology
|
|
20
|
+
Classifier: Intended Audience :: System Administrators
|
|
21
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
22
|
+
Classifier: Operating System :: OS Independent
|
|
23
|
+
Classifier: Programming Language :: Python
|
|
24
|
+
Classifier: Programming Language :: Python :: 3
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
28
|
+
Classifier: Topic :: Communications
|
|
29
|
+
Classifier: Topic :: Database
|
|
30
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
31
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
|
|
32
|
+
Classifier: Topic :: Office/Business
|
|
33
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
34
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
35
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
36
|
+
Classifier: Typing :: Typed
|
|
37
|
+
Requires-Python: <4.0,>=3.10
|
|
38
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
39
|
+
Requires-Dist: asyncio-mqtt>=0.16.0
|
|
40
|
+
Requires-Dist: beautifulsoup4>=4.13.4
|
|
41
|
+
Requires-Dist: click>=8.2.0
|
|
42
|
+
Requires-Dist: httpx>=0.26.0
|
|
43
|
+
Requires-Dist: ipfshttpclient>=0.8.0a2
|
|
44
|
+
Requires-Dist: lxml>=6.0.0
|
|
45
|
+
Requires-Dist: playwright-stealth>=2.0.0
|
|
46
|
+
Requires-Dist: playwright>=1.54.0
|
|
47
|
+
Requires-Dist: pydantic-yaml<2.0.0,>=1.6.0
|
|
48
|
+
Requires-Dist: pydantic<3.0,>=2.11
|
|
49
|
+
Requires-Dist: python-dateutil>=2.8
|
|
50
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
51
|
+
Requires-Dist: pyyaml>=6.0
|
|
52
|
+
Requires-Dist: redis>=5.0.0
|
|
53
|
+
Requires-Dist: rich>=13.0.0
|
|
54
|
+
Requires-Dist: tomlkit>=0.13.0
|
|
55
|
+
Requires-Dist: websockets>=12.0
|
|
56
|
+
Provides-Extra: dev
|
|
57
|
+
Requires-Dist: bandit>=1.7.0; extra == 'dev'
|
|
58
|
+
Requires-Dist: black>=23.0.0; extra == 'dev'
|
|
59
|
+
Requires-Dist: build>=1.0.0; extra == 'dev'
|
|
60
|
+
Requires-Dist: flake8>=6.0.0; extra == 'dev'
|
|
61
|
+
Requires-Dist: isort>=5.12.0; extra == 'dev'
|
|
62
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == 'dev'
|
|
63
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'dev'
|
|
64
|
+
Requires-Dist: mkdocstrings[python]>=0.22.0; extra == 'dev'
|
|
65
|
+
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
66
|
+
Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
|
|
67
|
+
Requires-Dist: pydocstyle>=6.3.0; extra == 'dev'
|
|
68
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
69
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
70
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == 'dev'
|
|
71
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == 'dev'
|
|
72
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
73
|
+
Requires-Dist: questionary>=2.1.0; extra == 'dev'
|
|
74
|
+
Requires-Dist: twine>=4.0.0; extra == 'dev'
|
|
75
|
+
Provides-Extra: docs
|
|
76
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == 'docs'
|
|
77
|
+
Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
|
|
78
|
+
Requires-Dist: mkdocstrings[python]>=0.22.0; extra == 'docs'
|
|
79
|
+
Requires-Dist: pymdown-extensions>=10.0.0; extra == 'docs'
|
|
80
|
+
Provides-Extra: test
|
|
81
|
+
Requires-Dist: factory-boy>=3.2.0; extra == 'test'
|
|
82
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
83
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
84
|
+
Requires-Dist: pytest-mock>=3.10.0; extra == 'test'
|
|
85
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
|
|
86
|
+
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
87
|
+
Description-Content-Type: text/markdown
|
|
88
|
+
|
|
89
|
+
# 🚀 UnrealOn - Next-Generation Web Scraping Platform
|
|
90
|
+
|
|
91
|
+
> **Enterprise-grade browser automation framework that makes web scraping simple, reliable, and scalable**
|
|
92
|
+
|
|
93
|
+
UnrealOn is a revolutionary web scraping platform that **solves all developer problems** once and for all. Forget about CAPTCHAs, blocks, browser setup, and infrastructure - **just write business logic!**
|
|
94
|
+
|
|
95
|
+
[PyPI version](https://badge.fury.io/py/unrealon)
|
|
96
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
97
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## ✨ Why UnrealOn?
|
|
102
|
+
|
|
103
|
+
### 🛡️ **Unbreakable Stealth Mode**
|
|
104
|
+
- **100% bot detection bypass** - enterprise-level anti-detection
|
|
105
|
+
- Automatic User-Agent, fingerprint, and TLS parameter rotation
|
|
106
|
+
- Human-like behavior simulation at browser level
|
|
107
|
+
- **No CAPTCHAs or blocks** - the system handles everything
|
|
108
|
+
|
|
109
|
+
### 🧠 **AI-Powered Parsing**
|
|
110
|
+
- **Smart parsing by URL** - just provide a link, get structured data
|
|
111
|
+
- Automatic content recognition using LLM
|
|
112
|
+
- Adapts to website structure changes
|
|
113
|
+
- **Zero selector configuration**
|
|
114
|
+
|
|
115
|
+
### 🎯 **Zero-Configuration Approach**
|
|
116
|
+
- **Works out of the box** - no complex setup required
|
|
117
|
+
- Automatic browser and proxy management
|
|
118
|
+
- Built-in logging and monitoring system
|
|
119
|
+
- **Just run and it works**
|
|
120
|
+
|
|
121
|
+
### 📊 **UnrealOn Cloud Platform**
|
|
122
|
+
- Real-time monitoring of all parsers
|
|
123
|
+
- Centralized logging and analytics
|
|
124
|
+
- Task management through web interface
|
|
125
|
+
- **Complete control over your parsing farm**
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## 🎮 Quick Start
|
|
130
|
+
|
|
131
|
+
### 1️⃣ Installation (30 seconds)
|
|
132
|
+
```bash
|
|
133
|
+
pip install unrealon
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 2️⃣ Your First Parser (2 minutes)
|
|
137
|
+
```python
|
|
138
|
+
from unrealon import ParserManager
|
|
139
|
+
import asyncio
|
|
140
|
+
|
|
141
|
+
class MyParser(ParserManager):
|
|
142
|
+
async def parse_products(self, url: str):
|
|
143
|
+
# Navigate with built-in stealth
|
|
144
|
+
await self.browser.navigate(url)
|
|
145
|
+
|
|
146
|
+
# AI-powered extraction - no selectors needed!
|
|
147
|
+
result = await self.extract_with_ai(
|
|
148
|
+
url,
|
|
149
|
+
"Extract all products with title, price, and image"
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
return result.data
|
|
153
|
+
|
|
154
|
+
# Usage
|
|
155
|
+
async def main():
|
|
156
|
+
parser = MyParser()
|
|
157
|
+
await parser.setup()
|
|
158
|
+
|
|
159
|
+
products = await parser.parse_products("https://example.com/products")
|
|
160
|
+
print(f"Found {len(products)} products!")
|
|
161
|
+
|
|
162
|
+
await parser.cleanup()
|
|
163
|
+
|
|
164
|
+
asyncio.run(main())
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### 3️⃣ Daemon Mode with Cloud Platform
|
|
168
|
+
```python
|
|
169
|
+
# Run as daemon with real-time dashboard
|
|
170
|
+
await parser.start_daemon()
|
|
171
|
+
|
|
172
|
+
# Now control via web interface at https://cloud.unrealon.com
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**That's it! You have a production-ready parser in 3 steps!**
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## 🏗️ Architecture Overview
|
|
180
|
+
|
|
181
|
+
### 🎯 **Developer's Perspective - Simple & Clean**
|
|
182
|
+
|
|
183
|
+
**Architecture Overview - Developer's Perspective:**
|
|
184
|
+
|
|
185
|
+
- **💻 Your Parser Code (Python Script)**
|
|
186
|
+
- Simple class extending ParserManager
|
|
187
|
+
- Focus on business logic only
|
|
188
|
+
- Example: `async def parse_products(self, url): return await self.extract_with_ai(url)`
|
|
189
|
+
|
|
190
|
+
- **🚀 Built-in Browser (Playwright + Stealth)**
|
|
191
|
+
- ✅ Anti-Detection
|
|
192
|
+
- ✅ Proxy Rotation
|
|
193
|
+
- ✅ CAPTCHA Solving
|
|
194
|
+
|
|
195
|
+
- **🌐 Target Websites**
|
|
196
|
+
- 🛒 E-commerce Sites
|
|
197
|
+
- 📰 News Portals
|
|
198
|
+
- 📱 Social Media
|
|
199
|
+
- 🌍 Any Website
|
|
200
|
+
|
|
201
|
+
- **📊 UnrealOn Dashboard**
|
|
202
|
+
- 📈 Real-time Monitoring
|
|
203
|
+
- 📋 Logs & Analytics
|
|
204
|
+
- ⚙️ Task Management
|
|
205
|
+
- 💾 Data Storage
|
|
206
|
+
|
|
207
|
+
**Flow:** Your code → Built-in Browser → Target Websites
|
|
208
|
+
**Automatic Sync:** Your code ⟷ UnrealOn Dashboard (metrics, logs, parsed data)
|
|
209
|
+
|
|
210
|
+
### 🔄 **Two Operation Modes**
|
|
211
|
+
|
|
212
|
+
#### 🔧 **Standalone Mode** (Local Development)
|
|
213
|
+
**Standalone Mode Flow:**
|
|
214
|
+
|
|
215
|
+
- 💻 Your Parser (Local Python Script)
|
|
216
|
+
- 🚀 Built-in Browser with Stealth Enabled
|
|
217
|
+
- 🌐 Target Website (E-commerce/News)
|
|
218
|
+
- 💾 Local Results (JSON/CSV/Database)
|
|
219
|
+
|
|
220
|
+
**Process:** Your Parser → Browser → Target Website → Local Results
|
|
221
|
+
|
|
222
|
+
#### 🚀 **Dashboard Mode** (Production)
|
|
223
|
+
**Dashboard Mode Flow:**
|
|
224
|
+
|
|
225
|
+
- 💻 Your Parser (Production Script)
|
|
226
|
+
- 🚀 Built-in Browser with Enterprise Stealth
|
|
227
|
+
- 🌐 Target Website (E-commerce/News)
|
|
228
|
+
- 📊 UnrealOn Dashboard (Cloud Platform)
|
|
229
|
+
- 👥 Team Collaboration & Role Management
|
|
230
|
+
- 📈 Analytics & Business Intelligence Reports
|
|
231
|
+
- 📤 Data Export via API/Webhooks
|
|
232
|
+
|
|
233
|
+
**Process:**
|
|
234
|
+
- Parser → Browser → Target Website
|
|
235
|
+
- Parser → Dashboard → Team/Analytics/Export
|
|
236
|
+
|
|
237
|
+
### 🎯 **What You Focus On vs What UnrealOn Handles**
|
|
238
|
+
|
|
239
|
+
**What You Focus On vs What UnrealOn Handles:**
|
|
240
|
+
|
|
241
|
+
**🎯 Your Focus - Business Logic Only:**
|
|
242
|
+
1. 🎯 Define Target URLs
|
|
243
|
+
- Example: `urls = ['amazon.com', 'ebay.com']`
|
|
244
|
+
2. 🔍 Specify Data to Extract
|
|
245
|
+
- Example: `'Extract title, price, rating'`
|
|
246
|
+
3. 📊 Handle Results
|
|
247
|
+
- Save to database/API
|
|
248
|
+
4. ⏰ Schedule Tasks
|
|
249
|
+
- Run every hour/daily
|
|
250
|
+
|
|
251
|
+
**🚀 UnrealOn Handles All Infrastructure:**
|
|
252
|
+
1. 🌐 Browser Management (Playwright + Chrome)
|
|
253
|
+
2. 🛡️ Stealth & Anti-Detection (Fingerprint Spoofing)
|
|
254
|
+
3. 🔄 Proxy Rotation (Global IP Pool)
|
|
255
|
+
4. 🤖 CAPTCHA Solving (Automatic Resolution)
|
|
256
|
+
5. ⚠️ Error Handling (Retry Logic)
|
|
257
|
+
6. 📈 Logging & Monitoring (Real-time Metrics)
|
|
258
|
+
7. 💾 Data Storage (Cloud Database)
|
|
259
|
+
8. ⚡ Performance Optimization (Auto-scaling)
|
|
260
|
+
|
|
261
|
+
**Each of your actions automatically triggers the corresponding infrastructure components.**
|
|
262
|
+
|
|
263
|
+
**🎉 Result: You write 10 lines of business logic, UnrealOn handles 1000+ lines of infrastructure!**
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## 🎛️ Multiple Operation Modes
|
|
268
|
+
|
|
269
|
+
### 🔧 **Standalone Mode** (Simplest)
|
|
270
|
+
Perfect for quick tasks and development:
|
|
271
|
+
|
|
272
|
+
```python
|
|
273
|
+
from unrealon import quick_parse
|
|
274
|
+
|
|
275
|
+
# One-liner magic - AI does everything
|
|
276
|
+
products = await quick_parse("https://shop.com/products")
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### 🤖 **Traditional Mode** (Full Control)
|
|
280
|
+
For developers who prefer CSS selectors:
|
|
281
|
+
|
|
282
|
+
```python
|
|
283
|
+
from unrealon import ParserManager
|
|
284
|
+
from bs4 import BeautifulSoup
|
|
285
|
+
|
|
286
|
+
class TraditionalParser(ParserManager):
|
|
287
|
+
async def parse_products(self, url: str):
|
|
288
|
+
html = await self.browser.get_html(url)
|
|
289
|
+
soup = BeautifulSoup(html, "html.parser")
|
|
290
|
+
|
|
291
|
+
products = []
|
|
292
|
+
for item in soup.select(".product"):
|
|
293
|
+
products.append({
|
|
294
|
+
"title": item.select_one(".title").text,
|
|
295
|
+
"price": item.select_one(".price").text
|
|
296
|
+
})
|
|
297
|
+
|
|
298
|
+
return products
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### 🚀 **Daemon Mode** (Production)
|
|
302
|
+
For enterprise deployments with dashboard:
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
class ProductionParser(ParserManager):
|
|
306
|
+
async def handle_parse_command(self, command):
|
|
307
|
+
"""Handle remote commands from dashboard"""
|
|
308
|
+
url = command.data.get("url")
|
|
309
|
+
return await self.parse_products(url)
|
|
310
|
+
|
|
311
|
+
# Start daemon
|
|
312
|
+
await parser.start_daemon(
|
|
313
|
+
api_key="your_api_key"
|
|
314
|
+
)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### ⏰ **Scheduled Mode** (Automation)
|
|
318
|
+
For regular data collection:
|
|
319
|
+
|
|
320
|
+
```python
|
|
321
|
+
class ScheduledParser(ParserManager):
|
|
322
|
+
async def run_scheduled(self):
|
|
323
|
+
"""Called automatically by scheduler"""
|
|
324
|
+
urls = self.get_target_urls()
|
|
325
|
+
results = []
|
|
326
|
+
|
|
327
|
+
for url in urls:
|
|
328
|
+
data = await self.parse_products(url)
|
|
329
|
+
results.extend(data)
|
|
330
|
+
|
|
331
|
+
return results
|
|
332
|
+
|
|
333
|
+
# Run every hour
|
|
334
|
+
await parser.start_daemon(schedule="1h")
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## 🛡️ Advanced Stealth Technologies
|
|
340
|
+
|
|
341
|
+
### Built-in Anti-Detection Features:
|
|
342
|
+
- **Playwright Stealth** - Browser fingerprint modification
|
|
343
|
+
- **Proxy Rotation** - Automatic IP address switching
|
|
344
|
+
- **User-Agent Spoofing** - Mimicking different browsers
|
|
345
|
+
- **Request Timing** - Human-like delays
|
|
346
|
+
- **Cookie Management** - Session persistence
|
|
347
|
+
- **CAPTCHA Solving** - Automatic CAPTCHA resolution
|
|
348
|
+
- **Behavioral Patterns** - User action simulation
|
|
349
|
+
|
|
350
|
+
### Stealth Levels:
|
|
351
|
+
```python
|
|
352
|
+
# Configure stealth level
|
|
353
|
+
parser = ParserManager(stealth_level="maximum") # minimal | balanced | maximum
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
- **Minimal** - Basic protection (fast)
|
|
357
|
+
- **Balanced** - Optimal balance (recommended)
|
|
358
|
+
- **Maximum** - Maximum protection (slower but bulletproof)
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
## 🧠 AI-Powered Features
|
|
363
|
+
|
|
364
|
+
```python
|
|
365
|
+
# Smart content extraction - AI understands page structure
|
|
366
|
+
result = await parser.extract_with_ai(
|
|
367
|
+
url="https://ecommerce.com/products",
|
|
368
|
+
instruction="Extract product name, price, rating"
|
|
369
|
+
)
|
|
370
|
+
|
|
371
|
+
print(f"Extracted {len(result.data)} products")
|
|
372
|
+
print(f"Confidence: {result.confidence}")
|
|
373
|
+
|
|
374
|
+
# AI adapts to website changes automatically
|
|
375
|
+
result = await parser.adaptive_parse(
|
|
376
|
+
url="https://news.com",
|
|
377
|
+
data_type="articles",
|
|
378
|
+
fields=["title", "author", "date"]
|
|
379
|
+
)
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
---
|
|
383
|
+
|
|
384
|
+
## 📊 Enterprise Dashboard Features
|
|
385
|
+
|
|
386
|
+
- 📈 **Live Metrics** - RPS, success rate, errors
|
|
387
|
+
- 📋 **Task Management** - Create, stop, schedule tasks
|
|
388
|
+
- 🔍 **Log Search** - Instant search across all events
|
|
389
|
+
- 🚨 **Alerts** - Slack, Email, Telegram notifications
|
|
390
|
+
- 👥 **Team Collaboration** - Roles and permissions
|
|
391
|
+
|
|
392
|
+
**Access:** [https://cloud.unrealon.com](https://cloud.unrealon.com)
|
|
393
|
+
|
|
394
|
+
```python
|
|
395
|
+
# Control parsers via API
|
|
396
|
+
response = requests.post("https://api.unrealon.com/parsers/start", json={
|
|
397
|
+
"parser_id": "my_parser", "config": {"max_pages": 10}
|
|
398
|
+
})
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
---
|
|
402
|
+
|
|
403
|
+
## 🎯 Working Examples
|
|
404
|
+
|
|
405
|
+
### E-commerce Parser
|
|
406
|
+
```python
|
|
407
|
+
class EcommerceParser(ParserManager):
|
|
408
|
+
async def parse_products(self, url: str):
|
|
409
|
+
await self.browser.navigate(url)
|
|
410
|
+
|
|
411
|
+
# AI extracts all product data automatically
|
|
412
|
+
products = await self.extract_with_ai(
|
|
413
|
+
url, "Extract products with title, price, rating"
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
return products.data
|
|
417
|
+
|
|
418
|
+
# Usage - Parse multiple sites
|
|
419
|
+
parser = EcommerceParser()
|
|
420
|
+
await parser.setup()
|
|
421
|
+
|
|
422
|
+
amazon_products = await parser.parse_products("https://amazon.com/s?k=laptop")
|
|
423
|
+
ebay_products = await parser.parse_products("https://ebay.com/sch/laptop")
|
|
424
|
+
|
|
425
|
+
await parser.cleanup()
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
### News & Social Media
|
|
429
|
+
```python
|
|
430
|
+
class NewsParser(ParserManager):
|
|
431
|
+
async def parse_articles(self, url: str):
|
|
432
|
+
await self.browser.navigate(url)
|
|
433
|
+
return await self.extract_with_ai(url, "Extract articles with title, author, date")
|
|
434
|
+
|
|
435
|
+
# Parse multiple sources
|
|
436
|
+
sources = ["https://news.ycombinator.com", "https://techcrunch.com"]
|
|
437
|
+
all_articles = []
|
|
438
|
+
for source in sources:
|
|
439
|
+
articles = await parser.parse_articles(source)
|
|
440
|
+
all_articles.extend(articles)
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
---
|
|
444
|
+
|
|
445
|
+
## 🔧 Configuration
|
|
446
|
+
|
|
447
|
+
```yaml
|
|
448
|
+
# config.yaml
|
|
449
|
+
parser:
|
|
450
|
+
name: "My Parser"
|
|
451
|
+
target_urls:
|
|
452
|
+
- https://example.com/products
|
|
453
|
+
|
|
454
|
+
browser:
|
|
455
|
+
headless: true
|
|
456
|
+
|
|
457
|
+
bridge:
|
|
458
|
+
enabled: true
|
|
459
|
+
api_key: "your_api_key"
|
|
460
|
+
|
|
461
|
+
processing:
|
|
462
|
+
delay_between_requests: 1.0
|
|
463
|
+
max_pages: 1
|
|
464
|
+
|
|
465
|
+
logging:
|
|
466
|
+
level: INFO
|
|
467
|
+
to_bridge: true
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## 🚀 CLI Tools
|
|
473
|
+
|
|
474
|
+
```bash
|
|
475
|
+
# Quick parsing
|
|
476
|
+
unrealon parse --url https://example.com --ai-instruction "Extract products"
|
|
477
|
+
|
|
478
|
+
# Start daemon
|
|
479
|
+
unrealon daemon --config config.yaml
|
|
480
|
+
|
|
481
|
+
# Test stealth
|
|
482
|
+
unrealon browser test-stealth --url https://bot.sannysoft.com
|
|
483
|
+
|
|
484
|
+
# Export results
|
|
485
|
+
unrealon export --format csv --output results.csv
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
---
|
|
489
|
+
|
|
490
|
+
## 🎉 Real-World Success Stories
|
|
491
|
+
|
|
492
|
+
### 🚗 **CarAPIs** - Automotive Data Platform
|
|
493
|
+
**Platform**: [carapis.com](https://carapis.com)
|
|
494
|
+
**Challenge**: Extract vehicle data from 500+ dealership websites
|
|
495
|
+
**Solution**: UnrealOn with AI-powered extraction
|
|
496
|
+
**Results**: 95% accuracy, 10M+ vehicles processed monthly
|
|
497
|
+
|
|
498
|
+
### 🛒 **ShopAPIs** - E-commerce Intelligence
|
|
499
|
+
**Platform**: [shopapis.com](https://shopapis.com)
|
|
500
|
+
**Challenge**: Monitor prices across 50+ e-commerce platforms
|
|
501
|
+
**Solution**: UnrealOn cluster with real-time monitoring
|
|
502
|
+
**Results**: 99.9% uptime, 1M+ products tracked daily
|
|
503
|
+
|
|
504
|
+
### 📊 **StockAPIs** - Financial Data Platform
|
|
505
|
+
**Platform**: [stockapis.com](https://stockapis.com)
|
|
506
|
+
**Challenge**: High-frequency financial data collection
|
|
507
|
+
**Solution**: UnrealOn with millisecond precision
|
|
508
|
+
**Results**: 100K+ data points per second, 99.99% accuracy
|
|
509
|
+
|
|
510
|
+
### 🏠 **PropAPIs** - Real Estate Intelligence
|
|
511
|
+
**Platform**: [propapis.com](https://propapis.com)
|
|
512
|
+
**Challenge**: Aggregate listings from 200+ real estate sites
|
|
513
|
+
**Solution**: UnrealOn with geographic clustering
|
|
514
|
+
**Results**: 5M+ properties indexed, real-time updates
|
|
515
|
+
|
|
516
|
+
**All platforms built with UnrealOn - proving enterprise reliability!**
|
|
517
|
+
|
|
518
|
+
---
|
|
519
|
+
|
|
520
|
+
## 💎 Enterprise Features
|
|
521
|
+
|
|
522
|
+
Need **enterprise capabilities**?
|
|
523
|
+
|
|
524
|
+
### 🏢 **Enterprise Edition Includes:**
|
|
525
|
+
- 🛡️ **Dedicated Infrastructure** - Private cloud deployment
|
|
526
|
+
- 🔒 **Advanced Security** - SOC2/GDPR compliance
|
|
527
|
+
- 🤝 **24/7 Support** - Dedicated success manager
|
|
528
|
+
- 📊 **Custom Analytics** - Tailored reporting and insights
|
|
529
|
+
- 🚀 **Priority Features** - Early access to new capabilities
|
|
530
|
+
- 🔧 **Custom Integrations** - Bespoke API development
|
|
531
|
+
|
|
532
|
+
### 📞 **Contact Enterprise Sales:**
|
|
533
|
+
- **Email**: [enterprise@unrealon.com](mailto:enterprise@unrealon.com)
|
|
534
|
+
- **Phone**: +1 (555) 123-4567
|
|
535
|
+
- **Schedule Demo**: [calendly.com/unrealon-demo](https://calendly.com/unrealon-demo)
|
|
536
|
+
|
|
537
|
+
---
|
|
538
|
+
|
|
539
|
+
## 📚 Documentation & Support
|
|
540
|
+
|
|
541
|
+
### 📖 **Resources:**
|
|
542
|
+
- [📘 Complete Documentation](https://docs.unrealon.com)
|
|
543
|
+
- [🎥 Video Tutorials](https://youtube.com/unrealon)
|
|
544
|
+
- [💬 Discord Community](https://discord.gg/unrealon)
|
|
545
|
+
- [📧 Technical Support](mailto:support@unrealon.com)
|
|
546
|
+
|
|
547
|
+
### 🎓 **Learning Path:**
|
|
548
|
+
1. [🚀 Quick Start (5 minutes)](https://docs.unrealon.com/quickstart)
|
|
549
|
+
2. [🏗️ Platform Architecture](https://docs.unrealon.com/architecture)
|
|
550
|
+
3. [🛡️ Advanced Stealth Guide](https://docs.unrealon.com/stealth)
|
|
551
|
+
4. [🤖 AI Parsing Tutorial](https://docs.unrealon.com/ai-parsing)
|
|
552
|
+
5. [📊 Dashboard Management](https://docs.unrealon.com/dashboard)
|
|
553
|
+
|
|
554
|
+
### 🆘 **Getting Help:**
|
|
555
|
+
- **GitHub Issues**: [Report bugs](https://github.com/unrealos/unrealon-rpc/issues)
|
|
556
|
+
- **GitHub Discussions**: [Ask questions](https://github.com/unrealos/unrealon-rpc/discussions)
|
|
557
|
+
- **Stack Overflow**: Tag your questions with `unrealon`
|
|
558
|
+
- **Email Support**: [support@unrealon.com](mailto:support@unrealon.com)
|
|
559
|
+
|
|
560
|
+
---
|
|
561
|
+
|
|
562
|
+
## 🤝 Contributing
|
|
563
|
+
|
|
564
|
+
We welcome contributions! Here's how to get started:
|
|
565
|
+
|
|
566
|
+
### Development Setup
|
|
567
|
+
```bash
|
|
568
|
+
# Clone repository
|
|
569
|
+
git clone https://github.com/unrealos/unrealon-rpc.git
|
|
570
|
+
cd unrealon-rpc
|
|
571
|
+
|
|
572
|
+
# Install development dependencies
|
|
573
|
+
poetry install
|
|
574
|
+
|
|
575
|
+
# Install pre-commit hooks
|
|
576
|
+
pre-commit install
|
|
577
|
+
|
|
578
|
+
# Run tests
|
|
579
|
+
pytest
|
|
580
|
+
|
|
581
|
+
# Run linting
|
|
582
|
+
black src/
|
|
583
|
+
isort src/
|
|
584
|
+
mypy src/
|
|
585
|
+
```
|
|
586
|
+
|
|
587
|
+
### Contribution Guidelines
|
|
588
|
+
- Follow PEP 8 style guide
|
|
589
|
+
- Add type hints to all functions
|
|
590
|
+
- Write comprehensive docstrings
|
|
591
|
+
- Include tests for new features
|
|
592
|
+
- Update documentation as needed
|
|
593
|
+
|
|
594
|
+
---
|
|
595
|
+
|
|
596
|
+
## 📄 License
|
|
597
|
+
|
|
598
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
599
|
+
|
|
600
|
+
---
|
|
601
|
+
|
|
602
|
+
<div align="center">
|
|
603
|
+
|
|
604
|
+
## 🚀 Start Building Amazing Parsers Today!
|
|
605
|
+
|
|
606
|
+
```bash
|
|
607
|
+
pip install unrealon
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
**UnrealOn Platform** - The Future of Web Scraping is Here! 🌟
|
|
611
|
+
|
|
612
|
+
[GitHub](https://github.com/unrealon)
|
|
613
|
+
[Discord](https://discord.gg/unrealon)
|
|
614
|
+
[Documentation](https://docs.unrealon.com)
|
|
615
|
+
[Twitter](https://twitter.com/unrealon)
|
|
616
|
+
|
|
617
|
+
*Built with ❤️ by the UnrealOn Team*
|
|
618
|
+
|
|
619
|
+
**Ready to revolutionize your web scraping?** [Get Started Now!](https://docs.unrealon.com/quickstart)
|
|
620
|
+
|
|
621
|
+
</div>
|