unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.4.dist-info/METADATA +658 -0
  3. unrealon-1.1.4.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,74 +0,0 @@
1
- """
2
- Browser configuration with Pydantic v2
3
- """
4
-
5
- from typing import Optional, Dict, Any, List
6
- from pathlib import Path
7
- from pydantic import BaseModel, Field
8
-
9
-
10
- class BrowserConfig(BaseModel):
11
- """
12
- Browser configuration with smart defaults
13
- """
14
-
15
- # Browser type and mode
16
- browser_type: str = Field("chromium", description="Browser type (chromium, firefox, webkit)")
17
- headless: bool = Field(True, description="Run browser in headless mode")
18
-
19
- # Stealth and detection
20
- stealth_mode: bool = Field(True, description="Enable stealth mode")
21
- user_agent: Optional[str] = Field(None, description="Custom user agent")
22
- viewport_width: int = Field(1920, description="Viewport width")
23
- viewport_height: int = Field(1080, description="Viewport height")
24
-
25
- # Timeouts
26
- page_timeout: int = Field(30000, description="Page load timeout in milliseconds")
27
- navigation_timeout: int = Field(30000, description="Navigation timeout in milliseconds")
28
- element_timeout: int = Field(10000, description="Element wait timeout in milliseconds")
29
-
30
- # Proxy settings
31
- proxy_url: Optional[str] = Field(None, description="Proxy URL")
32
- proxy_username: Optional[str] = Field(None, description="Proxy username")
33
- proxy_password: Optional[str] = Field(None, description="Proxy password")
34
-
35
- # Cookie and session management
36
- persist_cookies: bool = Field(True, description="Persist cookies between sessions")
37
- cookies_file: Optional[Path] = Field(None, description="Path to cookies file")
38
-
39
- # Screenshots and debugging
40
- screenshots_dir: Optional[Path] = Field(None, description="Screenshots directory")
41
- save_screenshots: bool = Field(False, description="Save screenshots for debugging")
42
- debug: bool = Field(False, description="Enable debug mode")
43
-
44
- # Performance settings
45
- disable_images: bool = Field(False, description="Disable image loading")
46
- disable_javascript: bool = Field(False, description="Disable JavaScript execution")
47
- disable_css: bool = Field(False, description="Disable CSS loading")
48
-
49
- # Browser arguments
50
- extra_args: List[str] = Field(default_factory=list, description="Additional browser arguments")
51
-
52
- # Additional settings
53
- extra_config: Dict[str, Any] = Field(default_factory=dict, description="Additional configuration")
54
-
55
- class Config:
56
- """Pydantic configuration"""
57
- validate_assignment = True
58
- extra = "forbid"
59
-
60
- def model_post_init(self, __context: Any) -> None:
61
- """Post-initialization setup"""
62
- # Setup directories
63
- if not self.screenshots_dir:
64
- self.screenshots_dir = Path.cwd() / "system" / "screenshots"
65
-
66
- if not self.cookies_file:
67
- self.cookies_file = Path.cwd() / "system" / "cookies.json"
68
-
69
- # Create directories
70
- if self.screenshots_dir:
71
- self.screenshots_dir.mkdir(parents=True, exist_ok=True)
72
-
73
- if self.cookies_file:
74
- self.cookies_file.parent.mkdir(parents=True, exist_ok=True)
@@ -1,416 +0,0 @@
1
- """
2
- Modern Browser Manager built on Playwright
3
- """
4
-
5
- import asyncio
6
- import json
7
- import uuid
8
- from datetime import datetime, timezone
9
- from typing import Optional, Dict, Any, List
10
- from pathlib import Path
11
-
12
- try:
13
- from playwright.async_api import async_playwright, Browser, BrowserContext, Page
14
- except ImportError:
15
- async_playwright = None
16
- Browser = None
17
- BrowserContext = None
18
- Page = None
19
-
20
- from unrealon_rpc.logging import get_logger
21
-
22
- from .config import BrowserConfig
23
- from ..exceptions import BrowserError
24
-
25
-
26
- class BrowserManager:
27
- """
28
- 🌐 Modern Browser Manager v4.0
29
-
30
- Simplified browser automation built on Playwright with stealth capabilities.
31
- Designed for the new architecture where complex automation is simplified.
32
-
33
- Features:
34
- - 🎭 Stealth Mode: Anti-detection by default
35
- - 🍪 Cookie Persistence: Automatic cookie management
36
- - 📸 Screenshots: Debug-friendly screenshot capture
37
- - ⚡ Performance: Optimized for speed and reliability
38
- - 🔧 Zero Config: Works out of the box
39
- """
40
-
41
- def __init__(self, config: BrowserConfig):
42
- """
43
- Initialize browser manager
44
-
45
- Args:
46
- config: Browser configuration
47
- """
48
- if async_playwright is None:
49
- raise BrowserError(
50
- "Playwright is not installed. Install it with: pip install playwright && playwright install"
51
- )
52
-
53
- self.config = config
54
- self.logger = get_logger()
55
-
56
- # Browser components
57
- self._playwright = None
58
- self._browser: Optional[Browser] = None
59
- self._context: Optional[BrowserContext] = None
60
- self._page: Optional[Page] = None
61
-
62
- # State
63
- self._is_initialized = False
64
- self._session_id = str(uuid.uuid4())
65
-
66
- # ==========================================
67
- # LIFECYCLE MANAGEMENT
68
- # ==========================================
69
-
70
- async def initialize(self) -> None:
71
- """Initialize browser components"""
72
- if self._is_initialized:
73
- return
74
-
75
- try:
76
- self.logger.info("Initializing browser manager...")
77
-
78
- # Start Playwright
79
- self._playwright = await async_playwright().start()
80
-
81
- # Launch browser
82
- browser_args = self._get_browser_args()
83
-
84
- if self.config.browser_type == "chromium":
85
- self._browser = await self._playwright.chromium.launch(**browser_args)
86
- elif self.config.browser_type == "firefox":
87
- self._browser = await self._playwright.firefox.launch(**browser_args)
88
- elif self.config.browser_type == "webkit":
89
- self._browser = await self._playwright.webkit.launch(**browser_args)
90
- else:
91
- raise BrowserError(f"Unsupported browser type: {self.config.browser_type}")
92
-
93
- # Create context
94
- context_args = self._get_context_args()
95
- self._context = await self._browser.new_context(**context_args)
96
-
97
- # Load cookies if available
98
- await self._load_cookies()
99
-
100
- # Create page
101
- self._page = await self._context.new_page()
102
-
103
- # Setup stealth mode
104
- if self.config.stealth_mode:
105
- await self._setup_stealth()
106
-
107
- # Set timeouts
108
- self._page.set_default_timeout(self.config.page_timeout)
109
- self._page.set_default_navigation_timeout(self.config.navigation_timeout)
110
-
111
- self._is_initialized = True
112
- self.logger.info(f"Browser initialized: {self.config.browser_type}")
113
-
114
- except Exception as e:
115
- await self.cleanup()
116
- raise BrowserError(f"Failed to initialize browser: {e}")
117
-
118
- async def cleanup(self) -> None:
119
- """Clean up browser resources"""
120
- self.logger.info("Cleaning up browser resources...")
121
-
122
- try:
123
- # Save cookies
124
- if self._context and self.config.persist_cookies:
125
- await self._save_cookies()
126
-
127
- # Close page
128
- if self._page:
129
- await self._page.close()
130
- self._page = None
131
-
132
- # Close context
133
- if self._context:
134
- await self._context.close()
135
- self._context = None
136
-
137
- # Close browser
138
- if self._browser:
139
- await self._browser.close()
140
- self._browser = None
141
-
142
- # Stop Playwright
143
- if self._playwright:
144
- await self._playwright.stop()
145
- self._playwright = None
146
-
147
- self._is_initialized = False
148
- self.logger.info("Browser cleanup completed")
149
-
150
- except Exception as e:
151
- self.logger.error(f"Error during browser cleanup: {e}")
152
-
153
- # ==========================================
154
- # HIGH-LEVEL METHODS
155
- # ==========================================
156
-
157
- async def get_html(self, url: str, wait_for: Optional[str] = None) -> str:
158
- """
159
- Get HTML content from URL
160
-
161
- Args:
162
- url: Target URL
163
- wait_for: Optional CSS selector to wait for
164
-
165
- Returns:
166
- HTML content as string
167
- """
168
- await self._ensure_initialized()
169
-
170
- try:
171
- self.logger.info(f"Navigating to: {url}")
172
-
173
- # Navigate to URL
174
- await self._page.goto(url, wait_until="domcontentloaded")
175
-
176
- # Wait for specific element if requested
177
- if wait_for:
178
- await self._page.wait_for_selector(wait_for, timeout=self.config.element_timeout)
179
-
180
- # Get HTML content
181
- html = await self._page.content()
182
-
183
- # Save screenshot if debugging
184
- if self.config.save_screenshots:
185
- await self._save_screenshot(f"get_html_{url.replace('/', '_')}")
186
-
187
- self.logger.info(f"Retrieved HTML content: {len(html)} characters")
188
- return html
189
-
190
- except Exception as e:
191
- if self.config.save_screenshots:
192
- await self._save_screenshot(f"error_{url.replace('/', '_')}")
193
- raise BrowserError(f"Failed to get HTML from {url}: {e}")
194
-
195
- async def extract_elements(
196
- self,
197
- url: str,
198
- selector: str,
199
- attribute: Optional[str] = None
200
- ) -> List[str]:
201
- """
202
- Extract elements from URL using CSS selector
203
-
204
- Args:
205
- url: Target URL
206
- selector: CSS selector
207
- attribute: Optional attribute to extract (default: text content)
208
-
209
- Returns:
210
- List of extracted values
211
- """
212
- await self._ensure_initialized()
213
-
214
- try:
215
- self.logger.info(f"Extracting elements from: {url}")
216
-
217
- # Navigate to URL
218
- await self._page.goto(url, wait_until="domcontentloaded")
219
-
220
- # Wait for elements
221
- await self._page.wait_for_selector(selector, timeout=self.config.element_timeout)
222
-
223
- # Extract elements
224
- if attribute:
225
- elements = await self._page.eval_on_selector_all(
226
- selector,
227
- f"elements => elements.map(el => el.getAttribute('{attribute}'))"
228
- )
229
- else:
230
- elements = await self._page.eval_on_selector_all(
231
- selector,
232
- "elements => elements.map(el => el.textContent.trim())"
233
- )
234
-
235
- # Filter out empty values
236
- elements = [el for el in elements if el and el.strip()]
237
-
238
- self.logger.info(f"Extracted {len(elements)} elements")
239
- return elements
240
-
241
- except Exception as e:
242
- raise BrowserError(f"Failed to extract elements from {url}: {e}")
243
-
244
- async def screenshot(self, filename: Optional[str] = None) -> Path:
245
- """
246
- Take screenshot of current page
247
-
248
- Args:
249
- filename: Optional filename (auto-generated if not provided)
250
-
251
- Returns:
252
- Path to screenshot file
253
- """
254
- await self._ensure_initialized()
255
-
256
- if not filename:
257
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
258
- filename = f"screenshot_{timestamp}.png"
259
-
260
- if not filename.endswith('.png'):
261
- filename += '.png'
262
-
263
- screenshot_path = self.config.screenshots_dir / filename
264
-
265
- try:
266
- await self._page.screenshot(path=str(screenshot_path), full_page=True)
267
- self.logger.info(f"Screenshot saved: {screenshot_path}")
268
- return screenshot_path
269
-
270
- except Exception as e:
271
- raise BrowserError(f"Failed to take screenshot: {e}")
272
-
273
- # ==========================================
274
- # UTILITY METHODS
275
- # ==========================================
276
-
277
- async def health_check(self) -> Dict[str, Any]:
278
- """Browser health check"""
279
- return {
280
- "status": "healthy" if self._is_initialized else "not_initialized",
281
- "browser_type": self.config.browser_type,
282
- "session_id": self._session_id,
283
- "stealth_mode": self.config.stealth_mode,
284
- "headless": self.config.headless,
285
- "initialized": self._is_initialized
286
- }
287
-
288
- # ==========================================
289
- # PRIVATE METHODS
290
- # ==========================================
291
-
292
- async def _ensure_initialized(self) -> None:
293
- """Ensure browser is initialized"""
294
- if not self._is_initialized:
295
- await self.initialize()
296
-
297
- def _get_browser_args(self) -> Dict[str, Any]:
298
- """Get browser launch arguments"""
299
- args = {
300
- "headless": self.config.headless,
301
- "args": self.config.extra_args.copy()
302
- }
303
-
304
- # Add stealth arguments
305
- if self.config.stealth_mode:
306
- args["args"].extend([
307
- "--no-first-run",
308
- "--no-default-browser-check",
309
- "--disable-blink-features=AutomationControlled",
310
- "--disable-web-security",
311
- "--disable-features=VizDisplayCompositor"
312
- ])
313
-
314
- # Add performance arguments
315
- if self.config.disable_images:
316
- args["args"].append("--disable-images")
317
-
318
- return args
319
-
320
- def _get_context_args(self) -> Dict[str, Any]:
321
- """Get browser context arguments"""
322
- args = {
323
- "viewport": {
324
- "width": self.config.viewport_width,
325
- "height": self.config.viewport_height
326
- }
327
- }
328
-
329
- # User agent
330
- if self.config.user_agent:
331
- args["user_agent"] = self.config.user_agent
332
-
333
- # Proxy
334
- if self.config.proxy_url:
335
- proxy_config = {"server": self.config.proxy_url}
336
- if self.config.proxy_username:
337
- proxy_config["username"] = self.config.proxy_username
338
- if self.config.proxy_password:
339
- proxy_config["password"] = self.config.proxy_password
340
- args["proxy"] = proxy_config
341
-
342
- # Disable resources
343
- if self.config.disable_javascript:
344
- args["java_script_enabled"] = False
345
-
346
- return args
347
-
348
- async def _setup_stealth(self) -> None:
349
- """Setup stealth mode"""
350
- # Add stealth scripts
351
- await self._page.add_init_script("""
352
- // Remove webdriver property
353
- Object.defineProperty(navigator, 'webdriver', {
354
- get: () => undefined,
355
- });
356
-
357
- // Mock plugins
358
- Object.defineProperty(navigator, 'plugins', {
359
- get: () => [1, 2, 3, 4, 5],
360
- });
361
-
362
- // Mock languages
363
- Object.defineProperty(navigator, 'languages', {
364
- get: () => ['en-US', 'en'],
365
- });
366
- """)
367
-
368
- async def _load_cookies(self) -> None:
369
- """Load cookies from file"""
370
- if not self.config.persist_cookies or not self.config.cookies_file:
371
- return
372
-
373
- try:
374
- if self.config.cookies_file.exists():
375
- with open(self.config.cookies_file, 'r') as f:
376
- cookies = json.load(f)
377
- await self._context.add_cookies(cookies)
378
- self.logger.info(f"Loaded {len(cookies)} cookies")
379
- except Exception as e:
380
- self.logger.warning(f"Failed to load cookies: {e}")
381
-
382
- async def _save_cookies(self) -> None:
383
- """Save cookies to file"""
384
- if not self.config.persist_cookies or not self.config.cookies_file:
385
- return
386
-
387
- try:
388
- cookies = await self._context.cookies()
389
- with open(self.config.cookies_file, 'w') as f:
390
- json.dump(cookies, f, indent=2)
391
- self.logger.info(f"Saved {len(cookies)} cookies")
392
- except Exception as e:
393
- self.logger.warning(f"Failed to save cookies: {e}")
394
-
395
- async def _save_screenshot(self, name: str) -> None:
396
- """Save debug screenshot"""
397
- try:
398
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
399
- filename = f"{name}_{timestamp}.png"
400
- await self.screenshot(filename)
401
- except Exception as e:
402
- self.logger.warning(f"Failed to save debug screenshot: {e}")
403
-
404
- # ==========================================
405
- # CONTEXT MANAGER SUPPORT
406
- # ==========================================
407
-
408
- async def __aenter__(self):
409
- """Async context manager entry"""
410
- await self.initialize()
411
- return self
412
-
413
- async def __aexit__(self, exc_type, exc_val, exc_tb):
414
- """Async context manager exit"""
415
- await self.cleanup()
416
- return False
@@ -1,51 +0,0 @@
1
- """
2
- Browser Manager - Wrapper over unrealon_driver.browser
3
-
4
- Simple wrapper that inherits from the main BrowserManager
5
- """
6
-
7
- from typing import Optional, Dict, Any
8
- from pydantic import BaseModel, Field, ConfigDict
9
-
10
- from unrealon_driver.browser import BrowserManager as BaseBrowserManager, BrowserConfig as BaseBrowserConfig
11
-
12
-
13
- class BrowserConfig(BaseBrowserConfig):
14
- """Extended browser configuration for parser manager"""
15
-
16
- model_config = ConfigDict(validate_assignment=True, extra="forbid")
17
-
18
-
19
- class BrowserStats(BaseModel):
20
- """Browser usage statistics"""
21
-
22
- model_config = ConfigDict(validate_assignment=True, extra="forbid")
23
-
24
- pages_visited: int = Field(default=0, ge=0)
25
- total_load_time: float = Field(default=0.0, ge=0.0)
26
- average_load_time: float = Field(default=0.0, ge=0.0)
27
- screenshots_taken: int = Field(default=0, ge=0)
28
- cookies_saved: int = Field(default=0, ge=0)
29
- errors_count: int = Field(default=0, ge=0)
30
- session_duration: float = Field(default=0.0, ge=0.0)
31
-
32
-
33
- class BrowserManager(BaseBrowserManager):
34
- """
35
- 🌐 Browser Manager - Wrapper over base browser manager
36
-
37
- Simple wrapper that extends the base BrowserManager with parser-specific functionality
38
- """
39
-
40
- def __init__(self, config: BrowserConfig):
41
- super().__init__(config)
42
- self._stats = BrowserStats()
43
-
44
- def get_stats(self) -> BrowserStats:
45
- """Get browser usage statistics"""
46
- return self._stats
47
-
48
- async def health_check(self) -> Dict[str, Any]:
49
- """Browser health check"""
50
- base_health = await super().health_check()
51
- return {**base_health, "parser_manager": True}