unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.4.dist-info/METADATA +658 -0
  3. unrealon-1.1.4.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,249 @@
1
+ """
2
+ Independent WebSocket Client for unrealon_driver.
3
+
4
+ Provides WebSocket connectivity without dependencies on unrealon_server or unrealon_rpc.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import logging
10
+ from typing import Optional, Dict, Any, Callable, Awaitable
11
+ from dataclasses import dataclass
12
+ from datetime import datetime, timezone
13
+
14
+ import websockets
15
+
16
+
17
@dataclass
class WebSocketConfig:
    """Connection settings for the WebSocket client.

    Only ``url`` is mandatory; every other field has a default.
    """

    # Endpoint to connect to.
    url: str

    # Optional values sent as request headers when connecting
    # (``Authorization: Bearer <api_key>`` and ``X-Parser-ID``).
    api_key: Optional[str] = None
    parser_id: Optional[str] = None

    # Reconnect policy: base delay in seconds and maximum number of attempts.
    reconnect_interval: float = 5.0
    max_reconnect_attempts: int = 10

    # Keepalive settings, in seconds, handed to ``websockets.connect``.
    ping_interval: float = 30.0
    ping_timeout: float = 10.0
27
+
28
+
29
class WebSocketClient:
    """
    Independent WebSocket client for driver-server communication.

    Features:
    - Auto-reconnection with capped exponential backoff
    - Message queuing during disconnection
    - Request-response pattern support (correlated by ``request_id``)
    - Event-based message handling (dispatched by the ``type`` field)
    """

    def __init__(self, config: "WebSocketConfig"):
        """Store the configuration; no network activity happens here."""
        self.config = config
        self._websocket = None
        self._connected = False
        self._reconnect_task = None
        # Keep a reference to the listener task: the event loop only holds
        # weak references, so an unreferenced task may be garbage collected.
        self._listener_task = None
        self._message_handlers: Dict[str, Callable[[Dict[str, Any]], Awaitable[None]]] = {}
        self._pending_requests: Dict[str, asyncio.Future] = {}
        self._message_queue = []
        self._logger = logging.getLogger(__name__)

    async def connect(self) -> bool:
        """Connect to the WebSocket server.

        On success the background listener is started and any queued
        messages are flushed.

        Returns:
            True when the connection was opened, False when opening failed
            (the error is logged, never raised).
        """
        try:
            self._websocket = await self._open_connection()
            self._connected = True
            self._logger.info(f"Connected to WebSocket: {self.config.url}")

            # Start message listener
            self._listener_task = asyncio.create_task(self._message_listener())

            # Send queued messages
            await self._send_queued_messages()

            return True

        except Exception as e:
            self._logger.error(f"Failed to connect to WebSocket: {e}")
            self._connected = False
            return False

    async def _open_connection(self):
        """Open the socket, passing auth headers with whichever keyword works.

        Recent ``websockets`` releases name the parameter
        ``additional_headers`` while the legacy client uses ``extra_headers``;
        the unsupported spelling raises ``TypeError``. Both are tried so the
        headers are not silently dropped.
        """
        connect_params = {
            "ping_interval": self.config.ping_interval,
            "ping_timeout": self.config.ping_timeout,
        }

        headers = {}
        if self.config.api_key:
            headers["Authorization"] = f"Bearer {self.config.api_key}"
        if self.config.parser_id:
            headers["X-Parser-ID"] = self.config.parser_id

        if not headers:
            return await websockets.connect(self.config.url, **connect_params)

        for header_kwarg in ("additional_headers", "extra_headers"):
            try:
                return await websockets.connect(
                    self.config.url, **connect_params, **{header_kwarg: headers}
                )
            except TypeError:
                continue

        # Last resort, kept for backward compatibility with very old
        # releases: connect without headers, but say so loudly.
        self._logger.warning(
            "websockets library accepts no header argument; connecting without headers"
        )
        return await websockets.connect(self.config.url, **connect_params)

    async def disconnect(self):
        """Disconnect from the server and stop any reconnection in progress."""
        self._connected = False

        # Clear the slot as well as cancelling; otherwise a later
        # _reconnect() would think a reconnect is still running.
        reconnect_task, self._reconnect_task = self._reconnect_task, None
        if reconnect_task:
            reconnect_task.cancel()

        # Detach the socket before closing so the listener can tell this
        # was a deliberate shutdown and does not schedule a reconnect.
        websocket, self._websocket = self._websocket, None
        if websocket:
            await websocket.close()

        self._logger.info("Disconnected from WebSocket")

    def _serialize(self, message: Dict[str, Any]) -> Optional[str]:
        """Return the JSON text for ``message``, or None if it cannot be encoded."""
        try:
            return json.dumps(message)
        except (TypeError, ValueError) as e:
            self._logger.error(f"Dropping non-serializable message: {e}")
            return None

    async def send_message(self, message: Dict[str, Any]) -> bool:
        """Send a message, queuing it for later if the link is down.

        Returns:
            True when the message was written to the socket. False when it
            was queued (not connected / transport error) or dropped because
            it is not JSON-serializable.
        """
        if not self._connected or not self._websocket:
            # Queue message for later
            self._message_queue.append(message)
            self._logger.debug("Message queued (not connected)")
            return False

        # An encoding failure is the caller's problem, not a transport
        # failure: do not re-queue it and do not tear the connection down.
        payload = self._serialize(message)
        if payload is None:
            return False

        try:
            await self._websocket.send(payload)
            return True
        except Exception as e:
            self._logger.error(f"Failed to send message: {e}")
            self._connected = False
            self._message_queue.append(message)  # Re-queue
            await self._reconnect()
            return False

    async def send_request(self, message: Dict[str, Any], timeout: float = 30.0) -> Optional[Dict[str, Any]]:
        """Send a request and wait for the response carrying the same ``request_id``.

        A fresh ``request_id`` is written into ``message`` (the caller's dict
        is modified in place).

        Returns:
            The response message, or None on send failure, timeout or error.
        """
        import uuid

        request_id = str(uuid.uuid4())
        message["request_id"] = request_id

        # Create future for response
        future = asyncio.get_running_loop().create_future()
        self._pending_requests[request_id] = future

        try:
            # Send request
            success = await self.send_message(message)
            if not success:
                return None

            # Wait for response
            return await asyncio.wait_for(future, timeout=timeout)

        except asyncio.TimeoutError:
            self._logger.error(f"Request timeout: {request_id}")
            return None
        except Exception as e:
            self._logger.error(f"Request failed: {e}")
            return None
        finally:
            # Clean up
            self._pending_requests.pop(request_id, None)

    def add_message_handler(self, message_type: str, handler: Callable[[Dict[str, Any]], Awaitable[None]]):
        """Register ``handler`` for ``message_type``, replacing any previous one."""
        self._message_handlers[message_type] = handler

    async def _message_listener(self):
        """Receive messages until the connection ends, then trigger a reconnect."""
        websocket = self._websocket
        if websocket is None:
            return

        try:
            async for message_str in websocket:
                try:
                    message = json.loads(message_str)
                    await self._handle_message(message)
                except json.JSONDecodeError:
                    self._logger.error(f"Invalid JSON received: {message_str}")
                except Exception as e:
                    self._logger.error(f"Error handling message: {e}")

            # Iteration ends without an exception when the peer closes
            # normally; that is still a lost connection for us.
            self._logger.warning("WebSocket connection closed")

        except websockets.exceptions.ConnectionClosed:
            self._logger.warning("WebSocket connection closed")
        except Exception as e:
            self._logger.error(f"Message listener error: {e}")

        # Only react if this listener still belongs to the active socket;
        # after disconnect() or a replacement connection it is stale.
        if self._websocket is websocket:
            self._connected = False
            await self._reconnect()

    async def _handle_message(self, message: Dict[str, Any]):
        """Resolve a pending request or dispatch to the handler for the message type."""
        # Check if it's a response to a pending request
        request_id = message.get("request_id")
        if request_id and request_id in self._pending_requests:
            future = self._pending_requests[request_id]
            if not future.done():
                future.set_result(message)
            return

        # Handle by message type
        message_type = message.get("type")
        if message_type and message_type in self._message_handlers:
            try:
                await self._message_handlers[message_type](message)
            except Exception as e:
                self._logger.error(f"Handler error for {message_type}: {e}")

    async def _send_queued_messages(self):
        """Flush the queue in order, stopping at the first transport failure.

        Writes to the socket directly instead of going through
        send_message(), which would re-queue a failed message a second time.
        """
        while self._message_queue and self._connected and self._websocket:
            message = self._message_queue.pop(0)

            payload = self._serialize(message)
            if payload is None:
                continue  # cannot ever be sent; already logged

            try:
                await self._websocket.send(payload)
            except Exception as e:
                self._logger.error(f"Failed to send message: {e}")
                # Put it back at the front exactly once to preserve order.
                self._message_queue.insert(0, message)
                self._connected = False
                await self._reconnect()
                break

    async def _reconnect(self):
        """Start the reconnect loop unless one is already running."""
        if self._reconnect_task is not None and not self._reconnect_task.done():
            return  # Already reconnecting

        self._reconnect_task = asyncio.create_task(self._reconnect_loop())

    async def _reconnect_loop(self):
        """Retry connect() with exponential backoff capped at 60 seconds."""
        attempt = 0

        try:
            while attempt < self.config.max_reconnect_attempts and not self._connected:
                attempt += 1
                wait_time = min(self.config.reconnect_interval * (2 ** (attempt - 1)), 60)

                self._logger.info(f"Reconnecting in {wait_time}s (attempt {attempt}/{self.config.max_reconnect_attempts})")
                await asyncio.sleep(wait_time)

                # connect() can return True yet leave us disconnected when
                # flushing the queue failed straight away; keep trying then.
                if await self.connect() and self._connected:
                    self._logger.info("Reconnected successfully")
                    break

            if not self._connected:
                self._logger.error("Max reconnection attempts reached")
        finally:
            # Runs on cancellation too; only clear the slot if it is still ours.
            if self._reconnect_task is asyncio.current_task():
                self._reconnect_task = None

    @property
    def connected(self) -> bool:
        """True while a socket is attached and believed to be open."""
        return self._connected and self._websocket is not None

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of connection state, queue sizes and handler names."""
        return {
            "connected": self._connected,
            "url": self.config.url,
            "queued_messages": len(self._message_queue),
            "pending_requests": len(self._pending_requests),
            "handlers": list(self._message_handlers.keys())
        }
@@ -0,0 +1,188 @@
1
+ """
2
+ Global WebSocket Configuration for UnrealOn Driver
3
+
4
+ Provides automatic WebSocket URL detection and configuration management.
5
+ No need to specify URLs in parser config files - everything is handled automatically.
6
+
7
+ Strict compliance with CRITICAL_REQUIREMENTS.md:
8
+ - Pydantic v2 models everywhere
9
+ - No Dict[str, Any] usage
10
+ - Complete type annotations
11
+ - Proper error handling
12
+ """
13
+
14
+ import os
15
+ import socket
16
+ from typing import Optional, Dict, Any
17
+ from enum import Enum
18
+ from pydantic import BaseModel, Field, ConfigDict, field_validator
19
+
20
+
21
class Environment(str, Enum):
    """Deployment targets recognised by WebSocket URL auto-detection.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    DEVELOPMENT = "development"
    PRODUCTION = "production"
    LOCAL = "local"
26
+
27
+
28
class EnvironmentInfo(BaseModel):
    """Typed report of the outcome of environment detection.

    Unknown fields are rejected and assignments are re-validated.
    """

    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # Environment chosen by detection.
    detected_environment: Environment

    # URL that belongs to the detected environment.
    websocket_url: str = Field(..., description="Detected WebSocket URL")

    # Result of the local service reachability probe.
    localhost_available: bool = Field(..., description="Whether localhost service is available")

    # Raw values of the variables consulted; None means "not set".
    environment_variables: Dict[str, Optional[str]] = Field(
        description="Relevant environment variables",
        default_factory=dict,
    )
39
+
40
+
41
class GlobalWebSocketConfig(BaseModel):
    """Global WebSocket configuration with automatic URL detection.

    Holds one URL for production and one for development/local use, and
    picks between them from environment variables, marker files in the
    working directory and a TCP probe of the development endpoint.
    """
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Production WebSocket URL
    production_websocket_url: str = Field(
        default="wss://ws.unrealon.com/ws",
        description="Production WebSocket URL"
    )

    # Development WebSocket URL
    development_websocket_url: str = Field(
        default="ws://localhost:8002/ws",
        description="Development WebSocket URL"
    )

    # Default environment
    default_environment: Environment = Field(
        default=Environment.PRODUCTION,
        description="Default environment when detection fails"
    )

    @field_validator('production_websocket_url', 'development_websocket_url')
    @classmethod
    def validate_websocket_url(cls, v: str) -> str:
        """Reject URLs that do not use the ``ws://`` or ``wss://`` scheme.

        Raises:
            ValueError: if ``v`` has any other prefix.
        """
        if not v.startswith(('ws://', 'wss://')):
            raise ValueError("WebSocket URL must start with ws:// or wss://")
        return v

    def get_websocket_url(self, environment: Optional[Environment] = None) -> str:
        """Return the WebSocket URL for ``environment``.

        When ``environment`` is None it is auto-detected. PRODUCTION maps to
        the production URL; DEVELOPMENT and LOCAL both map to the
        development URL.
        """
        env = environment or self._detect_environment()

        if env == Environment.PRODUCTION:
            return self.production_websocket_url
        # Development and Local both use the development endpoint
        return self.development_websocket_url

    def _detect_environment(self) -> Environment:
        """Detect the environment from the indicators below, cheapest first.

        Order: explicit ``UNREALON_ENV``, ``DEBUG`` flag, common development
        markers, TCP probe of the development endpoint, and finally
        ``default_environment``.
        """
        # Explicit override wins; unknown or empty values fall through.
        env_var = os.getenv("UNREALON_ENV", "").lower()
        try:
            return Environment(env_var)
        except ValueError:
            pass

        # Check if we're in development mode
        if os.getenv("DEBUG", "").lower() in ("true", "1", "yes"):
            return Environment.DEVELOPMENT

        # Cheap checks run before the network probe (up to a 1s timeout).
        # Every remaining indicator yields DEVELOPMENT, so the order only
        # affects speed, not the result.
        development_indicators = (
            os.getenv("NODE_ENV") == "development",
            os.getenv("DJANGO_DEBUG", "").lower() in ("true", "1"),
            os.getenv("FLASK_ENV") == "development",
            os.path.exists(".env"),
            os.path.exists("docker-compose.yml"),
            os.path.exists("pyproject.toml") and os.getcwd().endswith("unrealon-rpc"),
        )
        if any(development_indicators):
            return Environment.DEVELOPMENT

        # Check if the development service is reachable
        if self._is_localhost_available():
            return Environment.DEVELOPMENT

        # Nothing matched: honour the configured fallback (PRODUCTION by default)
        return self.default_environment

    def _is_localhost_available(self) -> bool:
        """Check whether the development WebSocket endpoint accepts TCP connections.

        Host and port come from ``development_websocket_url`` (``localhost``
        and ``8002`` with the default URL). Any resolution, connection or
        URL-parsing problem is reported as "not available".
        """
        from urllib.parse import urlsplit

        try:
            parts = urlsplit(self.development_websocket_url)
            host = parts.hostname
            if not host:
                return False
            # Fall back to the scheme's conventional port when none is given.
            port = parts.port or (443 if parts.scheme == "wss" else 80)

            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(1)
                return sock.connect_ex((host, port)) == 0
        except (OSError, ValueError):
            return False

    def get_environment_info(self) -> EnvironmentInfo:
        """Collect the detected environment, its URL and the inputs used, for debugging."""
        current_env = self._detect_environment()

        env_vars = {
            "UNREALON_ENV": os.getenv("UNREALON_ENV"),
            "DEBUG": os.getenv("DEBUG"),
            "NODE_ENV": os.getenv("NODE_ENV"),
            "DJANGO_DEBUG": os.getenv("DJANGO_DEBUG"),
        }

        return EnvironmentInfo(
            detected_environment=current_env,
            websocket_url=self.get_websocket_url(current_env),
            localhost_available=self._is_localhost_available(),
            environment_variables=env_vars
        )
146
+
147
+
148
+ # Global configuration instance
149
+ global_websocket_config = GlobalWebSocketConfig()
150
+
151
+
152
def get_websocket_url(environment: Optional[Environment] = None) -> str:
    """Resolve the WebSocket URL through the module-level configuration.

    ``environment`` is passed through unchanged; when it is None the
    configuration object detects the environment itself.
    """
    resolved_url = global_websocket_config.get_websocket_url(environment)
    return resolved_url
155
+
156
+
157
+
158
+
159
+
160
def get_environment() -> Environment:
    """Return the environment detected by the module-level configuration."""
    detected = global_websocket_config._detect_environment()
    return detected
163
+
164
+
165
def set_environment(environment: Environment) -> None:
    """Force the detected environment (intended for tests).

    Writes the member's value into the ``UNREALON_ENV`` process
    environment variable.
    """
    os.environ.update({"UNREALON_ENV": environment.value})
168
+
169
+
170
def get_debug_info() -> EnvironmentInfo:
    """Return the module-level configuration's environment report."""
    report = global_websocket_config.get_environment_info()
    return report
173
+
174
+
175
+ # Convenience functions for common use cases
176
def is_production() -> bool:
    """Tell whether the detected environment is PRODUCTION."""
    current = get_environment()
    return current == Environment.PRODUCTION
179
+
180
+
181
def is_development() -> bool:
    """Tell whether the detected environment is DEVELOPMENT."""
    current = get_environment()
    return current == Environment.DEVELOPMENT
184
+
185
+
186
def is_local() -> bool:
    """Tell whether the detected environment is LOCAL."""
    current = get_environment()
    return current == Environment.LOCAL
@@ -0,0 +1,90 @@
1
+ """
2
+ WebSocket Manager for unrealon_driver.
3
+
4
+ Manages shared WebSocket connection for multiple use cases:
5
+ - Logging transport
6
+ - HTML analysis requests
7
+ - Other driver-server communication
8
+ """
9
+
10
+ import asyncio
11
+ from typing import Optional, Dict, Any, Callable, Awaitable
12
+ from .client import WebSocketClient, WebSocketConfig
13
+
14
+
15
class WebSocketManager:
    """
    Singleton WebSocket manager for the driver.

    Provides one shared WebSocket connection for:
    - SmartLogger (log batching)
    - ParserManager (HTML analysis)
    - Other driver components

    Message handlers are remembered by the manager itself, so they may be
    registered before ``initialize()`` and survive re-initialization.
    """

    _instance: Optional['WebSocketManager'] = None
    _client: Optional['WebSocketClient'] = None

    def __new__(cls) -> 'WebSocketManager':
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every WebSocketManager() call; set state up once only.
        if not hasattr(self, '_initialized'):
            self._initialized = True
            self._client = None
            self._config = None
            self._handlers: Dict[str, Callable[[Dict[str, Any]], Awaitable[None]]] = {}

    async def initialize(self, config: 'WebSocketConfig') -> bool:
        """Create a client for ``config`` and connect it.

        Any existing client is disconnected first. Handlers registered so
        far are attached to the new client before it connects.

        Returns:
            The result of the client's ``connect()`` call.
        """
        if self._client:
            await self._client.disconnect()

        self._config = config
        self._client = WebSocketClient(config)

        for message_type, handler in self._handlers.items():
            self._client.add_message_handler(message_type, handler)

        return await self._client.connect()

    async def send_message(self, message: Dict[str, Any]) -> bool:
        """Send a message via the client; returns False if there is no client."""
        if not self._client:
            return False
        return await self._client.send_message(message)

    async def send_request(self, message: Dict[str, Any], timeout: float = 30.0) -> Optional[Dict[str, Any]]:
        """Send a request and wait for its response; returns None if there is no client."""
        if not self._client:
            return None
        return await self._client.send_request(message, timeout)

    def add_message_handler(self, message_type: str, handler: Callable[[Dict[str, Any]], Awaitable[None]]):
        """Register ``handler`` for ``message_type``.

        The handler is stored on the manager and, if a client exists, also
        attached to it immediately. Registering before ``initialize()`` is
        therefore no longer silently ignored.
        """
        self._handlers[message_type] = handler
        if self._client:
            self._client.add_message_handler(message_type, handler)

    async def disconnect(self):
        """Disconnect and drop the current client, if any."""
        if self._client:
            await self._client.disconnect()
            self._client = None

    @property
    def connected(self) -> bool:
        """True when a client exists and reports itself connected."""
        return self._client is not None and self._client.connected

    @property
    def client(self) -> Optional['WebSocketClient']:
        """The underlying client, or None before ``initialize()`` / after ``disconnect()``."""
        return self._client

    def get_stats(self) -> Dict[str, Any]:
        """Return the client's statistics, or ``{"connected": False}`` without a client."""
        if self._client:
            return self._client.get_stats()
        return {"connected": False}
87
+
88
+
89
+ # Global instance
90
+ websocket_manager = WebSocketManager()