unrealon 1.1.1__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.5.dist-info/METADATA +621 -0
  3. unrealon-1.1.5.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,24 @@
1
+ """
2
+ Smart logging module for unrealon_driver.
3
+
4
+ Provides intelligent logging with batching, WebSocket transport, and fallback mechanisms.
5
+ """
6
+
7
+ from .smart_logger import SmartLogger, create_smart_logger
8
+ from .unified_logger import UnifiedLogger, create_unified_logger
9
+ from .models import LogEntry, LogLevel, LogContext
10
+
11
# Names re-exported as the public API of the smart_logging package.
__all__ = [
    # Logger classes
    "SmartLogger",
    "UnifiedLogger",
    # Factory helpers
    "create_smart_logger",
    "create_unified_logger",
    # Shared data models
    "LogEntry",
    "LogLevel",
    "LogContext",
]
@@ -0,0 +1,44 @@
1
+ """
2
+ Common models for smart logging system.
3
+ """
4
+
5
+ from typing import Optional, Dict, Any
6
+ from dataclasses import dataclass
7
+ from pydantic import BaseModel, Field, ConfigDict
8
+ from enum import Enum
9
+
10
+
11
class LogLevel(str, Enum):
    """Severity names accepted by the driver logger.

    Members are also ``str`` instances, so they compare equal to their
    plain-text value (``LogLevel.INFO == "INFO"``).
    """

    # Listed from least to most severe.
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
18
+
19
+
20
@dataclass
class LogEntry:
    """One log record: four required text fields plus optional context."""

    # Required fields
    timestamp: str  # when the entry was created, as text
    level: str  # severity name, e.g. "INFO"
    message: str  # the log message itself
    parser_id: str  # identifier of the parser that produced the entry

    # Optional context; each defaults to None when not supplied
    session_id: Optional[str] = None
    url: Optional[str] = None
    operation: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None  # free-form additional key/value data
31
+
32
+
33
class LogContext(BaseModel):
    """Optional context that can accompany a log call.

    Every field is optional; ``additional_data`` starts as a fresh empty
    dict for each instance.
    """

    # Re-validate values on attribute assignment and reject unknown fields.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    session_id: Optional[str] = Field(default=None)
    command_id: Optional[str] = Field(default=None)
    operation: Optional[str] = Field(default=None)
    url: Optional[str] = Field(default=None)
    additional_data: dict[str, Any] = Field(default_factory=dict)
@@ -0,0 +1,406 @@
1
+ """
2
+ SmartLogger: Intelligent logging with batching and WebSocket transport.
3
+
4
+ Features:
5
+ - Automatic batching of logs (every 5 seconds)
6
+ - Smart WebSocket connection management (daemon vs script mode)
7
+ - Local file/console logging as fallback
8
+ - Standard logging API for developers
9
+ - No blocking of main thread
10
+ """
11
+
12
+ import asyncio
13
+ import json
14
+ import logging
15
+ import time
16
+ import weakref
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Optional, Dict, Any, List
20
+ from dataclasses import dataclass, asdict
21
+ from collections import deque
22
+
23
+ import websockets
24
+ from websockets.exceptions import ConnectionClosed, WebSocketException
25
+
26
+
27
@dataclass
class LogEntry:
    """One buffered log record, serialised with ``asdict`` when a batch is sent.

    Field-for-field identical to the ``LogEntry`` dataclass declared in the
    package's ``models`` module.
    """

    # Required fields
    timestamp: str  # creation time as text (SmartLogger writes UTC ISO-8601)
    level: str  # severity name, e.g. "INFO"
    message: str  # the log message itself
    parser_id: str  # identifier of the parser that produced the entry

    # Optional context; each defaults to None when not supplied
    session_id: Optional[str] = None
    url: Optional[str] = None
    operation: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None  # free-form additional key/value data
38
+
39
+
40
+ class LogBuffer:
41
+ """Thread-safe buffer for accumulating logs"""
42
+
43
+ def __init__(self, max_size: int = 1000):
44
+ self.buffer: deque = deque(maxlen=max_size)
45
+ self.lock = asyncio.Lock()
46
+
47
+ async def add(self, entry: LogEntry):
48
+ """Add log entry to buffer"""
49
+ async with self.lock:
50
+ self.buffer.append(entry)
51
+
52
+ async def flush(self) -> List[LogEntry]:
53
+ """Get all logs and clear buffer"""
54
+ async with self.lock:
55
+ entries = list(self.buffer)
56
+ self.buffer.clear()
57
+ return entries
58
+
59
+ def size(self) -> int:
60
+ """Get buffer size"""
61
+ return len(self.buffer)
62
+
63
+
64
class ConnectionManager:
    """Owns the WebSocket used to ship log batches to the bridge.

    The socket is opened lazily by ``ensure_connection``. In daemon mode it
    is kept open between batches; otherwise it is closed right after each
    successful send.
    """

    def __init__(self, bridge_logs_url: str, parser_id: str):
        """
        Args:
            bridge_logs_url: WebSocket URL that batches are sent to.
            parser_id: Identifier stamped on every outgoing batch.
        """
        self.bridge_logs_url = bridge_logs_url
        self.parser_id = parser_id
        self.websocket = None
        self.is_connected = False
        self.connection_lock = asyncio.Lock()
        self.last_activity = time.time()

        # Connection mode
        self.daemon_mode = False
        # Intended idle limit in seconds; nothing in this class enforces it.
        self.connection_timeout = 30

    async def ensure_connection(self) -> bool:
        """Open the WebSocket if it is not already open.

        Returns:
            True when a connection is available, False when connecting failed.
        """
        async with self.connection_lock:
            if self.is_connected and self.websocket:
                return True

            try:
                self.websocket = await websockets.connect(
                    self.bridge_logs_url,
                    ping_interval=20,
                    ping_timeout=10,
                )
            except Exception:
                # Best effort: any connect failure just reports "no connection".
                self.is_connected = False
                self.websocket = None
                return False

            self.is_connected = True
            self.last_activity = time.time()
            return True

    def _encode_batch(self, entries: "List[LogEntry]") -> str:
        """Serialise ``entries`` into the JSON ``log_batch`` message."""
        batch = {
            "type": "log_batch",
            "parser_id": self.parser_id,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "entries": [asdict(entry) for entry in entries],
        }
        # Log extras are arbitrary caller kwargs. Stringify anything JSON cannot
        # represent instead of losing the whole batch to a TypeError.
        return json.dumps(batch, default=str)

    async def send_batch(self, entries: "List[LogEntry]") -> bool:
        """Send a batch of log entries.

        Args:
            entries: Dataclass log entries to send; an empty list is a no-op.

        Returns:
            True when there was nothing to send or the batch was sent, False
            when encoding, connecting or sending failed.
        """
        if not entries:
            return True

        try:
            payload = self._encode_batch(entries)
        except (TypeError, ValueError):
            # Encoding problems are not transport failures: leave the socket alone.
            return False

        if not await self.ensure_connection():
            return False

        try:
            await self.websocket.send(payload)
            self.last_activity = time.time()

            # In script mode, close connection immediately
            if not self.daemon_mode:
                await self.close()

            return True

        except Exception:
            # Drop the broken socket so the next batch reconnects.
            await self.close()
            return False

    async def close(self):
        """Close the WebSocket (if any) and reset the connection state."""
        async with self.connection_lock:
            if self.websocket:
                try:
                    await self.websocket.close()
                except Exception:
                    # Closing is best effort; cancellation still propagates.
                    pass
                finally:
                    self.websocket = None
                    self.is_connected = False

    def set_daemon_mode(self, daemon_mode: bool):
        """Choose whether the socket stays open between batches."""
        self.daemon_mode = daemon_mode
153
+
154
+
155
+ class SmartLogger:
156
+ """
157
+ Smart logger with batching and WebSocket transport.
158
+
159
+ Features:
160
+ - Buffers logs in memory
161
+ - Sends batches every 5 seconds
162
+ - Smart WebSocket connection management
163
+ - Auto-detects daemon vs script mode
164
+ - Local logs as fallback
165
+ """
166
+
167
+ # Global registry for cleanup
168
+ _instances = weakref.WeakSet()
169
+ _cleanup_task = None
170
+
171
+ def __init__(
172
+ self,
173
+ parser_id: str,
174
+ bridge_logs_url: Optional[str] = None,
175
+ log_file: Optional[Path] = None,
176
+ console_enabled: bool = True,
177
+ batch_interval: float = 5.0,
178
+ daemon_mode: Optional[bool] = None
179
+ ):
180
+ self.parser_id = parser_id
181
+ self.bridge_logs_url = bridge_logs_url
182
+ self.batch_interval = batch_interval
183
+ self.session_id = None
184
+
185
+ # Local logger (always works)
186
+ self.local_logger = self._setup_local_logger(log_file, console_enabled)
187
+
188
+ # Bridge components (optional)
189
+ self.bridge_enabled = bridge_logs_url is not None
190
+ self.log_buffer = LogBuffer() if self.bridge_enabled else None
191
+ self.connection_manager = ConnectionManager(bridge_logs_url, parser_id) if self.bridge_enabled else None
192
+
193
+ # Detect daemon mode
194
+ if daemon_mode is None:
195
+ daemon_mode = self._detect_daemon_mode()
196
+
197
+ if self.connection_manager:
198
+ self.connection_manager.set_daemon_mode(daemon_mode)
199
+
200
+ # Batch timer (lazy initialization)
201
+ self._batch_task = None
202
+ self._batch_timer_started = False
203
+
204
+ # Register for cleanup
205
+ SmartLogger._instances.add(self)
206
+
207
+ # Global cleanup task (lazy initialization)
208
+ self._ensure_global_cleanup()
209
+
210
+ def info(self, message: str, **kwargs):
211
+ """Log INFO message"""
212
+ self._log("INFO", message, **kwargs)
213
+
214
+ def error(self, message: str, **kwargs):
215
+ """Log ERROR message"""
216
+ self._log("ERROR", message, **kwargs)
217
+
218
+ def warning(self, message: str, **kwargs):
219
+ """Log WARNING message"""
220
+ self._log("WARNING", message, **kwargs)
221
+
222
+ def debug(self, message: str, **kwargs):
223
+ """Log DEBUG message"""
224
+ self._log("DEBUG", message, **kwargs)
225
+
226
+ def critical(self, message: str, **kwargs):
227
+ """Log CRITICAL message"""
228
+ self._log("CRITICAL", message, **kwargs)
229
+
230
+ def set_session(self, session_id: str):
231
+ """Set session ID for all future logs"""
232
+ self.session_id = session_id
233
+
234
+ def _log(self, level: str, message: str, **kwargs):
235
+ """Internal logging method"""
236
+ # Local log (always, synchronous)
237
+ extra = {k: v for k, v in kwargs.items() if isinstance(v, (str, int, float, bool))}
238
+ getattr(self.local_logger, level.lower())(message, extra=extra)
239
+
240
+ # Bridge log (asynchronous, if enabled)
241
+ if self.bridge_enabled and self.log_buffer:
242
+ entry = LogEntry(
243
+ timestamp=datetime.now(timezone.utc).isoformat(),
244
+ level=level,
245
+ message=message,
246
+ parser_id=self.parser_id,
247
+ session_id=self.session_id,
248
+ url=kwargs.get('url'),
249
+ operation=kwargs.get('operation'),
250
+ extra=kwargs if kwargs else None
251
+ )
252
+
253
+ # Add to buffer (non-blocking)
254
+ asyncio.create_task(self.log_buffer.add(entry))
255
+
256
+ def _detect_daemon_mode(self) -> bool:
257
+ """Auto-detect daemon vs script mode"""
258
+ try:
259
+ # If there's an active event loop, likely daemon mode
260
+ asyncio.get_running_loop()
261
+ return True
262
+ except RuntimeError:
263
+ return False
264
+
265
+ def _ensure_global_cleanup(self):
266
+ """Ensure global cleanup task is started (lazy)"""
267
+ try:
268
+ # Only start if we have an event loop
269
+ asyncio.get_running_loop()
270
+ if SmartLogger._cleanup_task is None:
271
+ SmartLogger._cleanup_task = asyncio.create_task(SmartLogger._global_cleanup())
272
+ except RuntimeError:
273
+ # No event loop, will start later when needed
274
+ pass
275
+
276
+ def _ensure_batch_timer(self):
277
+ """Ensure batch timer is started (lazy)"""
278
+ if not self.bridge_enabled or self._batch_timer_started:
279
+ return
280
+
281
+ try:
282
+ # Only start if we have an event loop
283
+ asyncio.get_running_loop()
284
+ self._start_batch_timer()
285
+ self._batch_timer_started = True
286
+ except RuntimeError:
287
+ # No event loop, will start later when logging happens
288
+ pass
289
+
290
+ def _start_batch_timer(self):
291
+ """Start batch timer"""
292
+ if self._batch_task is None or self._batch_task.done():
293
+ self._batch_task = asyncio.create_task(self._batch_loop())
294
+
295
+ async def _batch_loop(self):
296
+ """Main batch sending loop"""
297
+ try:
298
+ while True:
299
+ await asyncio.sleep(self.batch_interval)
300
+ await self._send_batch()
301
+ except asyncio.CancelledError:
302
+ # Final batch send on cancellation
303
+ await self._send_batch()
304
+ raise
305
+
306
+ async def _send_batch(self):
307
+ """Send accumulated logs"""
308
+ if not self.log_buffer or not self.connection_manager:
309
+ return
310
+
311
+ entries = await self.log_buffer.flush()
312
+ if entries:
313
+ success = await self.connection_manager.send_batch(entries)
314
+ # If sending failed, logs are already saved locally
315
+
316
+ async def flush(self):
317
+ """Force send all accumulated logs"""
318
+ if self.bridge_enabled:
319
+ await self._send_batch()
320
+
321
+ async def close(self):
322
+ """Close logger and cleanup resources"""
323
+ # Send remaining logs
324
+ await self.flush()
325
+
326
+ # Stop batch timer
327
+ if self._batch_task and not self._batch_task.done():
328
+ self._batch_task.cancel()
329
+ try:
330
+ await self._batch_task
331
+ except asyncio.CancelledError:
332
+ pass
333
+
334
+ # Close connection
335
+ if self.connection_manager:
336
+ await self.connection_manager.close()
337
+
338
+ def _setup_local_logger(self, log_file: Optional[Path], console_enabled: bool):
339
+ """Setup local file/console logger"""
340
+ logger = logging.getLogger(f"unrealon_parser_{self.parser_id}")
341
+ logger.setLevel(logging.DEBUG)
342
+
343
+ # Clear existing handlers
344
+ logger.handlers.clear()
345
+
346
+ # File handler
347
+ if log_file:
348
+ log_file.parent.mkdir(parents=True, exist_ok=True)
349
+ file_handler = logging.FileHandler(log_file)
350
+ file_handler.setFormatter(
351
+ logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
352
+ )
353
+ logger.addHandler(file_handler)
354
+
355
+ # Console handler
356
+ if console_enabled:
357
+ console_handler = logging.StreamHandler()
358
+ console_handler.setFormatter(
359
+ logging.Formatter('%(asctime)s - [%(levelname)s] %(message)s')
360
+ )
361
+ logger.addHandler(console_handler)
362
+
363
+ return logger
364
+
365
+ @classmethod
366
+ async def _global_cleanup(cls):
367
+ """Global cleanup of all loggers on program exit"""
368
+ try:
369
+ # Wait for program termination
370
+ while True:
371
+ await asyncio.sleep(10)
372
+ except asyncio.CancelledError:
373
+ # Close all active loggers
374
+ for logger in list(cls._instances):
375
+ try:
376
+ await logger.close()
377
+ except:
378
+ pass
379
+
380
+ def __del__(self):
381
+ """Destructor - attempt to cleanup resources"""
382
+ if self._batch_task and not self._batch_task.done():
383
+ self._batch_task.cancel()
384
+
385
+
386
def create_smart_logger(
    parser_id: str,
    bridge_logs_url: Optional[str] = None,
    **kwargs
) -> SmartLogger:
    """
    Build a SmartLogger from the given settings.

    Args:
        parser_id: Parser identifier
        bridge_logs_url: WebSocket URL for Bridge logs (ws://localhost:8001/logs)
        **kwargs: Remaining SmartLogger constructor options, passed through as-is

    Returns:
        The new SmartLogger instance
    """
    # parser_id and bridge_logs_url are SmartLogger's first two parameters.
    return SmartLogger(parser_id, bridge_logs_url, **kwargs)