unrealon 1.1.6__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. {unrealon-1.1.6.dist-info/licenses → unrealon-2.0.5.dist-info}/LICENSE +1 -1
  2. unrealon-2.0.5.dist-info/METADATA +491 -0
  3. unrealon-2.0.5.dist-info/RECORD +128 -0
  4. {unrealon-1.1.6.dist-info → unrealon-2.0.5.dist-info}/WHEEL +2 -1
  5. unrealon-2.0.5.dist-info/entry_points.txt +3 -0
  6. unrealon-2.0.5.dist-info/top_level.txt +3 -0
  7. unrealon_browser/__init__.py +5 -6
  8. unrealon_browser/cli/browser_cli.py +18 -9
  9. unrealon_browser/cli/interactive_mode.py +13 -4
  10. unrealon_browser/core/browser_manager.py +29 -16
  11. unrealon_browser/dto/__init__.py +21 -0
  12. unrealon_browser/dto/bot_detection.py +175 -0
  13. unrealon_browser/dto/models/config.py +9 -3
  14. unrealon_browser/managers/__init__.py +1 -1
  15. unrealon_browser/managers/logger_bridge.py +1 -4
  16. unrealon_browser/stealth/__init__.py +27 -0
  17. unrealon_browser/stealth/bypass_techniques.pyc +0 -0
  18. unrealon_browser/stealth/manager.pyc +0 -0
  19. unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
  20. unrealon_browser/stealth/playwright_stealth.pyc +0 -0
  21. unrealon_browser/stealth/scanner_tester.pyc +0 -0
  22. unrealon_browser/stealth/undetected_chrome.pyc +0 -0
  23. unrealon_core/__init__.py +172 -0
  24. unrealon_core/config/__init__.py +16 -0
  25. unrealon_core/config/environment.py +151 -0
  26. unrealon_core/config/urls.py +94 -0
  27. unrealon_core/enums/__init__.py +24 -0
  28. unrealon_core/enums/status.py +216 -0
  29. unrealon_core/enums/types.py +240 -0
  30. unrealon_core/error_handling/__init__.py +45 -0
  31. unrealon_core/error_handling/circuit_breaker.py +292 -0
  32. unrealon_core/error_handling/error_context.py +324 -0
  33. unrealon_core/error_handling/recovery.py +371 -0
  34. unrealon_core/error_handling/retry.py +268 -0
  35. unrealon_core/exceptions/__init__.py +46 -0
  36. unrealon_core/exceptions/base.py +292 -0
  37. unrealon_core/exceptions/communication.py +22 -0
  38. unrealon_core/exceptions/driver.py +11 -0
  39. unrealon_core/exceptions/proxy.py +11 -0
  40. unrealon_core/exceptions/task.py +12 -0
  41. unrealon_core/exceptions/validation.py +17 -0
  42. unrealon_core/models/__init__.py +79 -0
  43. unrealon_core/models/arq_context.py +252 -0
  44. unrealon_core/models/arq_responses.py +125 -0
  45. unrealon_core/models/base.py +291 -0
  46. unrealon_core/models/bridge_stats.py +58 -0
  47. unrealon_core/models/communication.py +39 -0
  48. unrealon_core/models/connection_stats.py +47 -0
  49. unrealon_core/models/driver.py +30 -0
  50. unrealon_core/models/driver_details.py +98 -0
  51. unrealon_core/models/logging.py +28 -0
  52. unrealon_core/models/task.py +21 -0
  53. unrealon_core/models/typed_responses.py +210 -0
  54. unrealon_core/models/websocket/__init__.py +91 -0
  55. unrealon_core/models/websocket/base.py +49 -0
  56. unrealon_core/models/websocket/config.py +200 -0
  57. unrealon_core/models/websocket/driver.py +215 -0
  58. unrealon_core/models/websocket/errors.py +138 -0
  59. unrealon_core/models/websocket/heartbeat.py +100 -0
  60. unrealon_core/models/websocket/logging.py +261 -0
  61. unrealon_core/models/websocket/proxy.py +496 -0
  62. unrealon_core/models/websocket/tasks.py +275 -0
  63. unrealon_core/models/websocket/utils.py +153 -0
  64. unrealon_core/models/websocket_session.py +144 -0
  65. unrealon_core/monitoring/__init__.py +43 -0
  66. unrealon_core/monitoring/alerts.py +398 -0
  67. unrealon_core/monitoring/dashboard.py +307 -0
  68. unrealon_core/monitoring/health_check.py +354 -0
  69. unrealon_core/monitoring/metrics.py +352 -0
  70. unrealon_core/utils/__init__.py +11 -0
  71. unrealon_core/utils/time.py +61 -0
  72. unrealon_core/version.py +219 -0
  73. unrealon_driver/__init__.py +90 -51
  74. unrealon_driver/core_module/__init__.py +34 -0
  75. unrealon_driver/core_module/base.py +184 -0
  76. unrealon_driver/core_module/config.py +30 -0
  77. unrealon_driver/core_module/event_manager.py +127 -0
  78. unrealon_driver/core_module/protocols.py +98 -0
  79. unrealon_driver/core_module/registry.py +146 -0
  80. unrealon_driver/decorators/__init__.py +15 -0
  81. unrealon_driver/decorators/retry.py +117 -0
  82. unrealon_driver/decorators/schedule.py +137 -0
  83. unrealon_driver/decorators/task.py +61 -0
  84. unrealon_driver/decorators/timing.py +132 -0
  85. unrealon_driver/driver/__init__.py +20 -0
  86. unrealon_driver/driver/communication/__init__.py +10 -0
  87. unrealon_driver/driver/communication/session.py +203 -0
  88. unrealon_driver/driver/communication/websocket_client.py +205 -0
  89. unrealon_driver/driver/core/__init__.py +10 -0
  90. unrealon_driver/driver/core/config.py +175 -0
  91. unrealon_driver/driver/core/driver.py +221 -0
  92. unrealon_driver/driver/factory/__init__.py +9 -0
  93. unrealon_driver/driver/factory/manager_factory.py +130 -0
  94. unrealon_driver/driver/lifecycle/__init__.py +11 -0
  95. unrealon_driver/driver/lifecycle/daemon.py +76 -0
  96. unrealon_driver/driver/lifecycle/initialization.py +97 -0
  97. unrealon_driver/driver/lifecycle/shutdown.py +48 -0
  98. unrealon_driver/driver/monitoring/__init__.py +9 -0
  99. unrealon_driver/driver/monitoring/health.py +63 -0
  100. unrealon_driver/driver/utilities/__init__.py +10 -0
  101. unrealon_driver/driver/utilities/logging.py +51 -0
  102. unrealon_driver/driver/utilities/serialization.py +61 -0
  103. unrealon_driver/managers/__init__.py +32 -0
  104. unrealon_driver/managers/base.py +174 -0
  105. unrealon_driver/managers/browser.py +98 -0
  106. unrealon_driver/managers/cache.py +116 -0
  107. unrealon_driver/managers/http.py +107 -0
  108. unrealon_driver/managers/logger.py +286 -0
  109. unrealon_driver/managers/proxy.py +99 -0
  110. unrealon_driver/managers/registry.py +87 -0
  111. unrealon_driver/managers/threading.py +54 -0
  112. unrealon_driver/managers/update.py +107 -0
  113. unrealon_driver/utils/__init__.py +9 -0
  114. unrealon_driver/utils/time.py +10 -0
  115. unrealon-1.1.6.dist-info/METADATA +0 -625
  116. unrealon-1.1.6.dist-info/RECORD +0 -55
  117. unrealon-1.1.6.dist-info/entry_points.txt +0 -9
  118. unrealon_browser/managers/stealth.py +0 -388
  119. unrealon_driver/README.md +0 -0
  120. unrealon_driver/exceptions.py +0 -33
  121. unrealon_driver/html_analyzer/__init__.py +0 -32
  122. unrealon_driver/html_analyzer/cleaner.py +0 -657
  123. unrealon_driver/html_analyzer/config.py +0 -64
  124. unrealon_driver/html_analyzer/manager.py +0 -247
  125. unrealon_driver/html_analyzer/models.py +0 -115
  126. unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
  127. unrealon_driver/models/__init__.py +0 -31
  128. unrealon_driver/models/websocket.py +0 -98
  129. unrealon_driver/parser/__init__.py +0 -36
  130. unrealon_driver/parser/cli_manager.py +0 -142
  131. unrealon_driver/parser/daemon_manager.py +0 -403
  132. unrealon_driver/parser/managers/__init__.py +0 -25
  133. unrealon_driver/parser/managers/config.py +0 -293
  134. unrealon_driver/parser/managers/error.py +0 -412
  135. unrealon_driver/parser/managers/result.py +0 -321
  136. unrealon_driver/parser/parser_manager.py +0 -458
  137. unrealon_driver/smart_logging/__init__.py +0 -24
  138. unrealon_driver/smart_logging/models.py +0 -44
  139. unrealon_driver/smart_logging/smart_logger.py +0 -406
  140. unrealon_driver/smart_logging/unified_logger.py +0 -525
  141. unrealon_driver/websocket/__init__.py +0 -31
  142. unrealon_driver/websocket/client.py +0 -249
  143. unrealon_driver/websocket/config.py +0 -188
  144. unrealon_driver/websocket/manager.py +0 -90
@@ -1,458 +0,0 @@
1
- """
2
- Parser Manager - Unified parser management system with Pydantic v2
3
-
4
- Strict compliance with CRITICAL_REQUIREMENTS.md:
5
- - No Dict[str, Any] usage
6
- - Complete type annotations
7
- - Pydantic v2 models everywhere
8
- - Custom exception hierarchy
9
- - No try blocks in imports
10
- """
11
-
12
- from datetime import datetime, timezone
13
- from typing import Optional
14
- from pydantic import BaseModel, Field, ConfigDict
15
-
16
- from .managers import ConfigManager, ParserConfig, ResultManager, ErrorManager, RetryConfig
17
-
18
- # from unrealon_browser import BrowserManager, BrowserConfig # Temporary comment to avoid circular import
19
-
20
- # Import UnifiedLogger and HTML Analyzer
21
- from unrealon_driver.smart_logging import create_unified_logger, LogLevel
22
- from unrealon_driver.html_analyzer import create_html_analyzer, HTMLCleaningConfig, HTMLParseResult
23
- from unrealon_driver.websocket import websocket_manager, WebSocketConfig
24
- from unrealon_browser.core import BrowserManager
25
- from unrealon_browser.dto.models.config import BrowserConfig
26
-
27
-
28
class ParserManagerConfig(BaseModel):
    """Complete parser manager configuration"""

    # NOTE: the class docstring is kept verbatim because pydantic exposes it
    # as the model description in the generated JSON schema.

    # Re-validate on attribute assignment and reject unknown field names.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Core configuration -------------------------------------------------
    parser_config: ParserConfig = Field(
        default_factory=ParserConfig,
        description="Core parser configuration",
    )

    # --- Logging configuration (simplified) ---------------------------------
    console_enabled: bool = Field(
        default=True,
        description="Enable console logging",
    )
    file_enabled: bool = Field(
        default=True,
        description="Enable file logging",
    )
    console_level: LogLevel = Field(
        default=LogLevel.INFO,
        description="Console log level",
    )
    file_level: LogLevel = Field(
        default=LogLevel.DEBUG,
        description="File log level",
    )
    html_config: HTMLCleaningConfig = Field(
        default_factory=HTMLCleaningConfig,
        description="HTML cleaning configuration",
    )
    retry_config: RetryConfig = Field(
        default_factory=RetryConfig,
        description="Retry configuration",
    )

    # --- Bridge settings ----------------------------------------------------
    bridge_enabled: bool = Field(
        default=True,
        description="Enable bridge connection",
    )
    auto_register: bool = Field(
        default=True,
        description="Auto-register parser with bridge",
    )

    # --- SmartLogger settings -----------------------------------------------
    # When bridge_logs_url is None, ParserManager builds no WebSocketConfig.
    bridge_logs_url: Optional[str] = Field(
        default=None,
        description="Bridge logs WebSocket URL (ws://localhost:8001/logs)",
    )
    log_batch_interval: float = Field(
        default=5.0,
        description="Log batch interval in seconds",
    )
    daemon_mode: Optional[bool] = Field(
        default=None,
        description="Daemon mode for logging (None = auto-detect)",
    )
52
-
53
-
54
class ParserStats(BaseModel):
    """Comprehensive parser statistics"""

    # NOTE: the class docstring is kept verbatim because pydantic exposes it
    # as the model description in the generated JSON schema.

    # Re-validate on attribute assignment and reject unknown field names.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- Identity -----------------------------------------------------------
    parser_id: str = Field(...)
    parser_name: str = Field(...)
    session_id: Optional[str] = Field(default=None)

    # --- Timing -------------------------------------------------------------
    # session_start defaults to "now" in UTC at model creation;
    # session_duration is filled in from it, in seconds.
    session_start: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    session_duration: float = Field(default=0.0, ge=0.0)

    # --- Operations ---------------------------------------------------------
    operations_completed: int = Field(default=0, ge=0)
    operations_failed: int = Field(default=0, ge=0)
    # Percentage in the range 0..100.
    success_rate: float = Field(default=0.0, ge=0.0, le=100.0)

    # --- Content processing -------------------------------------------------
    pages_processed: int = Field(default=0, ge=0)
    html_cleaned_count: int = Field(default=0, ge=0)
    total_html_reduction: float = Field(default=0.0, ge=0.0)

    # --- Errors -------------------------------------------------------------
    total_errors: int = Field(default=0, ge=0)
    retries_attempted: int = Field(default=0, ge=0)

    # --- Bridge -------------------------------------------------------------
    bridge_connected: bool = Field(default=False)
    bridge_messages_sent: int = Field(default=0, ge=0)
84
-
85
-
86
class ParserManagerError(Exception):
    """Root of the parser-manager exception hierarchy.

    Attributes:
        message: Human-readable description; also passed to ``Exception``.
        operation: Name of the operation that failed.
        details: String key/value context; an empty dict when none is given.
    """

    def __init__(self, message: str, operation: str, details: Optional[dict[str, str]] = None):
        super().__init__(message)
        self.message = message
        self.operation = operation
        # A missing (or empty) mapping is normalised to a fresh empty dict.
        self.details = details if details else {}
94
-
95
-
96
class InitializationError(ParserManagerError):
    """Raised when parser manager initialization fails.

    ``ParserManager.initialize`` raises this, chained to the exception that
    caused the failure.
    """
100
-
101
-
102
class OperationError(ParserManagerError):
    """Raised when a parser operation fails.

    ``ParserManager.get_html`` raises this, chained to the exception that
    caused the failure.
    """
106
-
107
-
108
class ParserManager:
    """
    🚀 Parser Manager - Unified parser management system

    Wires together the config, result and error managers, the HTML analyzer,
    the browser manager, an optional WebSocket connection and the unified
    logger behind a single async-context-manager interface.

    Features:
    - Unified Configuration: Single config for all managers
    - Automatic Lifecycle: Handles initialization, execution, cleanup
    - Error Recovery: Retry logic delegated to ErrorManager
    - Performance Monitoring: Statistics collected in ParserStats
    - Bridge Integration: Optional WebSocket connection for logs
    - Type Safety: Pydantic v2 models for config and stats

    Usage:
        config = ParserManagerConfig(
            parser_config=ParserConfig(parser_name="MyParser"),
            bridge_enabled=True
        )

        async with ParserManager(config) as parser:
            # Fetch only
            html = await parser.get_html("https://example.com")

            # Or the complete workflow: fetch -> clean -> analyze
            result = await parser.parse_url("https://example.com")

            # Results are automatically tracked
            stats = parser.get_stats()
    """

    def __init__(self, config: ParserManagerConfig):
        """Build all sub-managers from *config*; no connection is opened here.

        Args:
            config: Complete parser manager configuration.
        """
        self.config = config
        parser_cfg = self.config.parser_config

        # Initialize managers
        self.config_manager = ConfigManager(parser_cfg)
        self.result_manager = ResultManager(parser_cfg.parser_id)
        self.error_manager = ErrorManager()
        # Initialize HTML Analyzer (WebSocket URL auto-detected)
        self.html_analyzer = create_html_analyzer(
            parser_id=parser_cfg.parser_id,
            api_key=parser_cfg.api_key,
            cleaning_config=self.config.html_config,
        )
        # Create default browser config
        browser_config = BrowserConfig(parser_name=parser_cfg.parser_name)
        self.browser_manager = BrowserManager(browser_config, parser_id=parser_cfg.parser_id)

        # Initialize WebSocket connection config (only when a logs URL is set)
        if self.config.bridge_logs_url:
            self._websocket_config = WebSocketConfig(
                url=self.config.bridge_logs_url,
                api_key=parser_cfg.api_key,
                parser_id=parser_cfg.parser_id,
            )
        else:
            self._websocket_config = None

        # Initialize UnifiedLogger; the log file lives under <system_dir>/logs
        log_file = None
        if parser_cfg.system_dir:
            log_file = parser_cfg.system_dir / "logs" / f"{parser_cfg.parser_name}.log"

        self.logger = create_unified_logger(
            parser_id=parser_cfg.parser_id,
            parser_name=parser_cfg.parser_name,
            bridge_logs_url=self.config.bridge_logs_url,
            log_file=log_file,
            console_enabled=self.config.console_enabled,
            file_enabled=self.config.file_enabled,
            console_level=self.config.console_level,
            file_level=self.config.file_level,
            batch_interval=self.config.log_batch_interval,
            daemon_mode=self.config.daemon_mode,
        )

        # State
        self._is_initialized = False
        self._session_id: Optional[str] = None
        self._stats = ParserStats(parser_id=parser_cfg.parser_id, parser_name=parser_cfg.parser_name)

        # Register retry configurations
        self._setup_retry_configs()

    # ==========================================
    # LIFECYCLE MANAGEMENT
    # ==========================================

    async def initialize(self) -> None:
        """Initialize all managers and establish connections.

        Does nothing when the manager is already initialized.

        Raises:
            InitializationError: If any step fails; the original exception is
                recorded with the error manager and chained as the cause.
        """
        if self._is_initialized:
            return

        try:
            self.logger.info("🚀 Initializing parser manager...")

            # Initialize WebSocket connection
            if self._websocket_config:
                await websocket_manager.initialize(self._websocket_config)
                if websocket_manager.connected:
                    self.logger.info("🔌 WebSocket connected")
                else:
                    # A failed connection is only a warning, not an error.
                    self.logger.warning("🔌 WebSocket connection failed")

            # Initialize browser
            await self.browser_manager.initialize_async()

            self._is_initialized = True
            self.logger.info("✅ Parser manager initialized successfully")

        except Exception as e:
            self.error_manager.record_error(e, "initialization")
            raise InitializationError(message=f"Failed to initialize parser manager: {e}", operation="initialization") from e

    async def cleanup(self) -> None:
        """Clean up all resources (best effort; failures are logged, not raised).

        Order: end the active session, close the browser, disconnect the
        WebSocket, refresh the stats, report, and close the logger last so
        that every message above still goes through an open logger.
        """
        self.logger.info("🧹 Cleaning up parser manager...")

        cleanup_errors = []

        # End session if active
        if self._session_id:
            await self.end_session()

        # Cleanup browser
        try:
            await self.browser_manager.close_async()
        except Exception as e:
            cleanup_errors.append(f"browser_cleanup: {e}")

        # Disconnect WebSocket
        try:
            await websocket_manager.disconnect()
        except Exception as e:
            cleanup_errors.append(f"websocket_disconnect: {e}")

        # The resources are gone: report "not initialized" and make the next
        # get_html()/initialize() set things up again instead of reusing a
        # closed browser.
        self._is_initialized = False

        # Update final stats
        self._update_session_stats()

        # Log cleanup errors but don't raise
        if cleanup_errors:
            self.logger.warning(f"Cleanup errors: {'; '.join(cleanup_errors)}")

        self.logger.info("✅ Parser manager cleanup completed")

        # Cleanup UnifiedLogger last; it cannot report its own failure, so
        # fall back to the standard library logger.
        try:
            await self.logger.close()
        except Exception as e:
            import logging

            logging.getLogger(__name__).warning("Cleanup errors: logger_cleanup: %s", e)

    # ==========================================
    # CORE PARSING METHODS
    # ==========================================

    async def get_html(self, url: str) -> str:
        """Get HTML content from URL with error handling.

        Initializes the manager on first use and runs the fetch through the
        error manager's retry decorator.

        Args:
            url: Address to fetch.

        Returns:
            The HTML returned by the browser manager.

        Raises:
            OperationError: If the fetch (including retries) fails.
        """
        if not self._is_initialized:
            await self.initialize()

        @self.error_manager.with_retry("get_html", self.config.retry_config)
        async def _get_html_with_retry():
            self.logger.url_access(url, "fetching")
            html = await self.browser_manager.get_html(url)
            self._stats.pages_processed += 1
            return html

        try:
            return await _get_html_with_retry()
        except Exception as e:
            self._stats.total_errors += 1
            raise OperationError(message=f"Failed to get HTML from {url}: {e}", operation="get_html", details={"url": url}) from e

    async def parse_url(self, url: str, instructions: Optional[str] = None, **kwargs) -> HTMLParseResult:
        """Complete parsing workflow: fetch → clean → analyze via HTML Analyzer.

        Args:
            url: Address to fetch and analyze.
            instructions: Optional instructions forwarded to the analyzer.
            **kwargs: Extra keyword arguments forwarded to
                ``html_analyzer.parse_html``.

        Returns:
            The analyzer's result; the operation counts as successful when
            ``result.success == "true"``.

        Raises:
            Exception: Any failure is recorded, logged and re-raised as is.
        """
        operation = self.result_manager.start_operation()

        try:
            self.logger.start_operation("parse_url")

            # Fetch HTML
            html = await self.get_html(url)

            # Delegate complete HTML processing to HTML Analyzer
            analysis_result = await self.html_analyzer.parse_html(html=html, url=url, instructions=instructions, session_id=self._session_id, **kwargs)

            # Update stats from HTML Analyzer
            html_stats = self.html_analyzer.get_stats()
            self._stats.html_cleaned_count += html_stats.cleaned_count
            self._stats.total_html_reduction += html_stats.total_reduction

            # Complete operation (success is compared against the string "true")
            success = analysis_result.success == "true"
            self.result_manager.complete_operation(data=[], source_urls=[url], success=success)

            if success:
                self._stats.operations_completed += 1
            else:
                self._stats.operations_failed += 1

            self.logger.end_operation("parse_url", operation.duration_seconds)

            return analysis_result

        except Exception as e:
            self.result_manager.complete_operation(data=[], source_urls=[url], success=False, error_message=str(e))

            self._stats.operations_failed += 1
            self.logger.error(f"❌ Failed parse_url: {str(e)}")
            raise

    # ==========================================
    # SESSION MANAGEMENT (Simplified - Local Only)
    # ==========================================

    async def start_session(self, session_type: str = "parsing") -> str:
        """Start a new parsing session (local only).

        Args:
            session_type: Prefix of the generated session ID.

        Returns:
            The new ID, ``"<session_type>_<8 hex chars>"``.
        """
        import uuid

        session_id = f"{session_type}_{uuid.uuid4().hex[:8]}"
        self._session_id = session_id
        self._stats.session_id = session_id
        self.logger.set_session(session_id)

        self.logger.info(f"📋 Local session started: {session_id}")
        return session_id

    async def end_session(self) -> None:
        """End current parsing session; does nothing when none is active."""
        if not self._session_id:
            return

        self.logger.info(f"📋 Local session ended: {self._session_id}")
        self._session_id = None
        self._stats.session_id = None

    # ==========================================
    # STATISTICS AND MONITORING
    # ==========================================

    def get_stats(self) -> ParserStats:
        """Get comprehensive parser statistics.

        Returns:
            A freshly validated copy, so callers cannot mutate internal state.
        """
        self._update_session_stats()
        return ParserStats.model_validate(self._stats.model_dump())

    def get_manager_stats(self) -> dict[str, dict[str, str]]:
        """Get statistics from the result, error and browser managers."""
        return {
            "result_manager": self.result_manager.get_stats(),
            "error_manager": self.error_manager.get_error_stats(),
            "browser_manager": self.browser_manager.get_stats().model_dump(mode="json"),
            # Logging stats removed - using UnifiedLogger now
        }

    async def health_check(self) -> dict[str, str]:
        """Comprehensive health check.

        Returns:
            A flat string-to-string report. A failing browser check is
            reported under ``"browser_status"`` rather than raised.
        """
        health = {"status": "healthy", "parser_id": self.config.parser_config.parser_id, "parser_name": self.config.parser_config.parser_name, "initialized": str(self._is_initialized), "session_active": str(self._session_id is not None)}

        # Check browser health
        try:
            browser_health = await self.browser_manager.health_check()
            health["browser_status"] = browser_health.get("status", "unknown")
        except Exception as e:
            health["browser_status"] = f"error: {e}"

        # Check WebSocket connection health
        health["websocket_connected"] = str(websocket_manager.connected)

        return health

    # ==========================================
    # INTERNAL METHODS
    # ==========================================

    def _setup_retry_configs(self) -> None:
        """Register retry configurations for the named operations."""
        # Navigation retry config
        nav_config = RetryConfig(max_attempts=3, base_delay=2.0, retry_on_exceptions=["NavigationError", "TimeoutError", "ConnectionError"])
        self.error_manager.register_retry_config("get_html", nav_config)

        # Bridge communication retry config
        # NOTE(review): no method of this class runs under the name
        # "analyze_html" - confirm this registration is still needed.
        bridge_config = RetryConfig(max_attempts=2, base_delay=1.0, retry_on_exceptions=["ConnectionError", "TimeoutError"])
        self.error_manager.register_retry_config("analyze_html", bridge_config)

    def _update_session_stats(self) -> None:
        """Refresh the session duration (seconds) and success rate (percent)."""
        self._stats.session_duration = (datetime.now(timezone.utc) - self._stats.session_start).total_seconds()

        total_operations = self._stats.operations_completed + self._stats.operations_failed
        # Keep the previous rate when nothing ran yet (avoids dividing by zero).
        if total_operations > 0:
            self._stats.success_rate = (self._stats.operations_completed / total_operations) * 100.0

    # ==========================================
    # CONTEXT MANAGER SUPPORT
    # ==========================================

    async def __aenter__(self):
        """Async context manager entry: initialize and return the manager."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: clean up; exceptions are not suppressed."""
        await self.cleanup()
        return False

    # ==========================================
    # LOGGING CONVENIENCE
    # ==========================================

    def set_session_id(self, session_id: str):
        """Set session ID for internal tracking, statistics and logger.

        Args:
            session_id: Externally supplied session identifier.
        """
        self._session_id = session_id
        # Keep the stats in step, exactly as start_session/end_session do.
        self._stats.session_id = session_id
        self.logger.set_session(session_id)

    async def flush_logs(self):
        """Force flush all accumulated logs"""
        await self.logger.flush()

    def __repr__(self) -> str:
        return f"<ParserManager(id='{self.config.parser_config.parser_id}', name='{self.config.parser_config.parser_name}')>"
418
-
419
-
420
- # ==========================================
421
- # CONVENIENCE FUNCTIONS
422
- # ==========================================
423
-
424
-
425
def get_parser_manager(parser_name: str, parser_type: str = "generic", **kwargs) -> ParserManager:
    """
    Build a ParserManager from a flat set of keyword options.

    Each extra keyword is routed by name: keys that are ParserConfig fields
    go to the core parser config, keys that are ParserManagerConfig fields
    (other than ``parser_config``) go to the manager config, and anything
    else is ignored.

    Args:
        parser_name: Name of the parser
        parser_type: Type of parser (generic, ecommerce, news, etc.)
        **kwargs: Additional configuration options

    Returns:
        Configured ParserManager instance
    """
    parser_options = {}
    manager_options = {}
    for key, value in kwargs.items():
        # Two independent checks: a key may be a field of both models.
        if key in ParserConfig.model_fields:
            parser_options[key] = value
        if key in ParserManagerConfig.model_fields and key not in ["parser_config"]:
            manager_options[key] = value

    core_config = ParserConfig(parser_name=parser_name, parser_type=parser_type, **parser_options)
    manager_config = ParserManagerConfig(parser_config=core_config, **manager_options)
    return ParserManager(manager_config)
442
-
443
-
444
async def quick_parse(url: str, parser_name: str = "QuickParser", instructions: Optional[str] = None, **kwargs) -> HTMLParseResult:
    """
    Quick parsing convenience function

    Creates a temporary ParserManager, parses *url* with it and cleans the
    manager up again.

    Args:
        url: URL to parse
        parser_name: Name for the parser
        instructions: Optional parsing instructions
        **kwargs: Additional configuration. Keys that are ParserConfig or
            ParserManagerConfig fields configure the manager; only the
            remaining keys are forwarded to ``parse_url``.

    Returns:
        Parsing result
    """
    # Configuration keys are consumed by get_parser_manager. Forwarding them
    # to parse_url as well would hand them on to html_analyzer.parse_html.
    config_keys = set(ParserConfig.model_fields) | set(ParserManagerConfig.model_fields)
    parse_kwargs = {key: value for key, value in kwargs.items() if key not in config_keys}

    async with get_parser_manager(parser_name, **kwargs) as parser:
        return await parser.parse_url(url, instructions, **parse_kwargs)
@@ -1,24 +0,0 @@
1
- """
2
- Smart logging module for unrealon_driver.
3
-
4
- Provides intelligent logging with batching, WebSocket transport, and fallback mechanisms.
5
- """
6
-
7
- from .smart_logger import SmartLogger, create_smart_logger
8
- from .unified_logger import UnifiedLogger, create_unified_logger
9
- from .models import LogEntry, LogLevel, LogContext
10
-
11
- __all__ = [
12
- # Main loggers
13
- "SmartLogger",
14
- "UnifiedLogger",
15
-
16
- # Factory functions
17
- "create_smart_logger",
18
- "create_unified_logger",
19
-
20
- # Models
21
- "LogEntry",
22
- "LogLevel",
23
- "LogContext"
24
- ]
@@ -1,44 +0,0 @@
1
- """
2
- Common models for smart logging system.
3
- """
4
-
5
- from typing import Optional, Dict, Any
6
- from dataclasses import dataclass
7
- from pydantic import BaseModel, Field, ConfigDict
8
- from enum import Enum
9
-
10
-
11
class LogLevel(str, Enum):
    """Severity levels understood by the driver logger.

    Members are also ``str`` instances whose value equals their name.
    """

    # Declared from least to most severe.
    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
18
-
19
-
20
@dataclass
class LogEntry:
    """A single log record.

    The first four fields are required; the remaining context fields
    default to ``None``.
    """

    # Required fields
    timestamp: str
    level: str
    message: str
    parser_id: str

    # Optional context
    session_id: Optional[str] = None
    url: Optional[str] = None
    operation: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None
31
-
32
-
33
class LogContext(BaseModel):
    """Log context information"""

    # NOTE: the class docstring is kept verbatim because pydantic exposes it
    # as the model description in the generated JSON schema.

    # Re-validate on attribute assignment and reject unknown field names.
    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Every identifier is optional and unset by default.
    session_id: Optional[str] = Field(default=None)
    command_id: Optional[str] = Field(default=None)
    operation: Optional[str] = Field(default=None)
    url: Optional[str] = Field(default=None)

    # Free-form extras; each instance gets its own fresh dict.
    additional_data: dict[str, Any] = Field(default_factory=dict)