unrealon 1.1.5__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.5.dist-info/licenses → unrealon-2.0.4.dist-info}/LICENSE +1 -1
- unrealon-2.0.4.dist-info/METADATA +491 -0
- unrealon-2.0.4.dist-info/RECORD +129 -0
- {unrealon-1.1.5.dist-info → unrealon-2.0.4.dist-info}/WHEEL +2 -1
- unrealon-2.0.4.dist-info/entry_points.txt +3 -0
- unrealon-2.0.4.dist-info/top_level.txt +3 -0
- unrealon_browser/__init__.py +5 -2
- unrealon_browser/cli/browser_cli.py +18 -9
- unrealon_browser/cli/interactive_mode.py +18 -7
- unrealon_browser/core/browser_manager.py +76 -13
- unrealon_browser/dto/__init__.py +21 -0
- unrealon_browser/dto/bot_detection.py +175 -0
- unrealon_browser/dto/models/config.py +14 -1
- unrealon_browser/managers/__init__.py +4 -1
- unrealon_browser/managers/logger_bridge.py +3 -6
- unrealon_browser/managers/page_wait_manager.py +198 -0
- unrealon_browser/stealth/__init__.py +27 -0
- unrealon_browser/stealth/bypass_techniques.pyc +0 -0
- unrealon_browser/stealth/manager.pyc +0 -0
- unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
- unrealon_browser/stealth/playwright_stealth.pyc +0 -0
- unrealon_browser/stealth/scanner_tester.pyc +0 -0
- unrealon_browser/stealth/undetected_chrome.pyc +0 -0
- unrealon_core/__init__.py +160 -0
- unrealon_core/config/__init__.py +16 -0
- unrealon_core/config/environment.py +98 -0
- unrealon_core/config/urls.py +93 -0
- unrealon_core/enums/__init__.py +24 -0
- unrealon_core/enums/status.py +216 -0
- unrealon_core/enums/types.py +240 -0
- unrealon_core/error_handling/__init__.py +45 -0
- unrealon_core/error_handling/circuit_breaker.py +292 -0
- unrealon_core/error_handling/error_context.py +324 -0
- unrealon_core/error_handling/recovery.py +371 -0
- unrealon_core/error_handling/retry.py +268 -0
- unrealon_core/exceptions/__init__.py +46 -0
- unrealon_core/exceptions/base.py +292 -0
- unrealon_core/exceptions/communication.py +22 -0
- unrealon_core/exceptions/driver.py +11 -0
- unrealon_core/exceptions/proxy.py +11 -0
- unrealon_core/exceptions/task.py +12 -0
- unrealon_core/exceptions/validation.py +17 -0
- unrealon_core/models/__init__.py +98 -0
- unrealon_core/models/arq_context.py +252 -0
- unrealon_core/models/arq_responses.py +125 -0
- unrealon_core/models/base.py +291 -0
- unrealon_core/models/bridge_stats.py +58 -0
- unrealon_core/models/communication.py +39 -0
- unrealon_core/models/config.py +47 -0
- unrealon_core/models/connection_stats.py +47 -0
- unrealon_core/models/driver.py +30 -0
- unrealon_core/models/driver_details.py +98 -0
- unrealon_core/models/logging.py +28 -0
- unrealon_core/models/task.py +21 -0
- unrealon_core/models/typed_responses.py +210 -0
- unrealon_core/models/websocket/__init__.py +91 -0
- unrealon_core/models/websocket/base.py +49 -0
- unrealon_core/models/websocket/config.py +200 -0
- unrealon_core/models/websocket/driver.py +215 -0
- unrealon_core/models/websocket/errors.py +138 -0
- unrealon_core/models/websocket/heartbeat.py +100 -0
- unrealon_core/models/websocket/logging.py +261 -0
- unrealon_core/models/websocket/proxy.py +496 -0
- unrealon_core/models/websocket/tasks.py +275 -0
- unrealon_core/models/websocket/utils.py +153 -0
- unrealon_core/models/websocket_session.py +144 -0
- unrealon_core/monitoring/__init__.py +43 -0
- unrealon_core/monitoring/alerts.py +398 -0
- unrealon_core/monitoring/dashboard.py +307 -0
- unrealon_core/monitoring/health_check.py +354 -0
- unrealon_core/monitoring/metrics.py +352 -0
- unrealon_core/utils/__init__.py +11 -0
- unrealon_core/utils/time.py +61 -0
- unrealon_core/version.py +219 -0
- unrealon_driver/__init__.py +88 -50
- unrealon_driver/core_module/__init__.py +34 -0
- unrealon_driver/core_module/base.py +184 -0
- unrealon_driver/core_module/config.py +30 -0
- unrealon_driver/core_module/event_manager.py +127 -0
- unrealon_driver/core_module/protocols.py +98 -0
- unrealon_driver/core_module/registry.py +146 -0
- unrealon_driver/decorators/__init__.py +15 -0
- unrealon_driver/decorators/retry.py +117 -0
- unrealon_driver/decorators/schedule.py +137 -0
- unrealon_driver/decorators/task.py +61 -0
- unrealon_driver/decorators/timing.py +132 -0
- unrealon_driver/driver/__init__.py +20 -0
- unrealon_driver/driver/communication/__init__.py +10 -0
- unrealon_driver/driver/communication/session.py +203 -0
- unrealon_driver/driver/communication/websocket_client.py +197 -0
- unrealon_driver/driver/core/__init__.py +10 -0
- unrealon_driver/driver/core/config.py +85 -0
- unrealon_driver/driver/core/driver.py +221 -0
- unrealon_driver/driver/factory/__init__.py +9 -0
- unrealon_driver/driver/factory/manager_factory.py +130 -0
- unrealon_driver/driver/lifecycle/__init__.py +11 -0
- unrealon_driver/driver/lifecycle/daemon.py +76 -0
- unrealon_driver/driver/lifecycle/initialization.py +97 -0
- unrealon_driver/driver/lifecycle/shutdown.py +48 -0
- unrealon_driver/driver/monitoring/__init__.py +9 -0
- unrealon_driver/driver/monitoring/health.py +63 -0
- unrealon_driver/driver/utilities/__init__.py +10 -0
- unrealon_driver/driver/utilities/logging.py +51 -0
- unrealon_driver/driver/utilities/serialization.py +61 -0
- unrealon_driver/managers/__init__.py +32 -0
- unrealon_driver/managers/base.py +174 -0
- unrealon_driver/managers/browser.py +98 -0
- unrealon_driver/managers/cache.py +116 -0
- unrealon_driver/managers/http.py +107 -0
- unrealon_driver/managers/logger.py +286 -0
- unrealon_driver/managers/proxy.py +99 -0
- unrealon_driver/managers/registry.py +87 -0
- unrealon_driver/managers/threading.py +54 -0
- unrealon_driver/managers/update.py +107 -0
- unrealon_driver/utils/__init__.py +9 -0
- unrealon_driver/utils/time.py +10 -0
- unrealon/__init__.py +0 -40
- unrealon-1.1.5.dist-info/METADATA +0 -621
- unrealon-1.1.5.dist-info/RECORD +0 -54
- unrealon-1.1.5.dist-info/entry_points.txt +0 -9
- unrealon_browser/managers/stealth.py +0 -388
- unrealon_driver/exceptions.py +0 -33
- unrealon_driver/html_analyzer/__init__.py +0 -32
- unrealon_driver/html_analyzer/cleaner.py +0 -657
- unrealon_driver/html_analyzer/config.py +0 -64
- unrealon_driver/html_analyzer/manager.py +0 -247
- unrealon_driver/html_analyzer/models.py +0 -115
- unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
- unrealon_driver/models/__init__.py +0 -31
- unrealon_driver/models/websocket.py +0 -98
- unrealon_driver/parser/__init__.py +0 -36
- unrealon_driver/parser/cli_manager.py +0 -142
- unrealon_driver/parser/daemon_manager.py +0 -403
- unrealon_driver/parser/managers/__init__.py +0 -25
- unrealon_driver/parser/managers/config.py +0 -293
- unrealon_driver/parser/managers/error.py +0 -412
- unrealon_driver/parser/managers/result.py +0 -321
- unrealon_driver/parser/parser_manager.py +0 -458
- unrealon_driver/smart_logging/__init__.py +0 -24
- unrealon_driver/smart_logging/models.py +0 -44
- unrealon_driver/smart_logging/smart_logger.py +0 -406
- unrealon_driver/smart_logging/unified_logger.py +0 -525
- unrealon_driver/websocket/__init__.py +0 -31
- unrealon_driver/websocket/client.py +0 -249
- unrealon_driver/websocket/config.py +0 -188
- unrealon_driver/websocket/manager.py +0 -90
|
@@ -1,458 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Parser Manager - Unified parser management system with Pydantic v2
|
|
3
|
-
|
|
4
|
-
Strict compliance with CRITICAL_REQUIREMENTS.md:
|
|
5
|
-
- No Dict[str, Any] usage
|
|
6
|
-
- Complete type annotations
|
|
7
|
-
- Pydantic v2 models everywhere
|
|
8
|
-
- Custom exception hierarchy
|
|
9
|
-
- No try blocks in imports
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
from datetime import datetime, timezone
|
|
13
|
-
from typing import Optional
|
|
14
|
-
from pydantic import BaseModel, Field, ConfigDict
|
|
15
|
-
|
|
16
|
-
from .managers import ConfigManager, ParserConfig, ResultManager, ErrorManager, RetryConfig
|
|
17
|
-
|
|
18
|
-
# from unrealon_browser import BrowserManager, BrowserConfig # Temporary comment to avoid circular import
|
|
19
|
-
|
|
20
|
-
# Import UnifiedLogger and HTML Analyzer
|
|
21
|
-
from unrealon_driver.smart_logging import create_unified_logger, LogLevel
|
|
22
|
-
from unrealon_driver.html_analyzer import create_html_analyzer, HTMLCleaningConfig, HTMLParseResult
|
|
23
|
-
from unrealon_driver.websocket import websocket_manager, WebSocketConfig
|
|
24
|
-
from unrealon_browser.core import BrowserManager
|
|
25
|
-
from unrealon_browser.dto.models.config import BrowserConfig
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class ParserManagerConfig(BaseModel):
|
|
29
|
-
"""Complete parser manager configuration"""
|
|
30
|
-
|
|
31
|
-
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
32
|
-
|
|
33
|
-
# Core configuration
|
|
34
|
-
parser_config: ParserConfig = Field(default_factory=ParserConfig, description="Core parser configuration")
|
|
35
|
-
|
|
36
|
-
# Logging configuration (simplified)
|
|
37
|
-
console_enabled: bool = Field(default=True, description="Enable console logging")
|
|
38
|
-
file_enabled: bool = Field(default=True, description="Enable file logging")
|
|
39
|
-
console_level: LogLevel = Field(default=LogLevel.INFO, description="Console log level")
|
|
40
|
-
file_level: LogLevel = Field(default=LogLevel.DEBUG, description="File log level")
|
|
41
|
-
html_config: HTMLCleaningConfig = Field(default_factory=HTMLCleaningConfig, description="HTML cleaning configuration")
|
|
42
|
-
retry_config: RetryConfig = Field(default_factory=RetryConfig, description="Retry configuration")
|
|
43
|
-
|
|
44
|
-
# Bridge settings
|
|
45
|
-
bridge_enabled: bool = Field(default=True, description="Enable bridge connection")
|
|
46
|
-
auto_register: bool = Field(default=True, description="Auto-register parser with bridge")
|
|
47
|
-
|
|
48
|
-
# SmartLogger settings
|
|
49
|
-
bridge_logs_url: Optional[str] = Field(default=None, description="Bridge logs WebSocket URL (ws://localhost:8001/logs)")
|
|
50
|
-
log_batch_interval: float = Field(default=5.0, description="Log batch interval in seconds")
|
|
51
|
-
daemon_mode: Optional[bool] = Field(default=None, description="Daemon mode for logging (None = auto-detect)")
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class ParserStats(BaseModel):
|
|
55
|
-
"""Comprehensive parser statistics"""
|
|
56
|
-
|
|
57
|
-
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
58
|
-
|
|
59
|
-
parser_id: str = Field(...)
|
|
60
|
-
parser_name: str = Field(...)
|
|
61
|
-
session_id: Optional[str] = Field(default=None)
|
|
62
|
-
|
|
63
|
-
# Timing
|
|
64
|
-
session_start: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
65
|
-
session_duration: float = Field(default=0.0, ge=0.0)
|
|
66
|
-
|
|
67
|
-
# Operations
|
|
68
|
-
operations_completed: int = Field(default=0, ge=0)
|
|
69
|
-
operations_failed: int = Field(default=0, ge=0)
|
|
70
|
-
success_rate: float = Field(default=0.0, ge=0.0, le=100.0)
|
|
71
|
-
|
|
72
|
-
# Content processing
|
|
73
|
-
pages_processed: int = Field(default=0, ge=0)
|
|
74
|
-
html_cleaned_count: int = Field(default=0, ge=0)
|
|
75
|
-
total_html_reduction: float = Field(default=0.0, ge=0.0)
|
|
76
|
-
|
|
77
|
-
# Errors
|
|
78
|
-
total_errors: int = Field(default=0, ge=0)
|
|
79
|
-
retries_attempted: int = Field(default=0, ge=0)
|
|
80
|
-
|
|
81
|
-
# Bridge
|
|
82
|
-
bridge_connected: bool = Field(default=False)
|
|
83
|
-
bridge_messages_sent: int = Field(default=0, ge=0)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
class ParserManagerError(Exception):
|
|
87
|
-
"""Base exception for parser manager"""
|
|
88
|
-
|
|
89
|
-
def __init__(self, message: str, operation: str, details: Optional[dict[str, str]] = None):
|
|
90
|
-
self.message = message
|
|
91
|
-
self.operation = operation
|
|
92
|
-
self.details = details or {}
|
|
93
|
-
super().__init__(message)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
class InitializationError(ParserManagerError):
|
|
97
|
-
"""Raised when parser manager initialization fails"""
|
|
98
|
-
|
|
99
|
-
pass
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
class OperationError(ParserManagerError):
|
|
103
|
-
"""Raised when parser operation fails"""
|
|
104
|
-
|
|
105
|
-
pass
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
class ParserManager:
|
|
109
|
-
"""
|
|
110
|
-
🚀 Parser Manager - Unified parser management system
|
|
111
|
-
|
|
112
|
-
Features:
|
|
113
|
-
- Unified Configuration: Single config for all managers
|
|
114
|
-
- Automatic Lifecycle: Handles initialization, execution, cleanup
|
|
115
|
-
- Error Recovery: Smart retry logic with exponential backoff
|
|
116
|
-
- Performance Monitoring: Comprehensive statistics and metrics
|
|
117
|
-
- Bridge Integration: Seamless communication with Django
|
|
118
|
-
- Type Safety: Full Pydantic v2 compliance
|
|
119
|
-
|
|
120
|
-
Usage:
|
|
121
|
-
config = ParserManagerConfig(
|
|
122
|
-
parser_config=ParserConfig(parser_name="MyParser"),
|
|
123
|
-
bridge_enabled=True
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
async with ParserManager(config) as parser:
|
|
127
|
-
# Navigate and extract
|
|
128
|
-
html = await parser.get_html("https://example.com")
|
|
129
|
-
cleaned_html = await parser.clean_html(html)
|
|
130
|
-
result = await parser.analyze_html(cleaned_html)
|
|
131
|
-
|
|
132
|
-
# Results are automatically tracked
|
|
133
|
-
stats = parser.get_stats()
|
|
134
|
-
"""
|
|
135
|
-
|
|
136
|
-
def __init__(self, config: ParserManagerConfig):
|
|
137
|
-
self.config = config
|
|
138
|
-
|
|
139
|
-
# Initialize managers
|
|
140
|
-
self.config_manager = ConfigManager(self.config.parser_config)
|
|
141
|
-
self.result_manager = ResultManager(self.config.parser_config.parser_id)
|
|
142
|
-
self.error_manager = ErrorManager()
|
|
143
|
-
# Initialize HTML Analyzer (WebSocket URL auto-detected)
|
|
144
|
-
self.html_analyzer = create_html_analyzer(parser_id=self.config.parser_config.parser_id, api_key=self.config.parser_config.api_key, cleaning_config=self.config.html_config)
|
|
145
|
-
# Create default browser config
|
|
146
|
-
browser_config = BrowserConfig(parser_name=self.config.parser_config.parser_name)
|
|
147
|
-
self.browser_manager = BrowserManager(browser_config, parser_id=self.config.parser_config.parser_id)
|
|
148
|
-
|
|
149
|
-
# Initialize WebSocket connection config
|
|
150
|
-
if self.config.bridge_logs_url:
|
|
151
|
-
self._websocket_config = WebSocketConfig(url=self.config.bridge_logs_url, api_key=self.config.parser_config.api_key, parser_id=self.config.parser_config.parser_id)
|
|
152
|
-
else:
|
|
153
|
-
self._websocket_config = None
|
|
154
|
-
|
|
155
|
-
# Initialize UnifiedLogger
|
|
156
|
-
log_file = None
|
|
157
|
-
if self.config.parser_config.system_dir:
|
|
158
|
-
log_file = self.config.parser_config.system_dir / "logs" / f"{self.config.parser_config.parser_name}.log"
|
|
159
|
-
|
|
160
|
-
self.logger = create_unified_logger(
|
|
161
|
-
parser_id=self.config.parser_config.parser_id,
|
|
162
|
-
parser_name=self.config.parser_config.parser_name,
|
|
163
|
-
bridge_logs_url=self.config.bridge_logs_url,
|
|
164
|
-
log_file=log_file,
|
|
165
|
-
console_enabled=self.config.console_enabled,
|
|
166
|
-
file_enabled=self.config.file_enabled,
|
|
167
|
-
console_level=self.config.console_level,
|
|
168
|
-
file_level=self.config.file_level,
|
|
169
|
-
batch_interval=self.config.log_batch_interval,
|
|
170
|
-
daemon_mode=self.config.daemon_mode,
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
# State
|
|
174
|
-
self._is_initialized = False
|
|
175
|
-
self._session_id: Optional[str] = None
|
|
176
|
-
self._stats = ParserStats(parser_id=self.config.parser_config.parser_id, parser_name=self.config.parser_config.parser_name)
|
|
177
|
-
|
|
178
|
-
# Register retry configurations
|
|
179
|
-
self._setup_retry_configs()
|
|
180
|
-
|
|
181
|
-
# ==========================================
|
|
182
|
-
# LIFECYCLE MANAGEMENT
|
|
183
|
-
# ==========================================
|
|
184
|
-
|
|
185
|
-
async def initialize(self) -> None:
|
|
186
|
-
"""Initialize all managers and establish connections"""
|
|
187
|
-
if self._is_initialized:
|
|
188
|
-
return
|
|
189
|
-
|
|
190
|
-
try:
|
|
191
|
-
self.logger.info("🚀 Initializing parser manager...")
|
|
192
|
-
|
|
193
|
-
# Initialize WebSocket connection
|
|
194
|
-
if self._websocket_config:
|
|
195
|
-
await websocket_manager.initialize(self._websocket_config)
|
|
196
|
-
if websocket_manager.connected:
|
|
197
|
-
self.logger.info("🔌 WebSocket connected")
|
|
198
|
-
else:
|
|
199
|
-
self.logger.warning("🔌 WebSocket connection failed")
|
|
200
|
-
|
|
201
|
-
# Initialize browser
|
|
202
|
-
await self.browser_manager.initialize_async()
|
|
203
|
-
|
|
204
|
-
self._is_initialized = True
|
|
205
|
-
self.logger.info("✅ Parser manager initialized successfully")
|
|
206
|
-
|
|
207
|
-
except Exception as e:
|
|
208
|
-
self.error_manager.record_error(e, "initialization")
|
|
209
|
-
raise InitializationError(message=f"Failed to initialize parser manager: {e}", operation="initialization") from e
|
|
210
|
-
|
|
211
|
-
async def cleanup(self) -> None:
|
|
212
|
-
"""Clean up all resources"""
|
|
213
|
-
self.logger.info("🧹 Cleaning up parser manager...")
|
|
214
|
-
|
|
215
|
-
cleanup_errors = []
|
|
216
|
-
|
|
217
|
-
# End session if active
|
|
218
|
-
if self._session_id:
|
|
219
|
-
await self.end_session()
|
|
220
|
-
|
|
221
|
-
# Cleanup browser
|
|
222
|
-
try:
|
|
223
|
-
await self.browser_manager.close_async()
|
|
224
|
-
except Exception as e:
|
|
225
|
-
cleanup_errors.append(f"browser_cleanup: {e}")
|
|
226
|
-
|
|
227
|
-
# Disconnect WebSocket
|
|
228
|
-
try:
|
|
229
|
-
await websocket_manager.disconnect()
|
|
230
|
-
except Exception as e:
|
|
231
|
-
cleanup_errors.append(f"websocket_disconnect: {e}")
|
|
232
|
-
|
|
233
|
-
# Update final stats
|
|
234
|
-
self._update_session_stats()
|
|
235
|
-
|
|
236
|
-
# Cleanup UnifiedLogger
|
|
237
|
-
try:
|
|
238
|
-
await self.logger.close()
|
|
239
|
-
except Exception as e:
|
|
240
|
-
cleanup_errors.append(f"logger_cleanup: {e}")
|
|
241
|
-
|
|
242
|
-
# Log cleanup errors but don't raise
|
|
243
|
-
if cleanup_errors:
|
|
244
|
-
self.logger.warning(f"Cleanup errors: {'; '.join(cleanup_errors)}")
|
|
245
|
-
|
|
246
|
-
self.logger.info("✅ Parser manager cleanup completed")
|
|
247
|
-
|
|
248
|
-
# ==========================================
|
|
249
|
-
# CORE PARSING METHODS
|
|
250
|
-
# ==========================================
|
|
251
|
-
|
|
252
|
-
async def get_html(self, url: str) -> str:
|
|
253
|
-
"""Get HTML content from URL with error handling"""
|
|
254
|
-
if not self._is_initialized:
|
|
255
|
-
await self.initialize()
|
|
256
|
-
|
|
257
|
-
@self.error_manager.with_retry("get_html", self.config.retry_config)
|
|
258
|
-
async def _get_html_with_retry():
|
|
259
|
-
self.logger.url_access(url, "fetching")
|
|
260
|
-
html = await self.browser_manager.get_html(url)
|
|
261
|
-
self._stats.pages_processed += 1
|
|
262
|
-
return html
|
|
263
|
-
|
|
264
|
-
try:
|
|
265
|
-
return await _get_html_with_retry()
|
|
266
|
-
except Exception as e:
|
|
267
|
-
self._stats.total_errors += 1
|
|
268
|
-
raise OperationError(message=f"Failed to get HTML from {url}: {e}", operation="get_html", details={"url": url}) from e
|
|
269
|
-
|
|
270
|
-
async def parse_url(self, url: str, instructions: Optional[str] = None, **kwargs) -> HTMLParseResult:
|
|
271
|
-
"""Complete parsing workflow: fetch → clean → analyze via HTML Analyzer"""
|
|
272
|
-
operation = self.result_manager.start_operation()
|
|
273
|
-
|
|
274
|
-
try:
|
|
275
|
-
self.logger.start_operation("parse_url")
|
|
276
|
-
|
|
277
|
-
# Fetch HTML
|
|
278
|
-
html = await self.get_html(url)
|
|
279
|
-
|
|
280
|
-
# Delegate complete HTML processing to HTML Analyzer
|
|
281
|
-
analysis_result = await self.html_analyzer.parse_html(html=html, url=url, instructions=instructions, session_id=self._session_id, **kwargs)
|
|
282
|
-
|
|
283
|
-
# Update stats from HTML Analyzer
|
|
284
|
-
html_stats = self.html_analyzer.get_stats()
|
|
285
|
-
self._stats.html_cleaned_count += html_stats.cleaned_count
|
|
286
|
-
self._stats.total_html_reduction += html_stats.total_reduction
|
|
287
|
-
|
|
288
|
-
# Complete operation
|
|
289
|
-
success = analysis_result.success == "true"
|
|
290
|
-
self.result_manager.complete_operation(data=[], source_urls=[url], success=success)
|
|
291
|
-
|
|
292
|
-
if success:
|
|
293
|
-
self._stats.operations_completed += 1
|
|
294
|
-
else:
|
|
295
|
-
self._stats.operations_failed += 1
|
|
296
|
-
|
|
297
|
-
self.logger.end_operation("parse_url", operation.duration_seconds)
|
|
298
|
-
|
|
299
|
-
return analysis_result
|
|
300
|
-
|
|
301
|
-
except Exception as e:
|
|
302
|
-
self.result_manager.complete_operation(data=[], source_urls=[url], success=False, error_message=str(e))
|
|
303
|
-
|
|
304
|
-
self._stats.operations_failed += 1
|
|
305
|
-
self.logger.error(f"❌ Failed parse_url: {str(e)}")
|
|
306
|
-
raise
|
|
307
|
-
|
|
308
|
-
# ==========================================
|
|
309
|
-
# SESSION MANAGEMENT (Simplified - Local Only)
|
|
310
|
-
# ==========================================
|
|
311
|
-
|
|
312
|
-
async def start_session(self, session_type: str = "parsing") -> str:
|
|
313
|
-
"""Start a new parsing session (local only)"""
|
|
314
|
-
import uuid
|
|
315
|
-
|
|
316
|
-
session_id = f"{session_type}_{uuid.uuid4().hex[:8]}"
|
|
317
|
-
self._session_id = session_id
|
|
318
|
-
self._stats.session_id = session_id
|
|
319
|
-
self.logger.set_session(session_id)
|
|
320
|
-
|
|
321
|
-
self.logger.info(f"📋 Local session started: {session_id}")
|
|
322
|
-
return session_id
|
|
323
|
-
|
|
324
|
-
async def end_session(self) -> None:
|
|
325
|
-
"""End current parsing session"""
|
|
326
|
-
if not self._session_id:
|
|
327
|
-
return
|
|
328
|
-
|
|
329
|
-
self.logger.info(f"📋 Local session ended: {self._session_id}")
|
|
330
|
-
self._session_id = None
|
|
331
|
-
self._stats.session_id = None
|
|
332
|
-
|
|
333
|
-
# ==========================================
|
|
334
|
-
# STATISTICS AND MONITORING
|
|
335
|
-
# ==========================================
|
|
336
|
-
|
|
337
|
-
def get_stats(self) -> ParserStats:
|
|
338
|
-
"""Get comprehensive parser statistics"""
|
|
339
|
-
self._update_session_stats()
|
|
340
|
-
return ParserStats.model_validate(self._stats.model_dump())
|
|
341
|
-
|
|
342
|
-
def get_manager_stats(self) -> dict[str, dict[str, str]]:
|
|
343
|
-
"""Get statistics from all managers"""
|
|
344
|
-
return {
|
|
345
|
-
"result_manager": self.result_manager.get_stats(),
|
|
346
|
-
"error_manager": self.error_manager.get_error_stats(),
|
|
347
|
-
"browser_manager": self.browser_manager.get_stats().model_dump(mode="json"),
|
|
348
|
-
# Logging stats removed - using UnifiedLogger now
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
async def health_check(self) -> dict[str, str]:
|
|
352
|
-
"""Comprehensive health check"""
|
|
353
|
-
health = {"status": "healthy", "parser_id": self.config.parser_config.parser_id, "parser_name": self.config.parser_config.parser_name, "initialized": str(self._is_initialized), "session_active": str(self._session_id is not None)}
|
|
354
|
-
|
|
355
|
-
# Check browser health
|
|
356
|
-
try:
|
|
357
|
-
browser_health = await self.browser_manager.health_check()
|
|
358
|
-
health["browser_status"] = browser_health.get("status", "unknown")
|
|
359
|
-
except Exception as e:
|
|
360
|
-
health["browser_status"] = f"error: {e}"
|
|
361
|
-
|
|
362
|
-
# Check WebSocket connection health
|
|
363
|
-
health["websocket_connected"] = str(websocket_manager.connected)
|
|
364
|
-
|
|
365
|
-
return health
|
|
366
|
-
|
|
367
|
-
# ==========================================
|
|
368
|
-
# INTERNAL METHODS
|
|
369
|
-
# ==========================================
|
|
370
|
-
|
|
371
|
-
def _setup_retry_configs(self) -> None:
|
|
372
|
-
"""Setup retry configurations for different operations"""
|
|
373
|
-
# Navigation retry config
|
|
374
|
-
nav_config = RetryConfig(max_attempts=3, base_delay=2.0, retry_on_exceptions=["NavigationError", "TimeoutError", "ConnectionError"])
|
|
375
|
-
self.error_manager.register_retry_config("get_html", nav_config)
|
|
376
|
-
|
|
377
|
-
# Bridge communication retry config
|
|
378
|
-
bridge_config = RetryConfig(max_attempts=2, base_delay=1.0, retry_on_exceptions=["ConnectionError", "TimeoutError"])
|
|
379
|
-
self.error_manager.register_retry_config("analyze_html", bridge_config)
|
|
380
|
-
|
|
381
|
-
def _update_session_stats(self) -> None:
|
|
382
|
-
"""Update session statistics"""
|
|
383
|
-
self._stats.session_duration = (datetime.now(timezone.utc) - self._stats.session_start).total_seconds()
|
|
384
|
-
|
|
385
|
-
total_operations = self._stats.operations_completed + self._stats.operations_failed
|
|
386
|
-
if total_operations > 0:
|
|
387
|
-
self._stats.success_rate = (self._stats.operations_completed / total_operations) * 100.0
|
|
388
|
-
|
|
389
|
-
# ==========================================
|
|
390
|
-
# CONTEXT MANAGER SUPPORT
|
|
391
|
-
# ==========================================
|
|
392
|
-
|
|
393
|
-
async def __aenter__(self):
|
|
394
|
-
"""Async context manager entry"""
|
|
395
|
-
await self.initialize()
|
|
396
|
-
return self
|
|
397
|
-
|
|
398
|
-
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
399
|
-
"""Async context manager exit"""
|
|
400
|
-
await self.cleanup()
|
|
401
|
-
return False
|
|
402
|
-
|
|
403
|
-
# ==========================================
|
|
404
|
-
# LOGGING CONVENIENCE
|
|
405
|
-
# ==========================================
|
|
406
|
-
|
|
407
|
-
def set_session_id(self, session_id: str):
|
|
408
|
-
"""Set session ID for both internal tracking and logger"""
|
|
409
|
-
self._session_id = session_id
|
|
410
|
-
self.logger.set_session(session_id)
|
|
411
|
-
|
|
412
|
-
async def flush_logs(self):
|
|
413
|
-
"""Force flush all accumulated logs"""
|
|
414
|
-
await self.logger.flush()
|
|
415
|
-
|
|
416
|
-
def __repr__(self) -> str:
|
|
417
|
-
return f"<ParserManager(id='{self.config.parser_config.parser_id}', name='{self.config.parser_config.parser_name}')>"
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
# ==========================================
|
|
421
|
-
# CONVENIENCE FUNCTIONS
|
|
422
|
-
# ==========================================
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
def get_parser_manager(parser_name: str, parser_type: str = "generic", **kwargs) -> ParserManager:
|
|
426
|
-
"""
|
|
427
|
-
Get a parser manager instance with minimal configuration
|
|
428
|
-
|
|
429
|
-
Args:
|
|
430
|
-
parser_name: Name of the parser
|
|
431
|
-
parser_type: Type of parser (generic, ecommerce, news, etc.)
|
|
432
|
-
**kwargs: Additional configuration options
|
|
433
|
-
|
|
434
|
-
Returns:
|
|
435
|
-
Configured ParserManager instance
|
|
436
|
-
"""
|
|
437
|
-
parser_config = ParserConfig(parser_name=parser_name, parser_type=parser_type, **{k: v for k, v in kwargs.items() if k in ParserConfig.model_fields})
|
|
438
|
-
|
|
439
|
-
config = ParserManagerConfig(parser_config=parser_config, **{k: v for k, v in kwargs.items() if k in ParserManagerConfig.model_fields and k not in ["parser_config"]})
|
|
440
|
-
|
|
441
|
-
return ParserManager(config)
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
async def quick_parse(url: str, parser_name: str = "QuickParser", instructions: Optional[str] = None, **kwargs) -> HTMLParseResult:
|
|
445
|
-
"""
|
|
446
|
-
Quick parsing convenience function
|
|
447
|
-
|
|
448
|
-
Args:
|
|
449
|
-
url: URL to parse
|
|
450
|
-
parser_name: Name for the parser
|
|
451
|
-
instructions: Optional parsing instructions
|
|
452
|
-
**kwargs: Additional configuration
|
|
453
|
-
|
|
454
|
-
Returns:
|
|
455
|
-
Parsing result
|
|
456
|
-
"""
|
|
457
|
-
async with get_parser_manager(parser_name, **kwargs) as parser:
|
|
458
|
-
return await parser.parse_url(url, instructions, **kwargs)
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Smart logging module for unrealon_driver.
|
|
3
|
-
|
|
4
|
-
Provides intelligent logging with batching, WebSocket transport, and fallback mechanisms.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from .smart_logger import SmartLogger, create_smart_logger
|
|
8
|
-
from .unified_logger import UnifiedLogger, create_unified_logger
|
|
9
|
-
from .models import LogEntry, LogLevel, LogContext
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
# Main loggers
|
|
13
|
-
"SmartLogger",
|
|
14
|
-
"UnifiedLogger",
|
|
15
|
-
|
|
16
|
-
# Factory functions
|
|
17
|
-
"create_smart_logger",
|
|
18
|
-
"create_unified_logger",
|
|
19
|
-
|
|
20
|
-
# Models
|
|
21
|
-
"LogEntry",
|
|
22
|
-
"LogLevel",
|
|
23
|
-
"LogContext"
|
|
24
|
-
]
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Common models for smart logging system.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from typing import Optional, Dict, Any
|
|
6
|
-
from dataclasses import dataclass
|
|
7
|
-
from pydantic import BaseModel, Field, ConfigDict
|
|
8
|
-
from enum import Enum
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class LogLevel(str, Enum):
|
|
12
|
-
"""Log levels for driver logger"""
|
|
13
|
-
DEBUG = "DEBUG"
|
|
14
|
-
INFO = "INFO"
|
|
15
|
-
WARNING = "WARNING"
|
|
16
|
-
ERROR = "ERROR"
|
|
17
|
-
CRITICAL = "CRITICAL"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@dataclass
|
|
21
|
-
class LogEntry:
|
|
22
|
-
"""Structure for log entry"""
|
|
23
|
-
timestamp: str
|
|
24
|
-
level: str
|
|
25
|
-
message: str
|
|
26
|
-
parser_id: str
|
|
27
|
-
session_id: Optional[str] = None
|
|
28
|
-
url: Optional[str] = None
|
|
29
|
-
operation: Optional[str] = None
|
|
30
|
-
extra: Optional[Dict[str, Any]] = None
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class LogContext(BaseModel):
|
|
34
|
-
"""Log context information"""
|
|
35
|
-
model_config = ConfigDict(
|
|
36
|
-
validate_assignment=True,
|
|
37
|
-
extra="forbid"
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
session_id: Optional[str] = Field(default=None)
|
|
41
|
-
command_id: Optional[str] = Field(default=None)
|
|
42
|
-
operation: Optional[str] = Field(default=None)
|
|
43
|
-
url: Optional[str] = Field(default=None)
|
|
44
|
-
additional_data: dict[str, Any] = Field(default_factory=dict)
|