unrealon 1.1.5__py3-none-any.whl → 2.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-1.1.5.dist-info/licenses → unrealon-2.0.4.dist-info}/LICENSE +1 -1
- unrealon-2.0.4.dist-info/METADATA +491 -0
- unrealon-2.0.4.dist-info/RECORD +129 -0
- {unrealon-1.1.5.dist-info → unrealon-2.0.4.dist-info}/WHEEL +2 -1
- unrealon-2.0.4.dist-info/entry_points.txt +3 -0
- unrealon-2.0.4.dist-info/top_level.txt +3 -0
- unrealon_browser/__init__.py +5 -2
- unrealon_browser/cli/browser_cli.py +18 -9
- unrealon_browser/cli/interactive_mode.py +18 -7
- unrealon_browser/core/browser_manager.py +76 -13
- unrealon_browser/dto/__init__.py +21 -0
- unrealon_browser/dto/bot_detection.py +175 -0
- unrealon_browser/dto/models/config.py +14 -1
- unrealon_browser/managers/__init__.py +4 -1
- unrealon_browser/managers/logger_bridge.py +3 -6
- unrealon_browser/managers/page_wait_manager.py +198 -0
- unrealon_browser/stealth/__init__.py +27 -0
- unrealon_browser/stealth/bypass_techniques.pyc +0 -0
- unrealon_browser/stealth/manager.pyc +0 -0
- unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
- unrealon_browser/stealth/playwright_stealth.pyc +0 -0
- unrealon_browser/stealth/scanner_tester.pyc +0 -0
- unrealon_browser/stealth/undetected_chrome.pyc +0 -0
- unrealon_core/__init__.py +160 -0
- unrealon_core/config/__init__.py +16 -0
- unrealon_core/config/environment.py +98 -0
- unrealon_core/config/urls.py +93 -0
- unrealon_core/enums/__init__.py +24 -0
- unrealon_core/enums/status.py +216 -0
- unrealon_core/enums/types.py +240 -0
- unrealon_core/error_handling/__init__.py +45 -0
- unrealon_core/error_handling/circuit_breaker.py +292 -0
- unrealon_core/error_handling/error_context.py +324 -0
- unrealon_core/error_handling/recovery.py +371 -0
- unrealon_core/error_handling/retry.py +268 -0
- unrealon_core/exceptions/__init__.py +46 -0
- unrealon_core/exceptions/base.py +292 -0
- unrealon_core/exceptions/communication.py +22 -0
- unrealon_core/exceptions/driver.py +11 -0
- unrealon_core/exceptions/proxy.py +11 -0
- unrealon_core/exceptions/task.py +12 -0
- unrealon_core/exceptions/validation.py +17 -0
- unrealon_core/models/__init__.py +98 -0
- unrealon_core/models/arq_context.py +252 -0
- unrealon_core/models/arq_responses.py +125 -0
- unrealon_core/models/base.py +291 -0
- unrealon_core/models/bridge_stats.py +58 -0
- unrealon_core/models/communication.py +39 -0
- unrealon_core/models/config.py +47 -0
- unrealon_core/models/connection_stats.py +47 -0
- unrealon_core/models/driver.py +30 -0
- unrealon_core/models/driver_details.py +98 -0
- unrealon_core/models/logging.py +28 -0
- unrealon_core/models/task.py +21 -0
- unrealon_core/models/typed_responses.py +210 -0
- unrealon_core/models/websocket/__init__.py +91 -0
- unrealon_core/models/websocket/base.py +49 -0
- unrealon_core/models/websocket/config.py +200 -0
- unrealon_core/models/websocket/driver.py +215 -0
- unrealon_core/models/websocket/errors.py +138 -0
- unrealon_core/models/websocket/heartbeat.py +100 -0
- unrealon_core/models/websocket/logging.py +261 -0
- unrealon_core/models/websocket/proxy.py +496 -0
- unrealon_core/models/websocket/tasks.py +275 -0
- unrealon_core/models/websocket/utils.py +153 -0
- unrealon_core/models/websocket_session.py +144 -0
- unrealon_core/monitoring/__init__.py +43 -0
- unrealon_core/monitoring/alerts.py +398 -0
- unrealon_core/monitoring/dashboard.py +307 -0
- unrealon_core/monitoring/health_check.py +354 -0
- unrealon_core/monitoring/metrics.py +352 -0
- unrealon_core/utils/__init__.py +11 -0
- unrealon_core/utils/time.py +61 -0
- unrealon_core/version.py +219 -0
- unrealon_driver/__init__.py +88 -50
- unrealon_driver/core_module/__init__.py +34 -0
- unrealon_driver/core_module/base.py +184 -0
- unrealon_driver/core_module/config.py +30 -0
- unrealon_driver/core_module/event_manager.py +127 -0
- unrealon_driver/core_module/protocols.py +98 -0
- unrealon_driver/core_module/registry.py +146 -0
- unrealon_driver/decorators/__init__.py +15 -0
- unrealon_driver/decorators/retry.py +117 -0
- unrealon_driver/decorators/schedule.py +137 -0
- unrealon_driver/decorators/task.py +61 -0
- unrealon_driver/decorators/timing.py +132 -0
- unrealon_driver/driver/__init__.py +20 -0
- unrealon_driver/driver/communication/__init__.py +10 -0
- unrealon_driver/driver/communication/session.py +203 -0
- unrealon_driver/driver/communication/websocket_client.py +197 -0
- unrealon_driver/driver/core/__init__.py +10 -0
- unrealon_driver/driver/core/config.py +85 -0
- unrealon_driver/driver/core/driver.py +221 -0
- unrealon_driver/driver/factory/__init__.py +9 -0
- unrealon_driver/driver/factory/manager_factory.py +130 -0
- unrealon_driver/driver/lifecycle/__init__.py +11 -0
- unrealon_driver/driver/lifecycle/daemon.py +76 -0
- unrealon_driver/driver/lifecycle/initialization.py +97 -0
- unrealon_driver/driver/lifecycle/shutdown.py +48 -0
- unrealon_driver/driver/monitoring/__init__.py +9 -0
- unrealon_driver/driver/monitoring/health.py +63 -0
- unrealon_driver/driver/utilities/__init__.py +10 -0
- unrealon_driver/driver/utilities/logging.py +51 -0
- unrealon_driver/driver/utilities/serialization.py +61 -0
- unrealon_driver/managers/__init__.py +32 -0
- unrealon_driver/managers/base.py +174 -0
- unrealon_driver/managers/browser.py +98 -0
- unrealon_driver/managers/cache.py +116 -0
- unrealon_driver/managers/http.py +107 -0
- unrealon_driver/managers/logger.py +286 -0
- unrealon_driver/managers/proxy.py +99 -0
- unrealon_driver/managers/registry.py +87 -0
- unrealon_driver/managers/threading.py +54 -0
- unrealon_driver/managers/update.py +107 -0
- unrealon_driver/utils/__init__.py +9 -0
- unrealon_driver/utils/time.py +10 -0
- unrealon/__init__.py +0 -40
- unrealon-1.1.5.dist-info/METADATA +0 -621
- unrealon-1.1.5.dist-info/RECORD +0 -54
- unrealon-1.1.5.dist-info/entry_points.txt +0 -9
- unrealon_browser/managers/stealth.py +0 -388
- unrealon_driver/exceptions.py +0 -33
- unrealon_driver/html_analyzer/__init__.py +0 -32
- unrealon_driver/html_analyzer/cleaner.py +0 -657
- unrealon_driver/html_analyzer/config.py +0 -64
- unrealon_driver/html_analyzer/manager.py +0 -247
- unrealon_driver/html_analyzer/models.py +0 -115
- unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
- unrealon_driver/models/__init__.py +0 -31
- unrealon_driver/models/websocket.py +0 -98
- unrealon_driver/parser/__init__.py +0 -36
- unrealon_driver/parser/cli_manager.py +0 -142
- unrealon_driver/parser/daemon_manager.py +0 -403
- unrealon_driver/parser/managers/__init__.py +0 -25
- unrealon_driver/parser/managers/config.py +0 -293
- unrealon_driver/parser/managers/error.py +0 -412
- unrealon_driver/parser/managers/result.py +0 -321
- unrealon_driver/parser/parser_manager.py +0 -458
- unrealon_driver/smart_logging/__init__.py +0 -24
- unrealon_driver/smart_logging/models.py +0 -44
- unrealon_driver/smart_logging/smart_logger.py +0 -406
- unrealon_driver/smart_logging/unified_logger.py +0 -525
- unrealon_driver/websocket/__init__.py +0 -31
- unrealon_driver/websocket/client.py +0 -249
- unrealon_driver/websocket/config.py +0 -188
- unrealon_driver/websocket/manager.py +0 -90
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
WebSocket message models for daemon communication.
|
|
3
|
-
|
|
4
|
-
Strict Pydantic v2 compliance and type safety.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from typing import Optional, List, Any
|
|
8
|
-
from pydantic import BaseModel, Field
|
|
9
|
-
from enum import Enum
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class MessageType(str, Enum):
|
|
13
|
-
"""WebSocket message types."""
|
|
14
|
-
REGISTER = "register"
|
|
15
|
-
COMMAND = "command"
|
|
16
|
-
COMMAND_RESPONSE = "command_response"
|
|
17
|
-
STATUS = "status"
|
|
18
|
-
HEARTBEAT = "heartbeat"
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class BridgeMessageType(str, Enum):
|
|
22
|
-
"""Bridge WebSocket message types."""
|
|
23
|
-
REGISTER = "register"
|
|
24
|
-
RPC_CALL = "rpc_call"
|
|
25
|
-
PUBSUB_PUBLISH = "pubsub_publish"
|
|
26
|
-
HEARTBEAT = "heartbeat"
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class RegistrationMessage(BaseModel):
|
|
30
|
-
"""Daemon registration message."""
|
|
31
|
-
type: MessageType = Field(default=MessageType.REGISTER)
|
|
32
|
-
parser_id: str = Field(..., min_length=1, description="Parser identifier")
|
|
33
|
-
parser_type: str = Field(default="daemon", description="Parser type")
|
|
34
|
-
version: str = Field(default="1.0.0", description="Parser version")
|
|
35
|
-
capabilities: List[str] = Field(default_factory=lambda: ["parse", "search", "status", "health"])
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class CommandMessage(BaseModel):
|
|
39
|
-
"""Incoming command message."""
|
|
40
|
-
type: MessageType = Field(default=MessageType.COMMAND)
|
|
41
|
-
command_type: str = Field(..., min_length=1, description="Command type")
|
|
42
|
-
command_id: str = Field(..., min_length=1, description="Command identifier")
|
|
43
|
-
parameters: dict[str, Any] = Field(default_factory=dict, description="Command parameters")
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class CommandResponseMessage(BaseModel):
|
|
47
|
-
"""Command response message."""
|
|
48
|
-
type: MessageType = Field(default=MessageType.COMMAND_RESPONSE)
|
|
49
|
-
command_id: str = Field(..., min_length=1, description="Command identifier")
|
|
50
|
-
success: bool = Field(..., description="Command success status")
|
|
51
|
-
result_data: Optional[dict[str, Any]] = Field(default=None, description="Command result data")
|
|
52
|
-
error: Optional[str] = Field(default=None, description="Error message if failed")
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class StatusMessage(BaseModel):
|
|
56
|
-
"""Daemon status message."""
|
|
57
|
-
type: MessageType = Field(default=MessageType.STATUS)
|
|
58
|
-
parser_id: str = Field(..., min_length=1, description="Parser identifier")
|
|
59
|
-
running: bool = Field(..., description="Daemon running status")
|
|
60
|
-
uptime_seconds: float = Field(..., ge=0, description="Uptime in seconds")
|
|
61
|
-
total_runs: int = Field(..., ge=0, description="Total runs executed")
|
|
62
|
-
successful_runs: int = Field(..., ge=0, description="Successful runs")
|
|
63
|
-
failed_runs: int = Field(..., ge=0, description="Failed runs")
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class HeartbeatMessage(BaseModel):
|
|
67
|
-
"""Daemon heartbeat message."""
|
|
68
|
-
type: MessageType = Field(default=MessageType.HEARTBEAT)
|
|
69
|
-
parser_id: str = Field(..., min_length=1, description="Parser identifier")
|
|
70
|
-
timestamp: str = Field(..., description="Heartbeat timestamp")
|
|
71
|
-
status: str = Field(default="alive", description="Daemon status")
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
# Bridge message models
|
|
75
|
-
class BridgeRegistrationPayload(BaseModel):
|
|
76
|
-
"""Payload for bridge registration message."""
|
|
77
|
-
client_type: str = Field(default="daemon", description="Client type")
|
|
78
|
-
parser_id: str = Field(..., min_length=1, description="Parser identifier")
|
|
79
|
-
version: str = Field(default="1.0.0", description="Parser version")
|
|
80
|
-
capabilities: List[str] = Field(default_factory=lambda: ["parse", "search", "status", "health"])
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
class BridgeMessage(BaseModel):
|
|
84
|
-
"""Bridge WebSocket message format."""
|
|
85
|
-
message_type: BridgeMessageType = Field(..., description="Message type")
|
|
86
|
-
payload: dict[str, Any] = Field(default_factory=dict, description="Message payload")
|
|
87
|
-
message_id: Optional[str] = Field(default=None, description="Message ID")
|
|
88
|
-
api_key: Optional[str] = Field(default=None, description="API key")
|
|
89
|
-
correlation_id: Optional[str] = Field(default=None, description="Correlation ID")
|
|
90
|
-
reply_to: Optional[str] = Field(default=None, description="Reply to address")
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
class BridgeRegistrationMessage(BaseModel):
|
|
94
|
-
"""Bridge registration message."""
|
|
95
|
-
message_type: BridgeMessageType = Field(default=BridgeMessageType.REGISTER)
|
|
96
|
-
payload: BridgeRegistrationPayload = Field(..., description="Registration payload")
|
|
97
|
-
message_id: Optional[str] = Field(default=None, description="Message ID")
|
|
98
|
-
api_key: Optional[str] = Field(default=None, description="API key")
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Parser management system with specialized managers
|
|
3
|
-
|
|
4
|
-
Strict Pydantic v2 compliance and type safety
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from .parser_manager import ParserManager, ParserManagerConfig, ParserStats, get_parser_manager, quick_parse
|
|
8
|
-
from .daemon_manager import DaemonManager, DaemonStatus
|
|
9
|
-
from .cli_manager import CLIManager
|
|
10
|
-
from .managers import ConfigManager, ParserConfig, ResultManager, ParseResult, ParseMetrics, OperationStatus, ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity
|
|
11
|
-
|
|
12
|
-
__all__ = [
|
|
13
|
-
# Main Parser Manager
|
|
14
|
-
"ParserManager",
|
|
15
|
-
"ParserManagerConfig",
|
|
16
|
-
"ParserStats",
|
|
17
|
-
"get_parser_manager",
|
|
18
|
-
"quick_parse",
|
|
19
|
-
# Daemon Manager
|
|
20
|
-
"DaemonManager",
|
|
21
|
-
"DaemonStatus",
|
|
22
|
-
# CLI Manager
|
|
23
|
-
"CLIManager",
|
|
24
|
-
# Individual Managers
|
|
25
|
-
"ConfigManager",
|
|
26
|
-
"ParserConfig",
|
|
27
|
-
"ResultManager",
|
|
28
|
-
"ParseResult",
|
|
29
|
-
"ParseMetrics",
|
|
30
|
-
"OperationStatus",
|
|
31
|
-
"ErrorManager",
|
|
32
|
-
"RetryConfig",
|
|
33
|
-
"ErrorInfo",
|
|
34
|
-
"ErrorSeverity",
|
|
35
|
-
|
|
36
|
-
]
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
CLI Manager - Base class for parser CLI interfaces
|
|
3
|
-
|
|
4
|
-
Strict Pydantic v2 compliance and type safety
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import asyncio
|
|
8
|
-
import sys
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import List, Optional, Any, Dict
|
|
11
|
-
import click
|
|
12
|
-
|
|
13
|
-
from .parser_manager import ParserManager, ParserManagerConfig
|
|
14
|
-
from .managers import ParserConfig
|
|
15
|
-
from unrealon_browser.dto.models.config import BrowserConfig
|
|
16
|
-
from unrealon_driver.html_analyzer import HTMLCleaningConfig
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class CLIManager(ParserManager):
|
|
20
|
-
"""Base CLI manager with common CLI functionality."""
|
|
21
|
-
|
|
22
|
-
def __init__(self, parser_name: str, parser_type: str, system_dir: str,
|
|
23
|
-
bridge_enabled: bool = False):
|
|
24
|
-
# Create parser config
|
|
25
|
-
parser_config = ParserConfig(
|
|
26
|
-
parser_name=parser_name,
|
|
27
|
-
parser_type=parser_type,
|
|
28
|
-
system_dir=Path(system_dir)
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
# Create logging config
|
|
32
|
-
# Logging config is now handled internally by ParserManagerConfig
|
|
33
|
-
|
|
34
|
-
# Create other configs
|
|
35
|
-
html_config = HTMLCleaningConfig()
|
|
36
|
-
browser_config = BrowserConfig()
|
|
37
|
-
|
|
38
|
-
# Create manager config
|
|
39
|
-
manager_config = ParserManagerConfig(
|
|
40
|
-
parser_config=parser_config,
|
|
41
|
-
html_config=html_config,
|
|
42
|
-
browser_config=browser_config,
|
|
43
|
-
bridge_enabled=bridge_enabled
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
super().__init__(manager_config)
|
|
47
|
-
|
|
48
|
-
async def run_parse_command(self, urls: Optional[List[str]] = None) -> bool:
|
|
49
|
-
"""Run parse command."""
|
|
50
|
-
try:
|
|
51
|
-
await self.initialize()
|
|
52
|
-
|
|
53
|
-
if urls:
|
|
54
|
-
click.echo(f"🚀 Parsing {len(urls)} URLs...")
|
|
55
|
-
results = []
|
|
56
|
-
for url in urls:
|
|
57
|
-
result = await self.parse_url(url)
|
|
58
|
-
results.append(result)
|
|
59
|
-
|
|
60
|
-
success_count = sum(1 for r in results if r.get("success") == "true")
|
|
61
|
-
click.echo(f"✅ Parse completed: {success_count}/{len(results)} URLs successful")
|
|
62
|
-
return success_count > 0
|
|
63
|
-
else:
|
|
64
|
-
click.echo("❌ No URLs provided", err=True)
|
|
65
|
-
return False
|
|
66
|
-
|
|
67
|
-
except Exception as e:
|
|
68
|
-
click.echo(f"❌ Parse error: {e}", err=True)
|
|
69
|
-
return False
|
|
70
|
-
finally:
|
|
71
|
-
await self.cleanup()
|
|
72
|
-
|
|
73
|
-
async def run_test_command(self) -> bool:
|
|
74
|
-
"""Run test command."""
|
|
75
|
-
try:
|
|
76
|
-
click.echo("🧪 Running test...")
|
|
77
|
-
|
|
78
|
-
await self.initialize()
|
|
79
|
-
click.echo("✅ Parser initialization: OK")
|
|
80
|
-
|
|
81
|
-
# Test HTML cleaning
|
|
82
|
-
html = "<html><body><h1>Test</h1></body></html>"
|
|
83
|
-
cleaned = await self.clean_html(html)
|
|
84
|
-
click.echo(f"✅ HTML cleaning: OK ({len(html)} → {len(cleaned)} chars)")
|
|
85
|
-
|
|
86
|
-
click.echo("✅ All tests passed!")
|
|
87
|
-
return True
|
|
88
|
-
|
|
89
|
-
except Exception as e:
|
|
90
|
-
click.echo(f"❌ Test failed: {e}", err=True)
|
|
91
|
-
return False
|
|
92
|
-
finally:
|
|
93
|
-
await self.cleanup()
|
|
94
|
-
|
|
95
|
-
async def run_quick_command(self, urls: List[str]) -> bool:
|
|
96
|
-
"""Run quick parse command."""
|
|
97
|
-
try:
|
|
98
|
-
click.echo(f"⚡ Quick parse of {len(urls)} URLs...")
|
|
99
|
-
|
|
100
|
-
await self.initialize()
|
|
101
|
-
results = []
|
|
102
|
-
for url in urls:
|
|
103
|
-
result = await self.parse_url(url)
|
|
104
|
-
results.append(result)
|
|
105
|
-
|
|
106
|
-
success_count = sum(1 for r in results if r.get("success") == "true")
|
|
107
|
-
click.echo(f"✅ Quick parse completed: {success_count}/{len(results)} URLs successful")
|
|
108
|
-
|
|
109
|
-
return success_count > 0
|
|
110
|
-
|
|
111
|
-
except Exception as e:
|
|
112
|
-
click.echo(f"❌ Quick parse error: {e}", err=True)
|
|
113
|
-
return False
|
|
114
|
-
finally:
|
|
115
|
-
await self.cleanup()
|
|
116
|
-
|
|
117
|
-
def show_status(self, config_data: Dict[str, Any]) -> None:
|
|
118
|
-
"""Show parser status."""
|
|
119
|
-
click.echo("📊 Parser Status")
|
|
120
|
-
click.echo("=" * 40)
|
|
121
|
-
click.echo(f"Parser Name: {self.config.parser_name}")
|
|
122
|
-
click.echo(f"Parser Type: {self.config.parser_type}")
|
|
123
|
-
click.echo(f"System Dir: {self.config.system_dir}")
|
|
124
|
-
click.echo(f"Bridge: {'Enabled' if self.config.bridge_enabled else 'Disabled'}")
|
|
125
|
-
if self.config.bridge_enabled:
|
|
126
|
-
click.echo(f" URL: {self.config.parser_config.websocket_url} (auto-detected)")
|
|
127
|
-
|
|
128
|
-
@staticmethod
|
|
129
|
-
def create_config_file(config_path: Path, create_func) -> None:
|
|
130
|
-
"""Create configuration file."""
|
|
131
|
-
try:
|
|
132
|
-
create_func(config_path)
|
|
133
|
-
click.echo(f"✅ Configuration file created: {config_path}")
|
|
134
|
-
click.echo(" Edit the file to customize your parser settings")
|
|
135
|
-
except Exception as e:
|
|
136
|
-
click.echo(f"❌ Failed to create configuration: {e}", err=True)
|
|
137
|
-
|
|
138
|
-
@staticmethod
|
|
139
|
-
def run_async_command(coro):
|
|
140
|
-
"""Helper to run async command and exit with proper code."""
|
|
141
|
-
success = asyncio.run(coro)
|
|
142
|
-
sys.exit(0 if success else 1)
|
|
@@ -1,403 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Daemon Manager - Base class for parser daemons
|
|
3
|
-
|
|
4
|
-
Strict Pydantic v2 compliance and type safety
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import asyncio
|
|
8
|
-
import signal
|
|
9
|
-
import time
|
|
10
|
-
from datetime import datetime, timedelta
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import Optional, Callable, Awaitable
|
|
13
|
-
from pydantic import BaseModel, Field
|
|
14
|
-
|
|
15
|
-
from .parser_manager import ParserManager, ParserManagerConfig
|
|
16
|
-
from .managers import ParserConfig
|
|
17
|
-
from unrealon_driver.models import (
|
|
18
|
-
RegistrationMessage, CommandMessage, CommandResponseMessage,
|
|
19
|
-
StatusMessage, HeartbeatMessage, MessageType,
|
|
20
|
-
BridgeRegistrationMessage, BridgeRegistrationPayload
|
|
21
|
-
)
|
|
22
|
-
from unrealon_driver.html_analyzer import HTMLCleaningConfig
|
|
23
|
-
from unrealon_driver.websocket import WebSocketClient, WebSocketConfig
|
|
24
|
-
from unrealon_browser.dto.models.config import BrowserConfig
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class DaemonStatus(BaseModel):
|
|
28
|
-
"""Daemon status information."""
|
|
29
|
-
|
|
30
|
-
running: bool = Field(..., description="Whether daemon is running")
|
|
31
|
-
parser_id: str = Field(..., description="Parser identifier")
|
|
32
|
-
started_at: datetime = Field(..., description="Daemon start time")
|
|
33
|
-
uptime_seconds: float = Field(..., description="Uptime in seconds")
|
|
34
|
-
schedule_enabled: bool = Field(default=False, description="Whether scheduling is active")
|
|
35
|
-
next_run_at: Optional[datetime] = Field(default=None, description="Next scheduled run")
|
|
36
|
-
total_runs: int = Field(default=0, description="Total completed runs")
|
|
37
|
-
successful_runs: int = Field(default=0, description="Successful runs")
|
|
38
|
-
failed_runs: int = Field(default=0, description="Failed runs")
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class DaemonManager(ParserManager):
|
|
42
|
-
"""Base daemon manager with scheduling and status display."""
|
|
43
|
-
|
|
44
|
-
def __init__(self, parser_name: str, parser_type: str, system_dir: str, bridge_enabled: bool = False):
|
|
45
|
-
# Create parser config
|
|
46
|
-
parser_config = ParserConfig(parser_name=parser_name, parser_type=parser_type, system_dir=Path(system_dir))
|
|
47
|
-
|
|
48
|
-
# Create configs
|
|
49
|
-
html_config = HTMLCleaningConfig()
|
|
50
|
-
|
|
51
|
-
# Create manager config
|
|
52
|
-
manager_config = ParserManagerConfig(
|
|
53
|
-
parser_config=parser_config,
|
|
54
|
-
html_config=html_config,
|
|
55
|
-
bridge_enabled=bridge_enabled,
|
|
56
|
-
console_enabled=True,
|
|
57
|
-
file_enabled=True
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
super().__init__(manager_config)
|
|
61
|
-
|
|
62
|
-
# Daemon state
|
|
63
|
-
self.running = False
|
|
64
|
-
self.started_at: Optional[datetime] = None
|
|
65
|
-
self.next_run_at: Optional[datetime] = None
|
|
66
|
-
|
|
67
|
-
# Statistics
|
|
68
|
-
self.total_runs = 0
|
|
69
|
-
self.successful_runs = 0
|
|
70
|
-
self.failed_runs = 0
|
|
71
|
-
|
|
72
|
-
# Setup signal handlers
|
|
73
|
-
signal.signal(signal.SIGINT, self._signal_handler)
|
|
74
|
-
signal.signal(signal.SIGTERM, self._signal_handler)
|
|
75
|
-
|
|
76
|
-
# WebSocket bridge connection
|
|
77
|
-
self.bridge_enabled = bridge_enabled
|
|
78
|
-
self.websocket_client: Optional[WebSocketClient] = None
|
|
79
|
-
|
|
80
|
-
# Registration status
|
|
81
|
-
self.registered = False
|
|
82
|
-
|
|
83
|
-
# Command handlers registry
|
|
84
|
-
self.command_handlers: dict[str, Callable[[dict[str, str]], Awaitable[dict[str, str]]]] = {}
|
|
85
|
-
|
|
86
|
-
# Register built-in commands
|
|
87
|
-
self._register_builtin_commands()
|
|
88
|
-
|
|
89
|
-
def _signal_handler(self, signum: int, frame) -> None:
|
|
90
|
-
"""Handle shutdown signals."""
|
|
91
|
-
self.logger.info(f"🛑 Received signal {signum}, shutting down...")
|
|
92
|
-
self.running = False
|
|
93
|
-
|
|
94
|
-
# RPC methods removed - commands handled through WebSocket bridge
|
|
95
|
-
|
|
96
|
-
async def start_daemon(self, schedule_enabled: bool = False, interval_minutes: Optional[int] = None) -> bool:
|
|
97
|
-
"""Start the daemon."""
|
|
98
|
-
try:
|
|
99
|
-
self.logger.info("🚀 Starting daemon...")
|
|
100
|
-
self.running = True
|
|
101
|
-
self.started_at = datetime.now()
|
|
102
|
-
|
|
103
|
-
# Initialize parser
|
|
104
|
-
await self.initialize()
|
|
105
|
-
|
|
106
|
-
# Connect to WebSocket bridge
|
|
107
|
-
if self.bridge_enabled:
|
|
108
|
-
bridge_connected = await self._connect_to_bridge()
|
|
109
|
-
if not bridge_connected:
|
|
110
|
-
self.logger.warning("⚠️ Failed to connect to bridge, continuing without WebSocket commands")
|
|
111
|
-
else:
|
|
112
|
-
# Register daemon with bridge server
|
|
113
|
-
self.logger.info("🔗 Attempting to register with bridge server...")
|
|
114
|
-
registration_success = await self._register_with_bridge()
|
|
115
|
-
if not registration_success:
|
|
116
|
-
self.logger.warning("⚠️ Failed to register with bridge server")
|
|
117
|
-
|
|
118
|
-
# Calculate next run if scheduling enabled
|
|
119
|
-
if schedule_enabled and interval_minutes:
|
|
120
|
-
self._calculate_next_run(interval_minutes)
|
|
121
|
-
|
|
122
|
-
# Start main loop
|
|
123
|
-
await self._daemon_loop(schedule_enabled, interval_minutes)
|
|
124
|
-
|
|
125
|
-
return True
|
|
126
|
-
|
|
127
|
-
except Exception as e:
|
|
128
|
-
self.logger.error(f"❌ Daemon startup failed: {e}")
|
|
129
|
-
return False
|
|
130
|
-
finally:
|
|
131
|
-
await self.cleanup()
|
|
132
|
-
|
|
133
|
-
def _calculate_next_run(self, interval_minutes: int) -> None:
|
|
134
|
-
"""Calculate next scheduled run time."""
|
|
135
|
-
now = datetime.now()
|
|
136
|
-
self.next_run_at = now + timedelta(minutes=interval_minutes)
|
|
137
|
-
|
|
138
|
-
async def _daemon_loop(self, schedule_enabled: bool, interval_minutes: Optional[int]) -> None:
|
|
139
|
-
"""Main daemon loop."""
|
|
140
|
-
self.logger.info("🔄 Daemon loop started")
|
|
141
|
-
|
|
142
|
-
if schedule_enabled and self.next_run_at:
|
|
143
|
-
self.logger.info(f"⏰ Next run: {self.next_run_at.strftime('%Y-%m-%d %H:%M:%S')}")
|
|
144
|
-
else:
|
|
145
|
-
self.logger.info("📋 Manual mode")
|
|
146
|
-
|
|
147
|
-
last_status_update = time.time()
|
|
148
|
-
|
|
149
|
-
while self.running:
|
|
150
|
-
try:
|
|
151
|
-
current_time = time.time()
|
|
152
|
-
|
|
153
|
-
# Update status every second
|
|
154
|
-
if current_time - last_status_update >= 1.0:
|
|
155
|
-
self._display_status(schedule_enabled)
|
|
156
|
-
last_status_update = current_time
|
|
157
|
-
|
|
158
|
-
# Check for scheduled run
|
|
159
|
-
if self._should_run_now():
|
|
160
|
-
await self._execute_run()
|
|
161
|
-
if interval_minutes:
|
|
162
|
-
self._calculate_next_run(interval_minutes)
|
|
163
|
-
|
|
164
|
-
await asyncio.sleep(0.1)
|
|
165
|
-
|
|
166
|
-
except Exception as e:
|
|
167
|
-
self.logger.error(f"❌ Daemon loop error: {e}")
|
|
168
|
-
await asyncio.sleep(1)
|
|
169
|
-
|
|
170
|
-
def _display_status(self, schedule_enabled: bool) -> None:
|
|
171
|
-
"""Display live status."""
|
|
172
|
-
if not self.running:
|
|
173
|
-
return
|
|
174
|
-
|
|
175
|
-
# Clear previous lines
|
|
176
|
-
print("\033[2K\033[1A" * 3, end="")
|
|
177
|
-
|
|
178
|
-
now = datetime.now()
|
|
179
|
-
uptime = (now - self.started_at).total_seconds() if self.started_at else 0
|
|
180
|
-
|
|
181
|
-
print(f"🕐 {now.strftime('%H:%M:%S')} | ⏱️ Uptime: {int(uptime//3600):02d}:{int((uptime%3600)//60):02d}:{int(uptime%60):02d}")
|
|
182
|
-
|
|
183
|
-
# Schedule status
|
|
184
|
-
if self.next_run_at and schedule_enabled:
|
|
185
|
-
seconds_until = (self.next_run_at - now).total_seconds()
|
|
186
|
-
if seconds_until > 0:
|
|
187
|
-
hours = int(seconds_until // 3600)
|
|
188
|
-
minutes = int((seconds_until % 3600) // 60)
|
|
189
|
-
seconds = int(seconds_until % 60)
|
|
190
|
-
print(f"⏰ Next run in: {hours:02d}:{minutes:02d}:{seconds:02d} | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
|
|
191
|
-
else:
|
|
192
|
-
print(f"🚀 Running now... | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
|
|
193
|
-
else:
|
|
194
|
-
print(f"📋 Manual mode | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
|
|
195
|
-
|
|
196
|
-
status = "🟢 RUNNING" if self.running else "🔴 STOPPED"
|
|
197
|
-
print(f"{status} | 💾 System: {self.config.parser_config.system_dir}")
|
|
198
|
-
|
|
199
|
-
def _should_run_now(self) -> bool:
|
|
200
|
-
"""Check if should run now."""
|
|
201
|
-
if not self.next_run_at:
|
|
202
|
-
return False
|
|
203
|
-
return datetime.now() >= self.next_run_at
|
|
204
|
-
|
|
205
|
-
async def _execute_run(self) -> None:
|
|
206
|
-
"""Execute a parsing run - override in subclass."""
|
|
207
|
-
self.logger.info("🚀 Starting parsing run...")
|
|
208
|
-
|
|
209
|
-
try:
|
|
210
|
-
# Default implementation - override in subclass
|
|
211
|
-
result = await self.parse_url("https://example.com")
|
|
212
|
-
|
|
213
|
-
self.total_runs += 1
|
|
214
|
-
|
|
215
|
-
if result.get("success") == "true":
|
|
216
|
-
self.successful_runs += 1
|
|
217
|
-
self.logger.info("✅ Run completed successfully")
|
|
218
|
-
else:
|
|
219
|
-
self.failed_runs += 1
|
|
220
|
-
self.logger.error("❌ Run failed")
|
|
221
|
-
|
|
222
|
-
except Exception as e:
|
|
223
|
-
self.failed_runs += 1
|
|
224
|
-
self.logger.error(f"❌ Run exception: {e}")
|
|
225
|
-
|
|
226
|
-
def get_status(self) -> DaemonStatus:
|
|
227
|
-
"""Get daemon status."""
|
|
228
|
-
now = datetime.now()
|
|
229
|
-
uptime = (now - self.started_at).total_seconds() if self.started_at else 0
|
|
230
|
-
|
|
231
|
-
return DaemonStatus(
|
|
232
|
-
running=self.running,
|
|
233
|
-
parser_id=self.config.parser_config.parser_name,
|
|
234
|
-
started_at=self.started_at or now,
|
|
235
|
-
uptime_seconds=uptime,
|
|
236
|
-
schedule_enabled=bool(self.next_run_at),
|
|
237
|
-
next_run_at=self.next_run_at,
|
|
238
|
-
total_runs=self.total_runs,
|
|
239
|
-
successful_runs=self.successful_runs,
|
|
240
|
-
failed_runs=self.failed_runs,
|
|
241
|
-
)
|
|
242
|
-
|
|
243
|
-
async def cleanup(self):
|
|
244
|
-
"""Cleanup daemon resources."""
|
|
245
|
-
# Disconnect from bridge
|
|
246
|
-
await self._disconnect_from_bridge()
|
|
247
|
-
|
|
248
|
-
# Parent cleanup
|
|
249
|
-
await super().cleanup()
|
|
250
|
-
|
|
251
|
-
# ==========================================
|
|
252
|
-
# WEBSOCKET BRIDGE MANAGEMENT
|
|
253
|
-
# ==========================================
|
|
254
|
-
|
|
255
|
-
async def _connect_to_bridge(self) -> bool:
|
|
256
|
-
"""Connect to WebSocket bridge server."""
|
|
257
|
-
if not self.bridge_enabled:
|
|
258
|
-
return True
|
|
259
|
-
|
|
260
|
-
try:
|
|
261
|
-
self.logger.info(f"🔌 Connecting to bridge: {self.config.parser_config.websocket_url}")
|
|
262
|
-
|
|
263
|
-
# Create WebSocket config
|
|
264
|
-
ws_config = WebSocketConfig(
|
|
265
|
-
url=self.config.parser_config.websocket_url,
|
|
266
|
-
parser_id=self.config.parser_config.parser_name,
|
|
267
|
-
reconnect_interval=5.0,
|
|
268
|
-
max_reconnect_attempts=10
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
# Create and connect WebSocket client
|
|
272
|
-
self.websocket_client = WebSocketClient(ws_config)
|
|
273
|
-
|
|
274
|
-
# Add command handler
|
|
275
|
-
self.websocket_client.add_message_handler("command", self._handle_websocket_command)
|
|
276
|
-
|
|
277
|
-
success = await self.websocket_client.connect()
|
|
278
|
-
if success:
|
|
279
|
-
self.logger.info("✅ Connected to bridge server")
|
|
280
|
-
return True
|
|
281
|
-
else:
|
|
282
|
-
self.logger.error("❌ Failed to connect to bridge server")
|
|
283
|
-
return False
|
|
284
|
-
|
|
285
|
-
except Exception as e:
|
|
286
|
-
self.logger.error(f"❌ Failed to connect to bridge: {e}")
|
|
287
|
-
return False
|
|
288
|
-
|
|
289
|
-
async def _register_with_bridge(self) -> bool:
|
|
290
|
-
"""Register daemon with bridge server via WebSocket."""
|
|
291
|
-
if not self.websocket_client or not self.websocket_client.connected:
|
|
292
|
-
self.logger.warning("⚠️ Cannot register - WebSocket not connected")
|
|
293
|
-
return False
|
|
294
|
-
|
|
295
|
-
try:
|
|
296
|
-
# Create registration message using Pydantic models
|
|
297
|
-
payload = BridgeRegistrationPayload(
|
|
298
|
-
client_type="daemon",
|
|
299
|
-
parser_id=self.config.parser_config.parser_name,
|
|
300
|
-
version="1.0.0",
|
|
301
|
-
capabilities=["parse", "search", "status", "health"]
|
|
302
|
-
)
|
|
303
|
-
|
|
304
|
-
registration_message = BridgeRegistrationMessage(payload=payload)
|
|
305
|
-
|
|
306
|
-
success = await self.websocket_client.send_message(registration_message.model_dump())
|
|
307
|
-
if success:
|
|
308
|
-
self.registered = True
|
|
309
|
-
self.logger.info(f"✅ Registered daemon with bridge server: {self.config.parser_config.parser_name}")
|
|
310
|
-
return True
|
|
311
|
-
else:
|
|
312
|
-
self.logger.error("❌ Failed to send registration message")
|
|
313
|
-
return False
|
|
314
|
-
|
|
315
|
-
except Exception as e:
|
|
316
|
-
self.logger.error(f"❌ Failed to register with bridge: {e}")
|
|
317
|
-
return False
|
|
318
|
-
|
|
319
|
-
async def _disconnect_from_bridge(self):
|
|
320
|
-
"""Disconnect from WebSocket bridge."""
|
|
321
|
-
if self.websocket_client:
|
|
322
|
-
try:
|
|
323
|
-
await self.websocket_client.disconnect()
|
|
324
|
-
self.logger.info("🔌 Disconnected from bridge")
|
|
325
|
-
except Exception as e:
|
|
326
|
-
self.logger.error(f"❌ Error disconnecting from bridge: {e}")
|
|
327
|
-
finally:
|
|
328
|
-
self.websocket_client = None
|
|
329
|
-
self.registered = False
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
async def _handle_websocket_command(self, message_data: dict[str, str]):
    """Dispatch one incoming WebSocket command and reply with its outcome.

    The raw message is validated into a ``CommandMessage``, the handler
    registered for its ``command_type`` is awaited with the message
    parameters, and a ``CommandResponseMessage`` is sent back over the
    WebSocket client.

    Any failure (validation error, unknown command type, handler exception,
    or a failed send of the success response) is logged and answered with a
    ``success=False`` response carrying the error text. Sending that error
    response is itself best-effort: if it fails, for example because the
    socket is already gone, the failure is logged rather than raised out of
    this handler.

    Args:
        message_data: Raw command message as received from the bridge.
    """
    try:
        # Parse command message using Pydantic model
        command_msg = CommandMessage.model_validate(message_data)

        self.logger.info(f"📨 Received command: {command_msg.command_type} (id: {command_msg.command_id})")

        # Find and execute command handler
        handler = self.command_handlers.get(command_msg.command_type)
        if handler is None:
            raise ValueError(f"Unknown command type: {command_msg.command_type}")

        result = await handler(command_msg.parameters)

        # Send success response using Pydantic model
        response = CommandResponseMessage(
            command_id=command_msg.command_id,
            success=True,
            result_data=result,
        )
        await self.websocket_client.send_message(response.model_dump())
        self.logger.info(f"✅ Command {command_msg.command_type} completed")

    except Exception as e:
        self.logger.error(f"❌ Command failed: {e}")

        # Reporting the failure must not raise a second exception out of the
        # handler (client may be None after a disconnect, the send may fail,
        # or the response model may reject the raw command_id).
        try:
            # Validation may have failed, so read the id from the raw message.
            command_id = message_data.get("command_id", "unknown")
            response = CommandResponseMessage(
                command_id=command_id,
                success=False,
                error=str(e),
            )
            await self.websocket_client.send_message(response.model_dump())
        except Exception as send_error:
            self.logger.error(f"❌ Failed to send error response: {send_error}")
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
# ==========================================
|
|
372
|
-
# COMMAND SYSTEM
|
|
373
|
-
# ==========================================
|
|
374
|
-
|
|
375
|
-
def register_command(self, command_type: str, handler: Callable[[dict[str, str]], Awaitable[dict[str, str]]]):
    """Attach ``handler`` to ``command_type`` in the command table.

    An existing handler for the same command type is replaced.

    Args:
        command_type: Name of the command the handler serves.
        handler: Async callable that takes the command parameters and
            returns the result mapping.
    """
    handlers = self.command_handlers
    handlers[command_type] = handler

    message = f"🔧 Registered command handler: {command_type}"
    self.logger.info(message)
|
|
379
|
-
|
|
380
|
-
def _register_builtin_commands(self):
|
|
381
|
-
"""Register built-in command handlers."""
|
|
382
|
-
self.register_command("status", self._handle_status_command)
|
|
383
|
-
self.register_command("health", self._handle_health_command)
|
|
384
|
-
|
|
385
|
-
async def _handle_status_command(self, parameters: dict[str, str]) -> dict[str, str]:
|
|
386
|
-
"""Built-in status command handler."""
|
|
387
|
-
status = self.get_status()
|
|
388
|
-
return {
|
|
389
|
-
"command_type": "status",
|
|
390
|
-
"running": str(status.running),
|
|
391
|
-
"uptime_seconds": str(status.uptime_seconds),
|
|
392
|
-
"total_runs": str(status.total_runs),
|
|
393
|
-
"successful_runs": str(status.successful_runs),
|
|
394
|
-
"failed_runs": str(status.failed_runs)
|
|
395
|
-
}
|
|
396
|
-
|
|
397
|
-
async def _handle_health_command(self, parameters: dict[str, str]) -> dict[str, str]:
|
|
398
|
-
"""Built-in health command handler."""
|
|
399
|
-
return {
|
|
400
|
-
"command_type": "health",
|
|
401
|
-
"status": "healthy",
|
|
402
|
-
"bridge_connected": str(self.websocket_client.connected if self.websocket_client else False)
|
|
403
|
-
}
|