unrealon 1.1.1__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +16 -6
- unrealon-1.1.5.dist-info/METADATA +621 -0
- unrealon-1.1.5.dist-info/RECORD +54 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/entry_points.txt +1 -1
- unrealon_browser/__init__.py +3 -6
- unrealon_browser/core/browser_manager.py +86 -84
- unrealon_browser/dto/models/config.py +2 -0
- unrealon_browser/managers/captcha.py +165 -185
- unrealon_browser/managers/cookies.py +57 -28
- unrealon_browser/managers/logger_bridge.py +94 -34
- unrealon_browser/managers/profile.py +186 -158
- unrealon_browser/managers/stealth.py +58 -47
- unrealon_driver/__init__.py +8 -21
- unrealon_driver/exceptions.py +5 -0
- unrealon_driver/html_analyzer/__init__.py +32 -0
- unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
- unrealon_driver/html_analyzer/config.py +64 -0
- unrealon_driver/html_analyzer/manager.py +247 -0
- unrealon_driver/html_analyzer/models.py +115 -0
- unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
- unrealon_driver/models/__init__.py +31 -0
- unrealon_driver/models/websocket.py +98 -0
- unrealon_driver/parser/__init__.py +4 -23
- unrealon_driver/parser/cli_manager.py +6 -5
- unrealon_driver/parser/daemon_manager.py +242 -66
- unrealon_driver/parser/managers/__init__.py +0 -21
- unrealon_driver/parser/managers/config.py +15 -3
- unrealon_driver/parser/parser_manager.py +225 -395
- unrealon_driver/smart_logging/__init__.py +24 -0
- unrealon_driver/smart_logging/models.py +44 -0
- unrealon_driver/smart_logging/smart_logger.py +406 -0
- unrealon_driver/smart_logging/unified_logger.py +525 -0
- unrealon_driver/websocket/__init__.py +31 -0
- unrealon_driver/websocket/client.py +249 -0
- unrealon_driver/websocket/config.py +188 -0
- unrealon_driver/websocket/manager.py +90 -0
- unrealon-1.1.1.dist-info/METADATA +0 -722
- unrealon-1.1.1.dist-info/RECORD +0 -82
- unrealon_bridge/__init__.py +0 -114
- unrealon_bridge/cli.py +0 -316
- unrealon_bridge/client/__init__.py +0 -93
- unrealon_bridge/client/base.py +0 -78
- unrealon_bridge/client/commands.py +0 -89
- unrealon_bridge/client/connection.py +0 -90
- unrealon_bridge/client/events.py +0 -65
- unrealon_bridge/client/health.py +0 -38
- unrealon_bridge/client/html_parser.py +0 -146
- unrealon_bridge/client/logging.py +0 -139
- unrealon_bridge/client/proxy.py +0 -70
- unrealon_bridge/client/scheduler.py +0 -450
- unrealon_bridge/client/session.py +0 -70
- unrealon_bridge/configs/__init__.py +0 -14
- unrealon_bridge/configs/bridge_config.py +0 -212
- unrealon_bridge/configs/bridge_config.yaml +0 -39
- unrealon_bridge/models/__init__.py +0 -138
- unrealon_bridge/models/base.py +0 -28
- unrealon_bridge/models/command.py +0 -41
- unrealon_bridge/models/events.py +0 -40
- unrealon_bridge/models/html_parser.py +0 -79
- unrealon_bridge/models/logging.py +0 -55
- unrealon_bridge/models/parser.py +0 -63
- unrealon_bridge/models/proxy.py +0 -41
- unrealon_bridge/models/requests.py +0 -95
- unrealon_bridge/models/responses.py +0 -88
- unrealon_bridge/models/scheduler.py +0 -592
- unrealon_bridge/models/session.py +0 -28
- unrealon_bridge/server/__init__.py +0 -91
- unrealon_bridge/server/base.py +0 -171
- unrealon_bridge/server/handlers/__init__.py +0 -23
- unrealon_bridge/server/handlers/command.py +0 -110
- unrealon_bridge/server/handlers/html_parser.py +0 -139
- unrealon_bridge/server/handlers/logging.py +0 -95
- unrealon_bridge/server/handlers/parser.py +0 -95
- unrealon_bridge/server/handlers/proxy.py +0 -75
- unrealon_bridge/server/handlers/scheduler.py +0 -545
- unrealon_bridge/server/handlers/session.py +0 -66
- unrealon_driver/browser/__init__.py +0 -8
- unrealon_driver/browser/config.py +0 -74
- unrealon_driver/browser/manager.py +0 -416
- unrealon_driver/parser/managers/browser.py +0 -51
- unrealon_driver/parser/managers/logging.py +0 -609
- {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/WHEEL +0 -0
- {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Command execution for Parser Bridge Client.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import Optional
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from unrealon_rpc.logging import get_logger
|
|
9
|
-
|
|
10
|
-
from ..models import (
|
|
11
|
-
CommandResult, CommandExecuteRequest, CommandExecuteResponse
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
logger = get_logger(__name__)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class CommandsMixin:
    """Mixin for command execution functionality.

    The methods here use ``self.parser_id``, ``self.bridge_client`` and
    ``self._ensure_registered()``. None of them is defined in this mixin,
    so the host class must provide them.
    """

    async def execute_command(self, command_type: str, parameters: Optional[dict[str, str]] = None, timeout: int = 300) -> CommandResult:
        """
        Execute a parser command via the ``parser.execute_command`` RPC method.

        Args:
            command_type: Type of command to execute
            parameters: Command parameters (an empty dict is sent when omitted)
            timeout: Command timeout in seconds; placed in the request payload
                and also used as the timeout of the RPC call itself

        Returns:
            The result carried by the RPC response when the response reports
            success and has a result. Any exception raised while calling or
            validating the RPC (including the ``RuntimeError`` raised for an
            unsuccessful response) is converted into a ``CommandResult`` with
            ``success=False`` instead of propagating.

        Note:
            ``self._ensure_registered()`` runs before the ``try`` block, so
            whatever it raises propagates to the caller.
        """
        self._ensure_registered()

        request = CommandExecuteRequest(
            command_type=command_type,
            parser_id=self.parser_id,
            parameters=parameters or {},
            timeout=timeout
        )

        started_at = datetime.now()

        try:
            result = await self.bridge_client.call_rpc(
                method="parser.execute_command",
                params=request.model_dump(),
                timeout=timeout
            )

            response = CommandExecuteResponse.model_validate(result)

            if response.success and response.result:
                execution_time = (datetime.now() - started_at).total_seconds()

                # Log command completion
                await self._log_command_event(
                    event_type="command_completed",
                    message=f"Command {command_type} completed",
                    data={"command_type": command_type, "execution_time": str(execution_time)}
                )

                return response.result
            else:
                # Raised inside the try on purpose: the handler below turns it
                # into a failed CommandResult like any other error.
                raise RuntimeError(f"Command execution failed: {response.error}")

        except Exception as e:
            execution_time = (datetime.now() - started_at).total_seconds()

            # The exception is swallowed into a result object, and
            # _log_command_event is a no-op in this mixin, so record the
            # failure in the module logger to keep it visible.
            logger.error(f"Command {command_type} failed after {execution_time}s: {e}")

            # Create error result (a fresh command_id is generated locally)
            command_result = CommandResult(
                command_id=str(uuid.uuid4()),
                success=False,
                error_message=str(e),
                execution_time=execution_time
            )

            # Log command error with the same structured data as the success path
            await self._log_command_event(
                event_type="command_failed",
                level="error",
                message=f"Command {command_type} failed: {str(e)}",
                data={"command_type": command_type, "execution_time": str(execution_time)}
            )

            return command_result

    async def _log_command_event(self, event_type: str, message: str, level: str = "info", data: Optional[dict[str, str]] = None) -> None:
        """Log command-related event.

        This implementation does nothing; per the original comment, event
        logging is expected to be handled by the main client.
        """
        # Event logging will be handled by the main client
        pass
|
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Connection and registration management for Parser Bridge Client.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import Optional
|
|
7
|
-
from unrealon_rpc.logging import get_logger
|
|
8
|
-
|
|
9
|
-
from ..models import (
|
|
10
|
-
ParserInfo, ParserRegisterRequest, ParserRegisterResponse
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
logger = get_logger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ConnectionMixin:
    """Mixin for connection and registration functionality.

    Uses ``self.bridge_client``, ``self.session_id``, ``self.parser_type``,
    ``self.parser_version`` and ``self.capabilities``, and optionally
    ``self.api_key``; the host class must provide them. Sets
    ``self.parser_id`` and ``self.registered`` on successful registration.
    """

    async def connect(self) -> None:
        """Connect to bridge and register parser."""
        await self.bridge_client.connect()
        # Auto-register parser after connection
        await self.register_parser()

    async def disconnect(self) -> None:
        """Disconnect from bridge."""
        if self.session_id:
            # Session cleanup will be handled by the main client
            pass

        await self.bridge_client.disconnect()

    async def register_parser(self, metadata: Optional[dict[str, str]] = None) -> ParserInfo:
        """
        Register parser with the system via the ``parser.register`` RPC method.

        Args:
            metadata: Additional parser metadata

        Returns:
            Parser registration information

        Raises:
            RuntimeError: If the response reports that registration failed.
        """
        request = ParserRegisterRequest(
            parser_id=str(uuid.uuid4()),
            parser_type=self.parser_type,
            version=self.parser_version,
            capabilities=self.capabilities,
            metadata=metadata
        )

        # Prepare params with API key
        params = request.model_dump()
        if hasattr(self, 'api_key') and self.api_key:
            params["api_key"] = self.api_key
            # Never write any part of the key to the log; record only that one was attached.
            logger.info("🔑 Adding API key to params")
        else:
            # In this branch the attribute is missing or falsy, so the logged value is not a usable key.
            logger.warning(f"⚠️ No API key available! hasattr: {hasattr(self, 'api_key')}, api_key: {getattr(self, 'api_key', 'NOT_FOUND')}")

        result = await self.bridge_client.call_rpc(
            method="parser.register",
            params=params
        )

        # Parse response using typed model
        response = ParserRegisterResponse.model_validate(result)

        if response.success:
            # Prefer the id in the response; fall back to the locally generated one.
            self.parser_id = response.parser_id or request.parser_id
            self.registered = True

            logger.info(f"Parser registered: {self.parser_id} ({self.parser_type})")

            return ParserInfo(
                parser_id=self.parser_id,
                parser_type=self.parser_type,
                version=self.parser_version,
                capabilities=self.capabilities,
                metadata=metadata or {}
            )
        else:
            raise RuntimeError(f"Parser registration failed: {response.error}")

    async def __aenter__(self):
        """Async context manager entry.

        Connects and registers. If that fails, a best-effort disconnect is
        attempted before the original exception is re-raised, because
        ``__aexit__`` is not invoked when ``__aenter__`` raises.
        """
        try:
            await self.connect()
        except BaseException:
            try:
                await self.disconnect()
            except Exception as cleanup_error:
                # Do not mask the original failure with a cleanup failure.
                logger.warning(f"Disconnect after failed connect also failed: {cleanup_error}")
            raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.disconnect()
|
unrealon_bridge/client/events.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Event logging and heartbeat for Parser Bridge Client.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import Optional
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from unrealon_rpc.logging import get_logger
|
|
9
|
-
|
|
10
|
-
from ..models import ParserEvent, ParserStats
|
|
11
|
-
|
|
12
|
-
logger = get_logger(__name__)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class EventsMixin:
    """Event publishing and heartbeat helpers for a bridge client.

    Reads ``self.registered``, ``self.parser_id``, ``self.session_id`` and
    ``self.bridge_client`` from the host class.
    """

    async def log_event(self, event_type: str, message: str, level: str = "info", data: Optional[dict[str, str]] = None, command_id: Optional[str] = None) -> None:
        """
        Publish a parser event on the ``parser_events`` channel.

        Does nothing when the parser is not registered.

        Args:
            event_type: Type of event
            message: Event message
            level: Log level
            data: Additional event data (string values only)
            command_id: Associated command ID
        """
        if not self.registered:
            return

        # Collect the fields first, then build the typed event in one go.
        event_fields = dict(
            event_id=str(uuid.uuid4()),
            parser_id=self.parser_id,
            event_type=event_type,
            level=level,
            message=message,
            data=data or {},
            session_id=self.session_id,
            command_id=command_id,
        )
        parser_event = ParserEvent(**event_fields)

        # Send event via PubSub
        await self.bridge_client.publish("parser_events", parser_event.model_dump())

    async def send_heartbeat(self, stats: Optional[ParserStats] = None) -> None:
        """
        Send an ``"alive"`` heartbeat, optionally carrying parser statistics.

        Does nothing when the parser is not registered.

        Args:
            stats: Parser statistics; included under the ``stats`` key when truthy
        """
        if not self.registered:
            return

        payload = dict(
            parser_id=self.parser_id,
            timestamp=datetime.now().isoformat(),
            session_id=self.session_id or "",
        )

        if stats:
            payload["stats"] = stats.model_dump()

        await self.bridge_client.send_heartbeat("alive", payload)
|
unrealon_bridge/client/health.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Health monitoring for Parser Bridge Client.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from unrealon_rpc.logging import get_logger
|
|
6
|
-
|
|
7
|
-
from ..models import (
|
|
8
|
-
ParserHealth, ParserHealthRequest, ParserHealthResponse
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
logger = get_logger(__name__)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class HealthMixin:
    """Health-check helper for a bridge client.

    Relies on ``self._ensure_registered()``, ``self.parser_id`` and
    ``self.bridge_client`` supplied by the host class.
    """

    async def get_health(self) -> ParserHealth:
        """
        Fetch parser health through the ``parser.get_health`` RPC method.

        Returns:
            The health object carried by the response.

        Raises:
            RuntimeError: If the response is unsuccessful or has no health payload.
        """
        self._ensure_registered()

        payload = ParserHealthRequest(parser_id=self.parser_id).model_dump()
        raw_reply = await self.bridge_client.call_rpc(
            method="parser.get_health",
            params=payload
        )
        reply = ParserHealthResponse.model_validate(raw_reply)

        # Guard clause: anything but a successful reply with a health payload is an error.
        if not (reply.success and reply.health):
            raise RuntimeError(f"Health check failed: {reply.error}")

        return reply.health
|
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
HTML Parser functionality for Parser Bridge Client.
|
|
3
|
-
|
|
4
|
-
Provides methods for sending HTML content to Django for AI-powered parsing.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from typing import Optional, Dict
|
|
8
|
-
from unrealon_rpc.logging import get_logger
|
|
9
|
-
|
|
10
|
-
from ..models import HTMLParseResult, HTMLParseRPCResponse, HTMLParseRPCRequest
|
|
11
|
-
|
|
12
|
-
logger = get_logger(__name__)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class HTMLParserMixin:
    """Mixin for HTML parsing functionality.

    Reads ``self.registered``, ``self.parser_id`` and ``self.bridge_client``
    from the host class.
    """

    async def parse_html(
        self,
        html_content: str,
        url: Optional[str] = None,
        parse_type: str = "general",
        instructions: Optional[str] = None,
        timeout: int = 60,
        metadata: Optional[Dict[str, str]] = None
    ) -> HTMLParseResult:
        """
        Parse HTML content via the ``html_parser.parse`` RPC method.

        This method does not raise for RPC or validation errors: every
        failure is reported as an ``HTMLParseResult`` with ``success=False``.

        Args:
            html_content: Raw HTML content to parse
            url: Source URL of the HTML (for context)
            parse_type: Type of parsing (product, listing, article, etc.)
            instructions: Additional parsing instructions for the LLM
            timeout: Timeout in seconds (default 60s for LLM processing);
                the RPC call itself is given ``timeout + 5``
            metadata: Additional metadata (an empty dict is sent when omitted)

        Returns:
            HTMLParseResult with parsed data or error information

        Example:
            ```python
            # Parse product page HTML
            result = await client.parse_html(
                html_content="<html>...</html>",
                url="https://encar.com/car/123456",
                parse_type="car_product",
                instructions="Extract car details, price, and specifications"
            )

            if result.success:
                print(f"Parsed data: {result.parsed_data}")
                print(f"Instructions: {result.markdown}")
            else:
                print(f"Parse failed: {result.error_message}")
            ```
        """
        if not self.registered:
            logger.warning("Cannot parse HTML - parser not registered")
            return HTMLParseResult(
                success=False,
                error_message="Parser not registered"
            )

        try:
            logger.info(f"Parsing HTML content: {len(html_content)} chars, type: {parse_type}")

            request = HTMLParseRPCRequest(
                html_content=html_content,
                parser_id=self.parser_id,
                url=url,
                parse_type=parse_type,
                instructions=instructions,
                timeout=timeout,
                metadata=metadata or {}
            )

            response = await self.bridge_client.call_rpc(
                method="html_parser.parse",
                params=request.model_dump(),
                timeout=timeout + 5  # Add buffer for network/processing
            )

            rpc_response = HTMLParseRPCResponse.model_validate(response)

            if rpc_response.success and rpc_response.result:
                logger.info("HTML parsing completed successfully")
                return rpc_response.result
            else:
                error_msg = rpc_response.error or "Unknown parsing error"
                logger.error(f"HTML parsing failed: {error_msg}")
                return HTMLParseResult(
                    success=False,
                    error_message=error_msg
                )

        except Exception as e:
            error_msg = f"HTML parsing request failed: {e}"
            logger.error(error_msg)
            return HTMLParseResult(
                success=False,
                error_message=error_msg
            )

    async def parse_html_with_retry(
        self,
        html_content: str,
        max_retries: int = 3,
        **kwargs
    ) -> HTMLParseResult:
        """
        Parse HTML with automatic retry on failure.

        Attempts are made back to back, with no delay between them.

        Args:
            html_content: Raw HTML content to parse
            max_retries: Maximum number of attempts; values below 1 are
                treated as 1 so that the content is always tried at least once
            **kwargs: Additional arguments passed to parse_html

        Returns:
            The first successful HTMLParseResult, or a failed result that
            carries the last error once every attempt has failed
        """
        # A non-positive value used to skip parsing entirely and report
        # "Last error: None"; always make at least one attempt.
        attempts = max(1, max_retries)
        last_error = None

        for attempt in range(attempts):
            try:
                result = await self.parse_html(html_content, **kwargs)

                if result.success:
                    if attempt > 0:
                        logger.info(f"HTML parsing succeeded on attempt {attempt + 1}")
                    return result
                else:
                    last_error = result.error_message
                    if attempt < attempts - 1:
                        logger.warning(f"HTML parsing failed on attempt {attempt + 1}, retrying...")

            except Exception as e:
                # parse_html as written above reports failures in its result, but
                # this still covers bad kwargs (TypeError) and overriding implementations.
                last_error = str(e)
                if attempt < attempts - 1:
                    logger.warning(f"HTML parsing error on attempt {attempt + 1}, retrying: {e}")

        logger.error(f"HTML parsing failed after {attempts} attempts")
        return HTMLParseResult(
            success=False,
            error_message=f"Failed after {attempts} attempts. Last error: {last_error}"
        )
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Parser logging functionality for Parser Bridge Client.
|
|
3
|
-
|
|
4
|
-
Provides methods for sending parser logs to Django via WebSocket/Redis.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from typing import Optional, Dict
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
from unrealon_rpc.logging import get_logger
|
|
10
|
-
|
|
11
|
-
from ..models import ParserLogEntry, ParserLogResponse
|
|
12
|
-
|
|
13
|
-
logger = get_logger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class LoggingMixin:
    """Remote logging helpers for a bridge client.

    Reads ``self.registered``, ``self.parser_id`` and ``self.bridge_client``
    from the host class. Every method returns a bool and reports failures
    through that return value.
    """

    async def send_log(
        self,
        level: str,
        message: str,
        session_id: Optional[str] = None,
        command_id: Optional[str] = None,
        url: Optional[str] = None,
        operation: Optional[str] = None,
        data: Optional[Dict[str, str]] = None,
        error_details: Optional[str] = None
    ) -> bool:
        """
        Send one log entry through the ``parser.log`` RPC method.

        Args:
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL); upper-cased before sending
            message: Log message
            session_id: Parser session ID (optional)
            command_id: Command ID if related to command (optional)
            url: URL being processed (optional)
            operation: Operation being performed (optional)
            data: Additional log data (optional; an empty dict is sent when omitted)
            error_details: Error details if error log (optional)

        Returns:
            True if the response reports success. False if the parser is not
            registered, the response reports failure, or any exception occurs.

        Example:
            ```python
            await client.send_log("INFO", "Started parsing product page")

            await client.send_log(
                "ERROR",
                "Failed to parse product price",
                url="https://example.com/product/123",
                operation="price_extraction",
                error_details="Price element not found in DOM"
            )
            ```
        """
        if not self.registered:
            logger.warning("Cannot send log - parser not registered")
            return False

        try:
            entry = ParserLogEntry(
                parser_id=self.parser_id,
                level=level.upper(),
                message=message,
                session_id=session_id,
                command_id=command_id,
                url=url,
                operation=operation,
                data=data or {},
                error_details=error_details
            )

            raw_reply = await self.bridge_client.call_rpc(
                method="parser.log",
                params=entry.model_dump()
            )
            reply = ParserLogResponse.model_validate(raw_reply)

            # Handle the failure case first; success falls through.
            if not reply.success:
                logger.error(f"Failed to send log to Django: {reply.error}")
                return False

            logger.debug(f"Log sent to Django: {level} - {message}")
            return True

        except Exception as e:
            logger.error(f"Log sending failed: {e}")
            return False

    async def log_debug(self, message: str, **kwargs) -> bool:
        """Send ``message`` at DEBUG level; extra keywords go to ``send_log``."""
        sent = await self.send_log("DEBUG", message, **kwargs)
        return sent

    async def log_info(self, message: str, **kwargs) -> bool:
        """Send ``message`` at INFO level; extra keywords go to ``send_log``."""
        sent = await self.send_log("INFO", message, **kwargs)
        return sent

    async def log_warning(self, message: str, **kwargs) -> bool:
        """Send ``message`` at WARNING level; extra keywords go to ``send_log``."""
        sent = await self.send_log("WARNING", message, **kwargs)
        return sent

    async def log_error(self, message: str, **kwargs) -> bool:
        """Send ``message`` at ERROR level; extra keywords go to ``send_log``."""
        sent = await self.send_log("ERROR", message, **kwargs)
        return sent

    async def log_critical(self, message: str, **kwargs) -> bool:
        """Send ``message`` at CRITICAL level; extra keywords go to ``send_log``."""
        sent = await self.send_log("CRITICAL", message, **kwargs)
        return sent

    async def log_operation_start(self, operation: str, **kwargs) -> bool:
        """Send an INFO entry announcing that ``operation`` has started."""
        text = f"Started {operation}"
        return await self.log_info(text, operation=operation, **kwargs)

    async def log_operation_end(self, operation: str, **kwargs) -> bool:
        """Send an INFO entry announcing that ``operation`` has completed."""
        text = f"Completed {operation}"
        return await self.log_info(text, operation=operation, **kwargs)

    async def log_operation_error(self, operation: str, error: str, **kwargs) -> bool:
        """Send an ERROR entry for a failed ``operation``, with ``error`` as the details."""
        text = f"Failed {operation}: {error}"
        return await self.log_error(
            text,
            operation=operation,
            error_details=error,
            **kwargs
        )
|
unrealon_bridge/client/proxy.py
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Proxy management for Parser Bridge Client.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
from typing import Optional
|
|
6
|
-
from unrealon_rpc.logging import get_logger
|
|
7
|
-
|
|
8
|
-
from ..models import (
|
|
9
|
-
ProxyInfo, ProxyAllocateRequest, ProxyAllocateResponse,
|
|
10
|
-
ProxyReleaseRequest, ProxyReleaseResponse
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
logger = get_logger(__name__)
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ProxyMixin:
    """Proxy allocation and release helpers for a bridge client.

    Relies on ``self._ensure_registered()``, ``self.parser_id`` and
    ``self.bridge_client`` supplied by the host class.
    """

    async def request_proxy(self, proxy_type: str = "http", location: Optional[str] = None) -> ProxyInfo:
        """
        Ask for a proxy through the ``proxy.allocate`` RPC method.

        Args:
            proxy_type: Type of proxy needed
            location: Preferred proxy location

        Returns:
            The proxy carried by the response.

        Raises:
            RuntimeError: If the response is unsuccessful or carries no proxy.
        """
        self._ensure_registered()

        payload = ProxyAllocateRequest(
            parser_id=self.parser_id,
            proxy_type=proxy_type,
            location=location
        ).model_dump()

        raw_reply = await self.bridge_client.call_rpc(
            method="proxy.allocate",
            params=payload
        )
        reply = ProxyAllocateResponse.model_validate(raw_reply)

        # Guard clause: bail out unless a proxy was actually handed back.
        if not (reply.success and reply.proxy):
            raise RuntimeError(f"Proxy allocation failed: {reply.error}")

        logger.info(f"Proxy allocated: {reply.proxy.proxy_id} for {self.parser_id}")
        return reply.proxy

    async def release_proxy(self, proxy_id: str) -> None:
        """
        Release a proxy through the ``proxy.release`` RPC method.

        An unsuccessful response is only logged; nothing is raised for it.

        Args:
            proxy_id: Proxy ID to release
        """
        payload = ProxyReleaseRequest(proxy_id=proxy_id).model_dump()

        raw_reply = await self.bridge_client.call_rpc(
            method="proxy.release",
            params=payload
        )
        reply = ProxyReleaseResponse.model_validate(raw_reply)

        if not reply.success:
            logger.error(f"Proxy release failed: {reply.error}")
            return

        logger.info(f"Proxy released: {proxy_id}")
|