unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.4.dist-info/METADATA +658 -0
  3. unrealon-1.1.4.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,171 +0,0 @@
1
- """
2
- Base Parser Bridge Server.
3
-
4
- Core server functionality and state management.
5
- """
6
-
7
- import asyncio
8
- from typing import Dict, Callable, Optional, List
9
- from unrealon_rpc.bridge import WebSocketBridge
10
- from unrealon_rpc.rpc import RPCServer
11
- from unrealon_rpc.pubsub import PubSubSubscriber
12
- from unrealon_rpc.logging import get_logger
13
-
14
- from ..models import (
15
- ParserInfo, ParserCommand, ParserSession, ParserEvent, ParserSystemStats
16
- )
17
-
18
- logger = get_logger(__name__)
19
-
20
-
21
class ParserBridgeServerBase:
    """
    Base parser bridge server with core functionality.

    Owns the WebSocket bridge, the RPC server and the PubSub subscriber, and
    holds the in-memory registries (parsers, sessions, commands, proxies,
    parser-to-client mapping, custom command handlers) used by the
    specialized handler classes.
    """

    def __init__(self, redis_url: str = "redis://localhost:6379/0", rpc_channel: str = "parser_rpc", pubsub_prefix: str = "parser", **kwargs):
        """
        Initialize parser bridge server.

        Args:
            redis_url: Redis connection URL
            rpc_channel: RPC channel name
            pubsub_prefix: PubSub channel prefix
            **kwargs: Additional arguments for WebSocketBridge
        """
        self.redis_url = redis_url
        self.rpc_channel = rpc_channel
        self.pubsub_prefix = pubsub_prefix

        # Initialize bridge components
        self.bridge = WebSocketBridge(
            redis_url=redis_url,
            rpc_channel=rpc_channel,
            pubsub_prefix=pubsub_prefix,
            **kwargs
        )

        # Initialize RPC and PubSub
        self.parser_rpc = RPCServer(channel=rpc_channel, redis_url=redis_url)
        self.parser_pubsub = PubSubSubscriber(channel_prefix=pubsub_prefix, redis_url=redis_url)

        # Server state, keyed by the respective id
        self.parsers: Dict[str, ParserInfo] = {}
        self.sessions: Dict[str, ParserSession] = {}
        self.commands: Dict[str, ParserCommand] = {}
        # `object` instead of the builtin function `any`, which is not a type.
        self.proxies: Dict[str, object] = {}  # Will be typed properly in proxy handler

        # Mapping between parser_id and client_id for WebSocket forwarding
        self.parser_to_client: Dict[str, str] = {}

        # Custom command handlers, keyed by command type
        self.command_handlers: Dict[str, Callable] = {}

        # Background tasks
        self._tasks: List[asyncio.Task] = []
        self._running = False

    def get_client_by_parser_id(self, parser_id: str):
        """
        Get WebSocket client by parser_id.

        Returns:
            The entry of ``self.bridge.connections`` mapped to the parser, or
            ``None`` when the parser is unmapped or its client is no longer
            present in the bridge connections.
        """
        client_id = self.parser_to_client.get(parser_id)
        if client_id and client_id in self.bridge.connections:
            return self.bridge.connections[client_id]
        return None

    async def start(self) -> None:
        """Start the parser bridge server. Does nothing if already running."""
        if self._running:
            return

        logger.info("Starting Parser Bridge Server...")

        # Start bridge components
        await self.bridge.start()
        await self.parser_rpc.start()
        # NOTE(review): parser_pubsub.start() is awaited here and again inside
        # _pubsub_listener(); confirm the subscriber tolerates a second start.
        await self.parser_pubsub.start()

        # Start background tasks
        self._tasks.append(asyncio.create_task(self._pubsub_listener()))

        self._running = True
        logger.info("Parser Bridge Server started")

    async def stop(self) -> None:
        """Stop the parser bridge server. Does nothing if not running."""
        if not self._running:
            return

        logger.info("Stopping Parser Bridge Server...")

        # Cancel background tasks
        for task in self._tasks:
            task.cancel()

        # Wait for tasks to complete; return_exceptions=True collects the
        # CancelledError of each cancelled task instead of raising it here.
        if self._tasks:
            await asyncio.gather(*self._tasks, return_exceptions=True)
            self._tasks.clear()

        # Stop bridge components in reverse start order
        await self.parser_pubsub.stop()
        await self.parser_rpc.stop()
        await self.bridge.stop()

        self._running = False
        logger.info("Parser Bridge Server stopped")

    async def _pubsub_listener(self) -> None:
        """
        Listen to parser events via PubSub.

        Registers a handler for the "parser_events" channel, then starts the
        subscriber. Cancellation is logged and propagated; any other error is
        logged and ends the listener.
        """
        try:
            # Register handler for parser events
            @self.parser_pubsub.subscribe("parser_events")
            async def event_handler(payload: dict):
                try:
                    event = ParserEvent.model_validate(payload)
                    await self._handle_parser_event(event)
                except Exception as e:
                    # One bad payload must not take the listener down.
                    logger.error(f"Error processing parser event: {e}")

            # Start the subscriber (this will run indefinitely)
            await self.parser_pubsub.start()

        except asyncio.CancelledError:
            logger.info("PubSub listener cancelled")
            # Re-raise so the task is recorded as cancelled rather than as
            # having finished normally.
            raise
        except Exception as e:
            logger.error(f"PubSub listener error: {e}")

    async def _handle_parser_event(self, event: ParserEvent) -> None:
        """
        Handle parser event from PubSub.

        The base implementation only writes a debug log line.

        Args:
            event: Parser event to handle
        """
        logger.debug(f"Parser event: {event.event_type} from {event.parser_id}")

    def register_command_handler(self, command_type: str, handler: Callable) -> None:
        """
        Register custom command handler.

        A handler already registered for the same command type is replaced.

        Args:
            command_type: Type of command to handle
            handler: Async handler function
        """
        self.command_handlers[command_type] = handler
        logger.info(f"Registered command handler for: {command_type}")

    def get_parser_stats(self) -> ParserSystemStats:
        """Get parser statistics computed from the in-memory registries."""
        # Number of registered parsers per parser_type
        parser_types = {}
        for parser in self.parsers.values():
            parser_types[parser.parser_type] = parser_types.get(parser.parser_type, 0) + 1

        return ParserSystemStats(
            total_parsers=len(self.parsers),
            active_sessions=sum(1 for s in self.sessions.values() if s.status == "active"),
            total_commands=len(self.commands),
            allocated_proxies=len(self.proxies),
            parser_types=parser_types
        )
@@ -1,23 +0,0 @@
1
- """
2
- RPC Handlers for Parser Bridge Server.
3
-
4
- Modular handlers for different types of RPC operations.
5
- """
6
-
7
- from .parser import ParserHandlers
8
- from .session import SessionHandlers
9
- from .command import CommandHandlers
10
- from .proxy import ProxyHandlers
11
- from .html_parser import HTMLParserHandlers
12
- from .logging import LoggingHandlers
13
- from .scheduler import SchedulerHandlers
14
-
15
- __all__ = [
16
- "ParserHandlers",
17
- "SessionHandlers",
18
- "CommandHandlers",
19
- "ProxyHandlers",
20
- "HTMLParserHandlers",
21
- "LoggingHandlers",
22
- "SchedulerHandlers"
23
- ]
@@ -1,110 +0,0 @@
1
- """
2
- Command-related RPC handlers.
3
- """
4
-
5
- import uuid
6
- from unrealon_rpc.logging import get_logger
7
-
8
- from ...models import (
9
- ParserCommand, CommandResult,
10
- CommandExecuteRequest, CommandExecuteResponse,
11
- CommandCreateRequest, CommandCreateResponse,
12
- CommandStatusRequest, CommandStatusResponse
13
- )
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
class CommandHandlers:
    """
    Handlers for command-related RPC operations.

    The methods read ``self.commands``, ``self.command_handlers`` and
    ``self.get_client_by_parser_id``, which this class does not define; they
    are expected on the object the handlers are bound to.
    """

    async def handle_command_execute(self, parser_id: str, command_type: str, parameters: dict, timeout: int = 30) -> dict:
        """
        Handle command execution.

        Stores the command, then either forwards it to the daemon client
        mapped to ``parser_id`` or, when no client is mapped, runs the locally
        registered handler for the command type.

        Args:
            parser_id: ID of the parser the command targets
            command_type: Type of command to execute
            parameters: Command parameters
            timeout: Command timeout value stored on the command

        Returns:
            CommandExecuteResponse as dict. The embedded CommandResult has
            ``success=False`` when neither a daemon nor a local handler was
            available to take the command.
        """
        # Local import: the module-level import block is outside this class.
        import time

        started_at = time.perf_counter()
        try:
            # Create request object for validation
            request = CommandExecuteRequest(
                parser_id=parser_id,
                command_type=command_type,
                parameters=parameters,
                timeout=timeout
            )

            command = ParserCommand(
                command_id=str(uuid.uuid4()),
                command_type=request.command_type,
                parser_id=request.parser_id,
                parameters=request.parameters,
                timeout=request.timeout
            )
            self.commands[command.command_id] = command

            # Flipped to False only when nothing could take the command.
            dispatched = True

            # Forward command to daemon via WebSocket
            daemon_client = self.get_client_by_parser_id(parser_id)
            if daemon_client:
                logger.info(f"📤 Forwarding command {command.command_type} to daemon {parser_id}")
                # Send command via WebSocket
                command_message = {
                    "message_type": "command",
                    "command_id": command.command_id,
                    "command_type": command.command_type,
                    "parameters": command.parameters,
                    "parser_id": parser_id
                }
                await daemon_client.send_message(command_message)
                # For now, return mock response - daemon should respond via WebSocket later
                result_data = {
                    "command_type": command.command_type,
                    "status": "forwarded_to_daemon",
                    "parser_id": parser_id
                }
            else:
                logger.warning(f"⚠️ No daemon found for parser {parser_id}")
                # Fallback to local handlers
                handler = self.command_handlers.get(command.command_type)
                if handler:
                    logger.info(f"🔧 Using local handler for {command.command_type}")
                    result_data = await handler(command)
                else:
                    logger.warning(f"⚠️ No handler found for {command.command_type}")
                    result_data = {"error": f"No daemon connected for parser {parser_id}"}
                    dispatched = False

            result = CommandResult(
                command_id=command.command_id,
                success=dispatched,
                result_data=result_data,
                # Measured wall time of this call instead of a fixed placeholder.
                execution_time=time.perf_counter() - started_at
            )

            if dispatched:
                logger.info(f"Command executed: {command.command_id} ({command.command_type})")
            else:
                logger.warning(f"Command not executed: {command.command_id} ({command.command_type})")

            # The RPC call itself was handled; the outcome lives in `result`.
            response = CommandExecuteResponse(success=True, result=result)
            return response.model_dump(mode='json')

        except Exception as e:
            logger.error(f"Command execution failed: {e}")
            response = CommandExecuteResponse(success=False, error=str(e))
            return response.model_dump(mode='json')

    async def handle_command_create(self, request: CommandCreateRequest) -> CommandCreateResponse:
        """
        Handle command creation.

        Builds a ParserCommand with a fresh UUID from the request and stores it
        in ``self.commands`` without executing it. Any exception is returned as
        a failed response carrying the error text.
        """
        try:
            command = ParserCommand(
                command_id=str(uuid.uuid4()),
                command_type=request.command_type,
                parser_id=request.parser_id,
                parameters=request.parameters
            )
            self.commands[command.command_id] = command

            return CommandCreateResponse(success=True, command=command)
        except Exception as e:
            return CommandCreateResponse(success=False, error=str(e))

    async def handle_command_get_status(self, request: CommandStatusRequest) -> CommandStatusResponse:
        """
        Handle command status request.

        Returns the stored command for ``request.command_id``, or a failed
        response when no such command is stored.
        """
        command = self.commands.get(request.command_id)

        if not command:
            return CommandStatusResponse(success=False, error=f"Command {request.command_id} not found")

        return CommandStatusResponse(success=True, command=command)
@@ -1,139 +0,0 @@
1
- """
2
- HTML Parser RPC handlers.
3
-
4
- Clean implementation following CRITICAL_REQUIREMENTS.md:
5
- - No inline imports
6
- - Strict Pydantic v2 usage
7
- - Complete type annotations
8
- - No Dict[str, Any] usage
9
- """
10
-
11
- import asyncio
12
- import random
13
- import uuid
14
- from datetime import datetime
15
- from typing import Optional
16
-
17
- from unrealon_rpc.logging import get_logger
18
-
19
- from ...models import HTMLParseRPCRequest, HTMLParseRPCResponse, HTMLParseResult
20
-
21
- logger = get_logger(__name__)
22
-
23
-
24
class HTMLParserHandlers:
    """RPC handlers for HTML parsing requests."""

    def __init__(self) -> None:
        """Initialize HTML parser handlers; there is no state to set up."""

    async def handle_html_parse(self, html_content: str, parser_id: str, url: Optional[str] = None, parse_type: str = "general", instructions: Optional[str] = None, timeout: int = 60, metadata: Optional[dict] = None) -> dict:
        """
        Handle HTML parsing request.

        Validates the arguments as an HTMLParseRPCRequest and produces a
        result. The result currently comes from a local simulation; the
        forward to the Django backend is still a TODO.

        Args:
            html_content: Raw HTML content to parse
            parser_id: ID of the parser making the request
            url: Source URL of the HTML (optional)
            parse_type: Type of parsing (product, listing, article, etc.)
            instructions: Additional parsing instructions (optional)
            timeout: Timeout in seconds (default 60s for LLM processing)
            metadata: Additional metadata (optional)

        Returns:
            HTMLParseRPCResponse as dict with success, result, request_id
        """
        try:
            # Build the request model first so invalid input fails early.
            parse_request = HTMLParseRPCRequest(
                html_content=html_content,
                parser_id=parser_id,
                url=url,
                parse_type=parse_type,
                instructions=instructions,
                timeout=timeout,
                metadata=metadata or {},
            )
            new_request_id = str(uuid.uuid4())

            logger.info(f"HTML parse request from parser {parser_id}: {len(html_content)} chars, type: {parse_type}")

            # TODO: In production, make RPC call to Django backend
            # For now, simulate the response
            outcome = await self._simulate_html_parsing(parse_request)

            if outcome.success:
                status_text = "HTML parsed successfully"
            else:
                status_text = "HTML parsing failed"

            ok_response = HTMLParseRPCResponse(success=True, result=outcome, request_id=new_request_id, message=status_text)
            return ok_response.model_dump(mode="json")

        except Exception as exc:
            logger.error(f"HTML parsing failed for parser {parser_id}: {exc}")
            failed_response = HTMLParseRPCResponse(success=False, error=str(exc), message="HTML parsing request failed")
            return failed_response.model_dump(mode="json")

    async def _simulate_html_parsing(self, request: HTMLParseRPCRequest) -> HTMLParseResult:
        """
        Simulate HTML parsing for demo purposes.

        Stand-in for the intended backend call, which would receive the HTML,
        parse it with an LLM, and return structured JSON plus markdown
        instructions.

        Args:
            request: Validated HTML parse request

        Returns:
            HTMLParseResult with success/failure and data/markdown
        """
        # Brief pause standing in for processing time.
        await asyncio.sleep(0.1)

        # Roughly 85% of simulated requests succeed.
        if random.random() < 0.85:
            return self._create_success_result(request)
        return HTMLParseResult(success=False, error_message="Failed to extract structured data from HTML")

    def _create_success_result(self, request: HTMLParseRPCRequest) -> HTMLParseResult:
        """Build a successful result from fixed sample data and a markdown report."""
        # Fixed sample payload; nothing is extracted from the request HTML.
        sample = {
            "title": "Sample Product Title",
            "price": "29,900,000",
            "description": "Sample product description extracted from HTML",
            "specifications": {"year": "2020", "mileage": "45,000 km", "fuel": "Gasoline"},
            "images": ["https://example.com/image1.jpg", "https://example.com/image2.jpg"],
        }

        # Markdown report; only parse_type, title, price and HTML length vary.
        report = f"""# HTML Parsing Results

## Extracted Data
Successfully parsed {request.parse_type} content from the provided HTML.

### Key Findings:
- **Title**: {sample.get('title', 'N/A')}
- **Price**: {sample.get('price', 'N/A')}
- **Content Size**: {len(request.html_content)} characters

### Parsing Notes:
- Applied {request.parse_type} parsing rules
- Processed HTML structure successfully
- Extracted all required fields

### Recommendations:
- Data quality appears good
- Consider validating price format
- Check for additional product images

### Next Steps:
1. Validate extracted data against business rules
2. Store in appropriate database tables
3. Process for further analysis
"""

        return HTMLParseResult(success=True, parsed_data=sample, markdown=report)
@@ -1,95 +0,0 @@
1
- """
2
- Parser logging RPC handlers.
3
-
4
- Handles parser log entries sent from parsers to Django.
5
- """
6
-
7
- from typing import Optional
8
- from unrealon_rpc.logging import get_logger
9
-
10
- from ...models import ParserLogEntry, ParserLogRequest, ParserLogResponse
11
-
12
- logger = get_logger(__name__)
13
-
14
-
15
class LoggingHandlers:
    """Handlers for parser logging RPC operations."""

    # Only these logger methods may be selected by a caller-supplied level.
    _LOG_LEVEL_METHODS = frozenset({"debug", "info", "warning", "error", "critical"})

    def __init__(self) -> None:
        """Initialize logging handlers."""
        pass

    @staticmethod
    def _select_log_method(log, level: str):
        """
        Return the method of ``log`` that matches ``level``.

        The level arrives over RPC, so it is checked against a fixed set of
        names instead of being used as an arbitrary attribute name on the
        logger. Unknown levels fall back to ``log.info``.
        """
        method_name = level.lower()
        if method_name in LoggingHandlers._LOG_LEVEL_METHODS:
            return getattr(log, method_name, log.info)
        return log.info

    async def handle_parser_log(
        self,
        parser_id: str,
        level: str,
        message: str,
        session_id: Optional[str] = None,
        command_id: Optional[str] = None,
        url: Optional[str] = None,
        operation: Optional[str] = None,
        data: Optional[dict] = None,
        error_details: Optional[str] = None
    ) -> dict:
        """
        Handle parser log entry.

        Validates the entry and writes it to the local logger. The code here
        does no forwarding of its own; per the original notes Django picks the
        RPC call up via Redis.

        Args:
            parser_id: ID of the parser sending the log
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
            message: Log message
            session_id: Parser session ID (optional)
            command_id: Command ID if related to command (optional)
            url: URL being processed (optional)
            operation: Operation being performed (optional)
            data: Additional log data (optional)
            error_details: Error details if error log (optional)

        Returns:
            ParserLogResponse as dict with success status
        """
        try:
            # Constructed for validation only; invalid input raises here.
            ParserLogEntry(
                parser_id=parser_id,
                level=level,
                message=message,
                session_id=session_id,
                command_id=command_id,
                url=url,
                operation=operation,
                data=data or {},
                error_details=error_details
            )

            # Log locally for debugging
            local_logger_method = self._select_log_method(logger, level)
            local_logger_method(
                f"Parser {parser_id} log: {message}",
                component="parser_log",
                operation=operation
            )

            # Django will receive this RPC call via Redis and handle the log

            response = ParserLogResponse(
                success=True,
                message="Log entry received and forwarded to Django"
            )

            return response.model_dump(mode='json')

        except Exception as e:
            logger.error(f"Failed to handle parser log from {parser_id}: {e}")

            response = ParserLogResponse(
                success=False,
                error=str(e),
                message="Failed to process log entry"
            )

            return response.model_dump(mode='json')
94
-
95
-
@@ -1,95 +0,0 @@
1
- """
2
- Parser-related RPC handlers.
3
- """
4
-
5
- from unrealon_rpc.logging import get_logger
6
- from unrealon_bridge.configs import load_bridge_config
7
- from unrealon_bridge.models import ParserInfo, ParserHealth, ParserSystemStats, ParserRegisterRequest, ParserRegisterResponse, ParserStatusRequest, ParserStatusResponse, ParserListRequest, ParserListResponse, ParserHealthRequest, ParserHealthResponse
8
-
9
- logger = get_logger(__name__)
10
-
11
-
12
class ParserHandlers:
    """
    Handlers for parser-related RPC operations.

    The methods read ``self.parsers``, ``self.parser_to_client`` and
    ``self.bridge``, which this class does not define; they are expected on
    the object the handlers are bound to.
    """

    @staticmethod
    def _mask_api_key(api_key) -> str:
        """
        Return a log-safe representation of an API key.

        Missing keys render as "None". Keys shorter than 16 characters are
        fully hidden; longer keys show only their first 4 characters.
        """
        if not api_key:
            return "None"
        if len(api_key) < 16:
            return "***"
        return f"{api_key[:4]}..."

    async def handle_parser_register(self, parser_id: str, parser_type: str, version: str, capabilities: list, metadata: dict = None, api_key: str = None) -> dict:
        """
        Handle parser registration.

        Checks the API key when the bridge configuration requires one, stores
        the parser in ``self.parsers`` and maps it to the most recently
        connected WebSocket client.

        Args:
            parser_id: ID of the parser being registered
            parser_type: Parser type label
            version: Parser version
            capabilities: Capabilities reported by the parser
            metadata: Additional metadata (optional)
            api_key: API key presented by the parser (optional)

        Returns:
            ParserRegisterResponse as dict
        """
        try:
            # Load bridge configuration
            config = load_bridge_config()
            masked_key = self._mask_api_key(api_key)

            # Check if API key is required
            if config.security.require_api_key:
                if not api_key:
                    response = ParserRegisterResponse(success=False, error="API key is required")
                    return response.model_dump(mode="json")

                # Validate API key
                if not config.is_valid_api_key(api_key):
                    # Masked: a mistyped valid key must not end up in logs.
                    logger.warning(f"Invalid API key attempted: {masked_key}")
                    response = ParserRegisterResponse(success=False, error="Invalid API key")
                    return response.model_dump(mode="json")

            # Create request object for validation
            request = ParserRegisterRequest(parser_id=parser_id, parser_type=parser_type, version=version, capabilities=capabilities, metadata=metadata)

            parser_info = ParserInfo(parser_id=request.parser_id, parser_type=request.parser_type, version=request.version, capabilities=request.capabilities, metadata=request.metadata or {})
            self.parsers[parser_info.parser_id] = parser_info

            # Find and map the most recent WebSocket client (daemon usually connects then registers immediately)
            # NOTE(review): heuristic; with concurrent registrations the newest
            # connection may belong to a different parser.
            if self.bridge.connections:
                # Get the most recently connected client
                latest_client_id = max(self.bridge.connections.keys(),
                                       key=lambda cid: self.bridge.connections[cid].client_info.connected_at)
                self.parser_to_client[parser_info.parser_id] = latest_client_id
                logger.info(f"🔗 Mapped parser {parser_info.parser_id} to client {latest_client_id}")
            else:
                logger.warning(f"⚠️ No WebSocket clients connected during parser registration")

            # Log successful registration
            logger.info(f"Parser registered: {parser_info.parser_id} ({parser_info.parser_type}) with API key: {masked_key}")

            # Log test key usage in development (masked, never the full key)
            if config.is_development() and api_key in config.security.test_api_keys:
                logger.info(f"🧪 Using test API key for development: {masked_key}")

            response = ParserRegisterResponse(success=True, parser_id=parser_info.parser_id, message="Parser registered successfully")
            return response.model_dump(mode="json")
        except Exception as e:
            logger.error(f"Parser registration failed: {e}")
            response = ParserRegisterResponse(success=False, error=str(e))
            return response.model_dump(mode="json")

    async def handle_parser_get_status(self, parser_id: str) -> dict:
        """
        Handle parser status request.

        Returns:
            ParserStatusResponse as dict; failed when the parser is unknown.
        """
        parser_info = self.parsers.get(parser_id)

        if not parser_info:
            response = ParserStatusResponse(success=False, error=f"Parser {parser_id} not found")
            return response.model_dump(mode="json")

        response = ParserStatusResponse(success=True, parser=parser_info)
        return response.model_dump(mode="json")

    async def handle_parser_list(self, parser_type: str = None) -> dict:
        """
        Handle parser list request.

        Args:
            parser_type: When given, only parsers of this type are returned.

        Returns:
            ParserListResponse as dict with the parsers and their count.
        """
        parsers = list(self.parsers.values())

        if parser_type:
            parsers = [p for p in parsers if p.parser_type == parser_type]

        response = ParserListResponse(success=True, parsers=parsers, total=len(parsers))
        return response.model_dump(mode="json")

    async def handle_parser_get_health(self, parser_id: str) -> dict:
        """
        Handle parser health check.

        Returns:
            ParserHealthResponse as dict; failed when the parser is unknown.
        """
        if parser_id not in self.parsers:
            response = ParserHealthResponse(success=False, error="Parser not found")
            return response.model_dump(mode="json")

        # Health check implementation should be provided by external service;
        # the figures below are fixed placeholders, not measurements.
        health = ParserHealth(parser_id=parser_id, status="healthy", response_time=0.1, memory_usage=50.0, cpu_usage=25.0, active_connections=1, queue_size=0)

        response = ParserHealthResponse(success=True, health=health)
        return response.model_dump(mode="json")