unrealon 1.1.1__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83):
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.5.dist-info/METADATA +621 -0
  3. unrealon-1.1.5.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,75 +0,0 @@
1
- """
2
- Proxy-related RPC handlers.
3
- """
4
-
5
- import uuid
6
- from unrealon_rpc.logging import get_logger
7
-
8
- from ...models import (
9
- ProxyInfo,
10
- ProxyAllocateRequest, ProxyAllocateResponse,
11
- ProxyReleaseRequest, ProxyReleaseResponse,
12
- ProxyCheckRequest, ProxyCheckResponse
13
- )
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
- class ProxyHandlers:
19
- """Handlers for proxy-related RPC operations."""
20
-
21
- async def handle_proxy_allocate(self, parser_id: str, proxy_type: str, location: str = None) -> dict:
22
- """Handle proxy allocation."""
23
- try:
24
- # Create request object for validation
25
- request = ProxyAllocateRequest(
26
- parser_id=parser_id,
27
- proxy_type=proxy_type,
28
- location=location
29
- )
30
-
31
- # Proxy allocation should be implemented by external service
32
- proxy_info = ProxyInfo(
33
- proxy_id=str(uuid.uuid4()),
34
- parser_id=request.parser_id,
35
- host="127.0.0.1",
36
- port=8080,
37
- proxy_type=request.proxy_type,
38
- location=request.location
39
- )
40
-
41
- self.proxies[proxy_info.proxy_id] = proxy_info
42
-
43
- logger.info(f"Proxy allocated: {proxy_info.proxy_id} for {request.parser_id}")
44
-
45
- response = ProxyAllocateResponse(success=True, proxy=proxy_info)
46
- return response.model_dump(mode='json')
47
-
48
- except Exception as e:
49
- response = ProxyAllocateResponse(success=False, error=str(e))
50
- return response.model_dump(mode='json')
51
-
52
- async def handle_proxy_release(self, proxy_id: str) -> dict:
53
- """Handle proxy release."""
54
- try:
55
- # Create request object for validation
56
- request = ProxyReleaseRequest(proxy_id=proxy_id)
57
-
58
- if request.proxy_id in self.proxies:
59
- del self.proxies[request.proxy_id]
60
- logger.info(f"Proxy released: {request.proxy_id}")
61
-
62
- response = ProxyReleaseResponse(success=True, proxy_id=request.proxy_id, message="Proxy released")
63
- return response.model_dump(mode='json')
64
- except Exception as e:
65
- logger.error(f"Proxy release failed: {e}")
66
- response = ProxyReleaseResponse(success=False, proxy_id=proxy_id, error=str(e))
67
- return response.model_dump(mode='json')
68
-
69
- async def handle_proxy_check(self, request: ProxyCheckRequest) -> ProxyCheckResponse:
70
- """Handle proxy health check."""
71
- if request.proxy_id not in self.proxies:
72
- return ProxyCheckResponse(success=False, error="Proxy not found")
73
-
74
- # Proxy health check should be implemented by external service
75
- return ProxyCheckResponse(success=True, proxy_id=request.proxy_id, status="healthy")
@@ -1,545 +0,0 @@
1
- """
2
- Scheduler RPC handlers for Parser Bridge Server.
3
-
4
- Handles scheduler-related RPC operations including task management,
5
- cron scheduling, and parser status tracking.
6
- """
7
-
8
- from typing import Dict, Any, List, Optional
9
- from datetime import datetime, timedelta
10
-
11
- from unrealon_rpc.logging import get_logger
12
- from unrealon_bridge.models.scheduler import (
13
- ScheduledTask,
14
- TaskParameters,
15
- TaskPriority,
16
- ScheduleType,
17
- CronExpression,
18
- TaskRetryConfig,
19
- TaskExecutionResult,
20
- ParserStatus,
21
- TaskQueue,
22
- SchedulerError,
23
- TaskValidationError,
24
- TaskExecutionError,
25
- )
26
- from unrealon_bridge.models.responses import BaseRPCResponse
27
-
28
- logger = get_logger(__name__)
29
-
30
-
31
- class SchedulerTaskCreateResponse(BaseRPCResponse):
32
- """Response for task creation."""
33
- task_id: Optional[str] = None
34
- scheduled_at: Optional[datetime] = None
35
- next_run: Optional[datetime] = None
36
-
37
-
38
- class SchedulerTaskListResponse(BaseRPCResponse):
39
- """Response for task listing."""
40
- tasks: List[ScheduledTask]
41
- total_count: int
42
-
43
-
44
- class SchedulerTaskStatusResponse(BaseRPCResponse):
45
- """Response for task status."""
46
- task: ScheduledTask
47
- execution_history: List[TaskExecutionResult]
48
-
49
-
50
- class SchedulerParserStatusResponse(BaseRPCResponse):
51
- """Response for parser status."""
52
- parser_status: ParserStatus
53
- active_tasks: List[str]
54
-
55
-
56
- class SchedulerStatsResponse(BaseRPCResponse):
57
- """Response for scheduler statistics."""
58
- total_tasks: int
59
- active_tasks: int
60
- completed_tasks: int
61
- failed_tasks: int
62
- parsers_online: int
63
- parsers_offline: int
64
-
65
-
66
- # In-memory storage for demonstration (in production, use Redis/Database)
67
- _scheduled_tasks: Dict[str, ScheduledTask] = {}
68
- _parser_statuses: Dict[str, ParserStatus] = {}
69
- _task_execution_history: Dict[str, List[TaskExecutionResult]] = {}
70
-
71
-
72
- class SchedulerHandlers:
73
- """Scheduler RPC handlers."""
74
-
75
- async def handle_scheduler_create_task(self, parser_id: str, task_type: str, parameters: dict, cron_expression: str = None, scheduled_at: str = None, retry_config: dict = None, priority: int = 5, timeout_seconds: int = 300, api_key: str = None) -> Dict[str, Any]:
76
- """Create a new scheduled task."""
77
- return await handle_scheduler_create_task(parser_id, task_type, parameters, cron_expression, scheduled_at, retry_config, priority, timeout_seconds, api_key)
78
-
79
- async def handle_scheduler_list_tasks(self, parser_id: str = None, task_type: str = None, status: str = None, limit: int = 100, offset: int = 0, api_key: str = None) -> Dict[str, Any]:
80
- """List scheduled tasks."""
81
- return await handle_scheduler_list_tasks(parser_id, task_type, status, limit, offset, api_key)
82
-
83
- async def handle_scheduler_get_task(self, **params) -> Dict[str, Any]:
84
- """Get task status and execution history."""
85
- return await handle_scheduler_get_task(**params)
86
-
87
- async def handle_scheduler_cancel_task(self, **params) -> Dict[str, Any]:
88
- """Cancel a scheduled task."""
89
- return await handle_scheduler_cancel_task(**params)
90
-
91
- async def handle_scheduler_update_parser_status(self, parser_id: str, status: str, last_seen: str = None, current_task_id: str = None, capabilities: list = None, api_key: str = None) -> Dict[str, Any]:
92
- """Update parser status for scheduler."""
93
- return await handle_scheduler_update_parser_status(parser_id, status, last_seen, current_task_id, capabilities, api_key)
94
-
95
- async def handle_scheduler_get_parser_status(self, **params) -> Dict[str, Any]:
96
- """Get parser status and active tasks."""
97
- return await handle_scheduler_get_parser_status(**params)
98
-
99
- async def handle_scheduler_get_stats(self, api_key: str = None) -> Dict[str, Any]:
100
- """Get scheduler statistics."""
101
- return await handle_scheduler_get_stats(api_key)
102
-
103
-
104
- async def handle_scheduler_create_task(parser_id: str, task_type: str, parameters: dict, cron_expression: str = None, scheduled_at: str = None, retry_config: dict = None, priority: int = 5, timeout_seconds: int = 300, api_key: str = None) -> Dict[str, Any]:
105
- """
106
- Create a new scheduled task.
107
-
108
- Expected params:
109
- parser_id: str
110
- task_type: str (command, scrape, parse, etc.)
111
- parameters: TaskParameters
112
- cron_expression: Optional[str]
113
- scheduled_at: Optional[datetime]
114
- retry_config: Optional[TaskRetryConfig]
115
- priority: Optional[int]
116
- timeout_seconds: Optional[int]
117
- api_key: Optional[str]
118
- """
119
- try:
120
- # Validate required parameters
121
- if not parser_id or not task_type:
122
- return SchedulerTaskCreateResponse(
123
- success=False,
124
- error="parser_id and task_type are required"
125
- ).model_dump(mode='json')
126
-
127
- # Create TaskParameters
128
- try:
129
- task_parameters = TaskParameters.model_validate(parameters)
130
- except Exception as e:
131
- raise TaskValidationError(f"Invalid task parameters: {e}")
132
-
133
- # Handle cron expression if provided
134
- cron_expr = None
135
- if cron_expression:
136
- try:
137
- cron_expr = CronExpression(expression=cron_expression)
138
- except Exception as e:
139
- raise TaskValidationError(f"Invalid cron expression: {e}")
140
-
141
- # Handle retry config if provided
142
- retry_cfg = None
143
- if retry_config:
144
- try:
145
- retry_cfg = TaskRetryConfig.model_validate(retry_config)
146
- except Exception as e:
147
- raise TaskValidationError(f"Invalid retry config: {e}")
148
-
149
- # Handle scheduled_at if provided
150
- scheduled_datetime = None
151
- if scheduled_at:
152
- try:
153
- scheduled_datetime = datetime.fromisoformat(scheduled_at)
154
- except Exception as e:
155
- raise TaskValidationError(f"Invalid scheduled_at format: {e}")
156
-
157
- # Determine schedule type
158
- if cron_expr:
159
- schedule_type = ScheduleType.RECURRING
160
- elif scheduled_datetime:
161
- schedule_type = ScheduleType.DELAYED
162
- else:
163
- schedule_type = ScheduleType.IMMEDIATE
164
-
165
- # Create scheduled task
166
- task_data = {
167
- "task_name": f"{task_type}_{parser_id}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}",
168
- "parser_type": task_type, # Using task_type as parser_type for now
169
- "parser_id": parser_id,
170
- "task_parameters": task_parameters,
171
- "priority": TaskPriority(priority) if priority in [1, 3, 5, 7, 9] else TaskPriority.NORMAL,
172
- "schedule_type": schedule_type,
173
- }
174
-
175
- # Add optional fields based on schedule type
176
- if cron_expr:
177
- task_data["cron_schedule"] = cron_expr
178
- if scheduled_datetime:
179
- task_data["scheduled_at"] = scheduled_datetime
180
- if retry_cfg:
181
- task_data["retry_config"] = retry_cfg
182
-
183
- task = ScheduledTask(**task_data)
184
-
185
- # Store task
186
- _scheduled_tasks[task.task_id] = task
187
- _task_execution_history[task.task_id] = []
188
-
189
- logger.info(
190
- f"Created scheduled task: {task.task_id}",
191
- component="scheduler",
192
- operation="create_task",
193
- parser_id=parser_id,
194
- task_type=task_type,
195
- api_key=api_key[:8] + "..." if api_key else None
196
- )
197
-
198
- return SchedulerTaskCreateResponse(
199
- success=True,
200
- task_id=task.task_id,
201
- scheduled_at=task.scheduled_at,
202
- next_run=task.scheduled_at # For now, use scheduled_at as next_run
203
- ).model_dump(mode='json')
204
-
205
- except (TaskValidationError, SchedulerError) as e:
206
- logger.error(f"Task creation failed: {e}", component="scheduler", operation="create_task")
207
- return SchedulerTaskCreateResponse(
208
- success=False,
209
- error=str(e)
210
- ).model_dump(mode='json')
211
- except Exception as e:
212
- logger.error(f"Unexpected error creating task: {e}", component="scheduler", operation="create_task", error=e)
213
- return SchedulerTaskCreateResponse(
214
- success=False,
215
- error="Internal server error"
216
- ).model_dump(mode='json')
217
-
218
-
219
- async def handle_scheduler_list_tasks(parser_id: str = None, task_type: str = None, status: str = None, limit: int = 100, offset: int = 0, api_key: str = None) -> Dict[str, Any]:
220
- """
221
- List scheduled tasks.
222
-
223
- Expected params:
224
- parser_id: Optional[str] - filter by parser
225
- task_type: Optional[str] - filter by task type
226
- status: Optional[str] - filter by status
227
- limit: Optional[int] - limit results
228
- offset: Optional[int] - offset for pagination
229
- api_key: Optional[str]
230
- """
231
- try:
232
-
233
- # Filter tasks
234
- filtered_tasks = []
235
- for task in _scheduled_tasks.values():
236
- if parser_id and task.parser_id != parser_id:
237
- continue
238
- if task_type and task.task_type != task_type:
239
- continue
240
- if status and task.status != status:
241
- continue
242
- filtered_tasks.append(task)
243
-
244
- # Sort by created_at (newest first)
245
- filtered_tasks.sort(key=lambda t: t.created_at, reverse=True)
246
-
247
- # Apply pagination
248
- total_count = len(filtered_tasks)
249
- paginated_tasks = filtered_tasks[offset:offset + limit]
250
-
251
- logger.info(
252
- f"Listed {len(paginated_tasks)} tasks (total: {total_count})",
253
- component="scheduler",
254
- operation="list_tasks",
255
- parser_id=parser_id,
256
- task_type=task_type,
257
- status=status
258
- )
259
-
260
- return SchedulerTaskListResponse(
261
- success=True,
262
- tasks=paginated_tasks,
263
- total_count=total_count
264
- ).model_dump(mode='json')
265
-
266
- except Exception as e:
267
- logger.error(f"Error listing tasks: {e}", component="scheduler", operation="list_tasks", error=e)
268
- return SchedulerTaskListResponse(
269
- success=False,
270
- error="Internal server error",
271
- tasks=[],
272
- total_count=0
273
- ).model_dump(mode='json')
274
-
275
-
276
- async def handle_scheduler_get_task(**params) -> Dict[str, Any]:
277
- """
278
- Get task status and execution history.
279
-
280
- Expected params:
281
- task_id: str
282
- api_key: Optional[str]
283
- """
284
- try:
285
- task_id = params.get("task_id")
286
- if not task_id:
287
- return SchedulerTaskStatusResponse(
288
- success=False,
289
- error="task_id is required",
290
- task=None,
291
- execution_history=[]
292
- ).model_dump(mode='json')
293
-
294
- task = _scheduled_tasks.get(task_id)
295
- if not task:
296
- raise TaskExecutionError(f"Task not found: {task_id}")
297
-
298
- execution_history = _task_execution_history.get(task_id, [])
299
-
300
- logger.info(
301
- f"Retrieved task: {task_id}",
302
- component="scheduler",
303
- operation="get_task",
304
- task_status=task.status
305
- )
306
-
307
- return SchedulerTaskStatusResponse(
308
- success=True,
309
- task=task,
310
- execution_history=execution_history
311
- ).model_dump(mode='json')
312
-
313
- except TaskExecutionError as e:
314
- logger.warning(f"Task not found: {e}", component="scheduler", operation="get_task")
315
- return SchedulerTaskStatusResponse(
316
- success=False,
317
- error=str(e),
318
- task=None,
319
- execution_history=[]
320
- ).model_dump(mode='json')
321
- except Exception as e:
322
- logger.error(f"Error getting task: {e}", component="scheduler", operation="get_task", error=e)
323
- return SchedulerTaskStatusResponse(
324
- success=False,
325
- error="Internal server error",
326
- task=None,
327
- execution_history=[]
328
- ).model_dump(mode='json')
329
-
330
-
331
- async def handle_scheduler_cancel_task(**params) -> Dict[str, Any]:
332
- """
333
- Cancel a scheduled task.
334
-
335
- Expected params:
336
- task_id: str
337
- api_key: Optional[str]
338
- """
339
- try:
340
- task_id = params.get("task_id")
341
- if not task_id:
342
- return BaseRPCResponse(
343
- success=False,
344
- error="task_id is required"
345
- ).model_dump(mode='json')
346
-
347
- task = _scheduled_tasks.get(task_id)
348
- if not task:
349
- raise TaskExecutionError(f"Task not found: {task_id}")
350
-
351
- # Update task status
352
- task.status = "cancelled"
353
- task.updated_at = datetime.utcnow()
354
-
355
- logger.info(
356
- f"Cancelled task: {task_id}",
357
- component="scheduler",
358
- operation="cancel_task",
359
- parser_id=task.parser_id
360
- )
361
-
362
- return BaseRPCResponse(
363
- success=True,
364
- message=f"Task {task_id} cancelled successfully"
365
- ).model_dump(mode='json')
366
-
367
- except TaskExecutionError as e:
368
- logger.warning(f"Task not found: {e}", component="scheduler", operation="cancel_task")
369
- return BaseRPCResponse(
370
- success=False,
371
- error=str(e)
372
- ).model_dump(mode='json')
373
- except Exception as e:
374
- logger.error(f"Error cancelling task: {e}", component="scheduler", operation="cancel_task", error=e)
375
- return BaseRPCResponse(
376
- success=False,
377
- error="Internal server error"
378
- ).model_dump(mode='json')
379
-
380
-
381
- async def handle_scheduler_update_parser_status(parser_id: str, status: str, last_seen: str = None, current_task_id: str = None, capabilities: list = None, api_key: str = None) -> Dict[str, Any]:
382
- """
383
- Update parser status for scheduler.
384
-
385
- Expected params:
386
- parser_id: str
387
- status: str (online, offline, busy, error)
388
- last_seen: Optional[datetime]
389
- current_task_id: Optional[str]
390
- capabilities: Optional[List[str]]
391
- api_key: Optional[str]
392
- """
393
- try:
394
- if not parser_id or not status:
395
- return BaseRPCResponse(
396
- success=False,
397
- error="parser_id and status are required"
398
- ).model_dump(mode='json')
399
-
400
- # Handle last_seen
401
- last_seen_datetime = datetime.utcnow()
402
- if last_seen:
403
- try:
404
- last_seen_datetime = datetime.fromisoformat(last_seen)
405
- except Exception:
406
- pass # Use current time if parsing fails
407
-
408
- # Create or update parser status
409
- parser_status = ParserStatus(
410
- parser_id=parser_id,
411
- parser_type="generic", # Default parser type
412
- status=status,
413
- last_seen=last_seen_datetime,
414
- capabilities=capabilities or []
415
- )
416
-
417
- _parser_statuses[parser_id] = parser_status
418
-
419
- logger.info(
420
- f"Updated parser status: {parser_id} -> {status}",
421
- component="scheduler",
422
- operation="update_parser_status",
423
- parser_id=parser_id,
424
- status=status
425
- )
426
-
427
- return BaseRPCResponse(
428
- success=True,
429
- message=f"Parser {parser_id} status updated to {status}"
430
- ).model_dump(mode='json')
431
-
432
- except Exception as e:
433
- logger.error(f"Error updating parser status: {e}", component="scheduler", operation="update_parser_status", error=e)
434
- return BaseRPCResponse(
435
- success=False,
436
- error="Internal server error"
437
- ).model_dump(mode='json')
438
-
439
-
440
- async def handle_scheduler_get_parser_status(**params) -> Dict[str, Any]:
441
- """
442
- Get parser status and active tasks.
443
-
444
- Expected params:
445
- parser_id: str
446
- api_key: Optional[str]
447
- """
448
- try:
449
- parser_id = params.get("parser_id")
450
- if not parser_id:
451
- return SchedulerParserStatusResponse(
452
- success=False,
453
- error="parser_id is required",
454
- parser_status=None,
455
- active_tasks=[]
456
- ).model_dump(mode='json')
457
-
458
- parser_status = _parser_statuses.get(parser_id)
459
- if not parser_status:
460
- return SchedulerParserStatusResponse(
461
- success=False,
462
- error=f"Parser not found: {parser_id}",
463
- parser_status=None,
464
- active_tasks=[]
465
- ).model_dump(mode='json')
466
-
467
- # Find active tasks for this parser
468
- active_tasks = []
469
- for task in _scheduled_tasks.values():
470
- if task.parser_id == parser_id and task.status in ["pending", "running"]:
471
- active_tasks.append(task.task_id)
472
-
473
- logger.info(
474
- f"Retrieved parser status: {parser_id}",
475
- component="scheduler",
476
- operation="get_parser_status",
477
- parser_status=parser_status.status,
478
- active_tasks_count=len(active_tasks)
479
- )
480
-
481
- return SchedulerParserStatusResponse(
482
- success=True,
483
- parser_status=parser_status,
484
- active_tasks=active_tasks
485
- ).model_dump(mode='json')
486
-
487
- except Exception as e:
488
- logger.error(f"Error getting parser status: {e}", component="scheduler", operation="get_parser_status", error=e)
489
- return SchedulerParserStatusResponse(
490
- success=False,
491
- error="Internal server error",
492
- parser_status=None,
493
- active_tasks=[]
494
- ).model_dump(mode='json')
495
-
496
-
497
- async def handle_scheduler_get_stats(api_key: str = None) -> Dict[str, Any]:
498
- """
499
- Get scheduler statistics.
500
-
501
- Expected params:
502
- api_key: Optional[str]
503
- """
504
- try:
505
- # Count tasks by status
506
- total_tasks = len(_scheduled_tasks)
507
- active_tasks = sum(1 for task in _scheduled_tasks.values() if task.status in ["pending", "running"])
508
- completed_tasks = sum(1 for task in _scheduled_tasks.values() if task.status == "completed")
509
- failed_tasks = sum(1 for task in _scheduled_tasks.values() if task.status == "failed")
510
-
511
- # Count parsers by status
512
- parsers_online = sum(1 for parser in _parser_statuses.values() if parser.status == "online")
513
- parsers_offline = sum(1 for parser in _parser_statuses.values() if parser.status == "offline")
514
-
515
- logger.info(
516
- "Retrieved scheduler statistics",
517
- component="scheduler",
518
- operation="get_stats",
519
- total_tasks=total_tasks,
520
- active_tasks=active_tasks,
521
- parsers_online=parsers_online
522
- )
523
-
524
- return SchedulerStatsResponse(
525
- success=True,
526
- total_tasks=total_tasks,
527
- active_tasks=active_tasks,
528
- completed_tasks=completed_tasks,
529
- failed_tasks=failed_tasks,
530
- parsers_online=parsers_online,
531
- parsers_offline=parsers_offline
532
- ).model_dump(mode='json')
533
-
534
- except Exception as e:
535
- logger.error(f"Error getting scheduler stats: {e}", component="scheduler", operation="get_stats", error=e)
536
- return SchedulerStatsResponse(
537
- success=False,
538
- error="Internal server error",
539
- total_tasks=0,
540
- active_tasks=0,
541
- completed_tasks=0,
542
- failed_tasks=0,
543
- parsers_online=0,
544
- parsers_offline=0
545
- ).model_dump(mode='json')
@@ -1,66 +0,0 @@
1
- """
2
- Session-related RPC handlers.
3
- """
4
-
5
- import uuid
6
- from datetime import datetime
7
- from unrealon_rpc.logging import get_logger
8
-
9
- from ...models import (
10
- ParserSession,
11
- SessionStartRequest, SessionStartResponse,
12
- SessionEndRequest, SessionEndResponse
13
- )
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
- class SessionHandlers:
19
- """Handlers for session-related RPC operations."""
20
-
21
- async def handle_session_start(self, parser_id: str, session_type: str, metadata: dict = None) -> dict:
22
- """Handle session start."""
23
- try:
24
- # Create request object for validation
25
- request = SessionStartRequest(
26
- parser_id=parser_id,
27
- session_type=session_type,
28
- metadata=metadata
29
- )
30
-
31
- session = ParserSession(
32
- session_id=str(uuid.uuid4()),
33
- parser_id=request.parser_id,
34
- session_type=request.session_type,
35
- metadata=request.metadata or {}
36
- )
37
- self.sessions[session.session_id] = session
38
-
39
- logger.info(f"Session started: {session.session_id} for parser {session.parser_id}")
40
-
41
- response = SessionStartResponse(success=True, session=session)
42
- return response.model_dump(mode='json')
43
- except Exception as e:
44
- logger.error(f"Session start failed: {e}")
45
- response = SessionStartResponse(success=False, error=str(e))
46
- return response.model_dump(mode='json')
47
-
48
- async def handle_session_end(self, session_id: str) -> dict:
49
- """Handle session end."""
50
- try:
51
- # Create request object for validation
52
- request = SessionEndRequest(session_id=session_id)
53
-
54
- if request.session_id in self.sessions:
55
- session = self.sessions[request.session_id]
56
- session.ended_at = datetime.now()
57
- session.status = "completed"
58
-
59
- logger.info(f"Session ended: {request.session_id}")
60
-
61
- response = SessionEndResponse(success=True, session_id=request.session_id, message="Session ended")
62
- return response.model_dump(mode='json')
63
- except Exception as e:
64
- logger.error(f"Session end failed: {e}")
65
- response = SessionEndResponse(success=False, session_id=session_id, error=str(e))
66
- return response.model_dump(mode='json')
@@ -1,8 +0,0 @@
1
- """
2
- Browser automation module
3
- """
4
-
5
- from .manager import BrowserManager
6
- from .config import BrowserConfig
7
-
8
- __all__ = ["BrowserManager", "BrowserConfig"]