unrealon 1.1.1__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.5.dist-info/METADATA +621 -0
  3. unrealon-1.1.5.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,450 +0,0 @@
1
- """
2
- Scheduler client mixin for Parser Bridge Client.
3
-
4
- Provides scheduler-related functionality for parsers.
5
- """
6
-
7
- from typing import Dict, Any, List, Optional
8
- from datetime import datetime
9
-
10
- from unrealon_rpc.logging import get_logger
11
- from unrealon_bridge.models.scheduler import (
12
- ScheduledTask,
13
- TaskParameters,
14
- CronExpression,
15
- TaskRetryConfig,
16
- TaskExecutionResult,
17
- ParserStatus,
18
- SchedulerError,
19
- )
20
-
21
- logger = get_logger(__name__)
22
-
23
-
24
- class SchedulerMixin:
25
- """Scheduler functionality mixin for Parser Bridge Client."""
26
-
27
- async def create_scheduled_task(
28
- self,
29
- task_type: str,
30
- parameters: TaskParameters,
31
- cron_expression: Optional[str] = None,
32
- scheduled_at: Optional[datetime] = None,
33
- retry_config: Optional[TaskRetryConfig] = None,
34
- priority: int = 5,
35
- timeout_seconds: int = 300
36
- ) -> Dict[str, Any]:
37
- """
38
- Create a new scheduled task.
39
-
40
- Args:
41
- task_type: Type of task (command, scrape, parse, etc.)
42
- parameters: Task parameters
43
- cron_expression: Optional cron expression for recurring tasks
44
- scheduled_at: Optional specific execution time
45
- retry_config: Optional retry configuration
46
- priority: Task priority (1-10, higher = more important)
47
- timeout_seconds: Task timeout in seconds
48
-
49
- Returns:
50
- Task creation response with task_id and scheduling info
51
- """
52
- try:
53
- request_data = {
54
- "parser_id": self.parser_id,
55
- "task_type": task_type,
56
- "parameters": parameters.model_dump(),
57
- "priority": priority,
58
- "timeout_seconds": timeout_seconds
59
- }
60
-
61
- if cron_expression:
62
- request_data["cron_expression"] = cron_expression
63
-
64
- if scheduled_at:
65
- request_data["scheduled_at"] = scheduled_at.isoformat()
66
-
67
- if retry_config:
68
- request_data["retry_config"] = retry_config.model_dump()
69
-
70
- response = await self.bridge_client.call_rpc(
71
- method="scheduler.create_task",
72
- params=request_data,
73
- timeout=30
74
- )
75
-
76
- logger.info(
77
- f"Created scheduled task: {task_type}",
78
- component="scheduler_client",
79
- operation="create_task",
80
- parser_id=self.parser_id,
81
- task_type=task_type,
82
- cron_expression=cron_expression
83
- )
84
-
85
- return response
86
-
87
- except Exception as e:
88
- logger.error(
89
- f"Failed to create scheduled task: {e}",
90
- component="scheduler_client",
91
- operation="create_task",
92
- error=e
93
- )
94
- raise SchedulerError(f"Failed to create scheduled task: {e}")
95
-
96
- async def list_scheduled_tasks(
97
- self,
98
- task_type: Optional[str] = None,
99
- status: Optional[str] = None,
100
- limit: int = 100,
101
- offset: int = 0
102
- ) -> Dict[str, Any]:
103
- """
104
- List scheduled tasks for this parser.
105
-
106
- Args:
107
- task_type: Optional filter by task type
108
- status: Optional filter by status
109
- limit: Maximum number of tasks to return
110
- offset: Offset for pagination
111
-
112
- Returns:
113
- List of scheduled tasks
114
- """
115
- try:
116
- request_data = {
117
- "parser_id": self.parser_id,
118
- "limit": limit,
119
- "offset": offset
120
- }
121
-
122
- if task_type:
123
- request_data["task_type"] = task_type
124
-
125
- if status:
126
- request_data["status"] = status
127
-
128
- response = await self.bridge_client.call_rpc(
129
- method="scheduler.list_tasks",
130
- params=request_data,
131
- timeout=30
132
- )
133
-
134
- logger.info(
135
- f"Listed scheduled tasks: {response.get('total_count', 0)} total",
136
- component="scheduler_client",
137
- operation="list_tasks",
138
- parser_id=self.parser_id,
139
- task_type=task_type,
140
- status=status
141
- )
142
-
143
- return response
144
-
145
- except Exception as e:
146
- logger.error(
147
- f"Failed to list scheduled tasks: {e}",
148
- component="scheduler_client",
149
- operation="list_tasks",
150
- error=e
151
- )
152
- raise SchedulerError(f"Failed to list scheduled tasks: {e}")
153
-
154
- async def get_task_status(self, task_id: str) -> Dict[str, Any]:
155
- """
156
- Get task status and execution history.
157
-
158
- Args:
159
- task_id: Task ID to query
160
-
161
- Returns:
162
- Task status and execution history
163
- """
164
- try:
165
- response = await self.bridge_client.call_rpc(
166
- method="scheduler.get_task",
167
- params={"task_id": task_id},
168
- timeout=30
169
- )
170
-
171
- logger.info(
172
- f"Retrieved task status: {task_id}",
173
- component="scheduler_client",
174
- operation="get_task",
175
- task_id=task_id
176
- )
177
-
178
- return response
179
-
180
- except Exception as e:
181
- logger.error(
182
- f"Failed to get task status: {e}",
183
- component="scheduler_client",
184
- operation="get_task",
185
- error=e
186
- )
187
- raise SchedulerError(f"Failed to get task status: {e}")
188
-
189
- async def cancel_scheduled_task(self, task_id: str) -> Dict[str, Any]:
190
- """
191
- Cancel a scheduled task.
192
-
193
- Args:
194
- task_id: Task ID to cancel
195
-
196
- Returns:
197
- Cancellation response
198
- """
199
- try:
200
- response = await self.bridge_client.call_rpc(
201
- method="scheduler.cancel_task",
202
- params={"task_id": task_id},
203
- timeout=30
204
- )
205
-
206
- logger.info(
207
- f"Cancelled scheduled task: {task_id}",
208
- component="scheduler_client",
209
- operation="cancel_task",
210
- task_id=task_id
211
- )
212
-
213
- return response
214
-
215
- except Exception as e:
216
- logger.error(
217
- f"Failed to cancel scheduled task: {e}",
218
- component="scheduler_client",
219
- operation="cancel_task",
220
- error=e
221
- )
222
- raise SchedulerError(f"Failed to cancel scheduled task: {e}")
223
-
224
- async def update_parser_status_for_scheduler(
225
- self,
226
- status: str,
227
- current_task_id: Optional[str] = None,
228
- capabilities: Optional[List[str]] = None
229
- ) -> Dict[str, Any]:
230
- """
231
- Update parser status for scheduler.
232
-
233
- Args:
234
- status: Parser status (online, offline, busy, error)
235
- current_task_id: Currently executing task ID
236
- capabilities: Parser capabilities
237
-
238
- Returns:
239
- Status update response
240
- """
241
- try:
242
- request_data = {
243
- "parser_id": self.parser_id,
244
- "status": status,
245
- "last_seen": datetime.utcnow().isoformat()
246
- }
247
-
248
- if current_task_id:
249
- request_data["current_task_id"] = current_task_id
250
-
251
- if capabilities:
252
- request_data["capabilities"] = capabilities
253
-
254
- response = await self.bridge_client.call_rpc(
255
- method="scheduler.update_parser_status",
256
- params=request_data,
257
- timeout=30
258
- )
259
-
260
- logger.info(
261
- f"Updated parser status for scheduler: {status}",
262
- component="scheduler_client",
263
- operation="update_parser_status",
264
- parser_id=self.parser_id,
265
- status=status
266
- )
267
-
268
- return response
269
-
270
- except Exception as e:
271
- logger.error(
272
- f"Failed to update parser status: {e}",
273
- component="scheduler_client",
274
- operation="update_parser_status",
275
- error=e
276
- )
277
- raise SchedulerError(f"Failed to update parser status: {e}")
278
-
279
- async def get_parser_scheduler_status(self) -> Dict[str, Any]:
280
- """
281
- Get parser status and active tasks from scheduler.
282
-
283
- Returns:
284
- Parser status and active tasks
285
- """
286
- try:
287
- response = await self.bridge_client.call_rpc(
288
- method="scheduler.get_parser_status",
289
- params={"parser_id": self.parser_id},
290
- timeout=30
291
- )
292
-
293
- logger.info(
294
- f"Retrieved parser scheduler status",
295
- component="scheduler_client",
296
- operation="get_parser_status",
297
- parser_id=self.parser_id
298
- )
299
-
300
- return response
301
-
302
- except Exception as e:
303
- logger.error(
304
- f"Failed to get parser scheduler status: {e}",
305
- component="scheduler_client",
306
- operation="get_parser_status",
307
- error=e
308
- )
309
- raise SchedulerError(f"Failed to get parser scheduler status: {e}")
310
-
311
- async def get_scheduler_stats(self) -> Dict[str, Any]:
312
- """
313
- Get scheduler statistics.
314
-
315
- Returns:
316
- Scheduler statistics
317
- """
318
- try:
319
- response = await self.bridge_client.call_rpc(
320
- method="scheduler.get_stats",
321
- params={},
322
- timeout=30
323
- )
324
-
325
- logger.info(
326
- "Retrieved scheduler statistics",
327
- component="scheduler_client",
328
- operation="get_stats"
329
- )
330
-
331
- return response
332
-
333
- except Exception as e:
334
- logger.error(
335
- f"Failed to get scheduler stats: {e}",
336
- component="scheduler_client",
337
- operation="get_stats",
338
- error=e
339
- )
340
- raise SchedulerError(f"Failed to get scheduler stats: {e}")
341
-
342
- # Convenience methods for common task types
343
-
344
- async def schedule_scraping_task(
345
- self,
346
- target_url: str,
347
- scraping_config: Dict[str, Any],
348
- cron_expression: Optional[str] = None,
349
- scheduled_at: Optional[datetime] = None,
350
- priority: int = 5
351
- ) -> Dict[str, Any]:
352
- """
353
- Schedule a scraping task.
354
-
355
- Args:
356
- target_url: URL to scrape
357
- scraping_config: Scraping configuration
358
- cron_expression: Optional cron expression for recurring scraping
359
- scheduled_at: Optional specific execution time
360
- priority: Task priority
361
-
362
- Returns:
363
- Task creation response
364
- """
365
- parameters = TaskParameters(
366
- command_type="scrape",
367
- parameters={
368
- "target_url": target_url,
369
- **{k: str(v) for k, v in scraping_config.items()}
370
- }
371
- )
372
-
373
- return await self.create_scheduled_task(
374
- task_type="scrape",
375
- parameters=parameters,
376
- cron_expression=cron_expression,
377
- scheduled_at=scheduled_at,
378
- priority=priority
379
- )
380
-
381
- async def schedule_parsing_task(
382
- self,
383
- input_data: str,
384
- parsing_config: Dict[str, Any],
385
- cron_expression: Optional[str] = None,
386
- scheduled_at: Optional[datetime] = None,
387
- priority: int = 5
388
- ) -> Dict[str, Any]:
389
- """
390
- Schedule a parsing task.
391
-
392
- Args:
393
- input_data: Data to parse
394
- parsing_config: Parsing configuration
395
- cron_expression: Optional cron expression for recurring parsing
396
- scheduled_at: Optional specific execution time
397
- priority: Task priority
398
-
399
- Returns:
400
- Task creation response
401
- """
402
- parameters = TaskParameters(
403
- command_type="parse",
404
- parameters={
405
- "input_data": input_data,
406
- **{k: str(v) for k, v in parsing_config.items()}
407
- }
408
- )
409
-
410
- return await self.create_scheduled_task(
411
- task_type="parse",
412
- parameters=parameters,
413
- cron_expression=cron_expression,
414
- scheduled_at=scheduled_at,
415
- priority=priority
416
- )
417
-
418
- async def schedule_command_task(
419
- self,
420
- command_type: str,
421
- command_params: Dict[str, Any],
422
- cron_expression: Optional[str] = None,
423
- scheduled_at: Optional[datetime] = None,
424
- priority: int = 5
425
- ) -> Dict[str, Any]:
426
- """
427
- Schedule a command execution task.
428
-
429
- Args:
430
- command_type: Type of command to execute
431
- command_params: Command parameters
432
- cron_expression: Optional cron expression for recurring commands
433
- scheduled_at: Optional specific execution time
434
- priority: Task priority
435
-
436
- Returns:
437
- Task creation response
438
- """
439
- parameters = TaskParameters(
440
- command_type=command_type,
441
- parameters={k: str(v) for k, v in command_params.items()}
442
- )
443
-
444
- return await self.create_scheduled_task(
445
- task_type="command",
446
- parameters=parameters,
447
- cron_expression=cron_expression,
448
- scheduled_at=scheduled_at,
449
- priority=priority
450
- )
@@ -1,70 +0,0 @@
1
- """
2
- Session management for Parser Bridge Client.
3
- """
4
-
5
- from typing import Optional
6
- from unrealon_rpc.logging import get_logger
7
-
8
- from ..models import (
9
- SessionStartRequest, SessionStartResponse,
10
- SessionEndRequest, SessionEndResponse
11
- )
12
-
13
- logger = get_logger(__name__)
14
-
15
-
16
- class SessionMixin:
17
- """Mixin for session management functionality."""
18
-
19
- async def start_session(self, session_type: str = "default", metadata: Optional[dict[str, str]] = None) -> str:
20
- """
21
- Start a new parser session.
22
-
23
- Args:
24
- session_type: Type of session
25
- metadata: Session metadata
26
-
27
- Returns:
28
- Session ID
29
- """
30
- self._ensure_registered()
31
-
32
- request = SessionStartRequest(
33
- parser_id=self.parser_id,
34
- session_type=session_type,
35
- metadata=metadata
36
- )
37
-
38
- result = await self.bridge_client.call_rpc(
39
- method="parser.start_session",
40
- params=request.model_dump()
41
- )
42
-
43
- response = SessionStartResponse.model_validate(result)
44
-
45
- if response.success and response.session:
46
- self.session_id = response.session.session_id
47
- logger.info(f"Session started: {self.session_id}")
48
- return self.session_id
49
- else:
50
- raise RuntimeError(f"Session start failed: {response.error}")
51
-
52
- async def end_session(self) -> None:
53
- """End current parser session."""
54
- if not self.session_id:
55
- return
56
-
57
- request = SessionEndRequest(session_id=self.session_id)
58
-
59
- result = await self.bridge_client.call_rpc(
60
- method="parser.end_session",
61
- params=request.model_dump()
62
- )
63
-
64
- response = SessionEndResponse.model_validate(result)
65
-
66
- if response.success:
67
- logger.info(f"Session ended: {self.session_id}")
68
- self.session_id = None
69
- else:
70
- logger.error(f"Session end failed: {response.error}")
@@ -1,14 +0,0 @@
1
- """
2
- Bridge configuration module.
3
-
4
- Provides Pydantic2 configuration management for the bridge server.
5
- """
6
-
7
- from .bridge_config import BridgeConfig, load_bridge_config, save_bridge_config, create_sample_config
8
-
9
- __all__ = [
10
- "BridgeConfig",
11
- "load_bridge_config",
12
- "save_bridge_config",
13
- "create_sample_config"
14
- ]