unrealon 1.1.1__py3-none-any.whl → 1.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. unrealon/__init__.py +16 -6
  2. unrealon-1.1.4.dist-info/METADATA +658 -0
  3. unrealon-1.1.4.dist-info/RECORD +54 -0
  4. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/entry_points.txt +1 -1
  5. unrealon_browser/__init__.py +3 -6
  6. unrealon_browser/core/browser_manager.py +86 -84
  7. unrealon_browser/dto/models/config.py +2 -0
  8. unrealon_browser/managers/captcha.py +165 -185
  9. unrealon_browser/managers/cookies.py +57 -28
  10. unrealon_browser/managers/logger_bridge.py +94 -34
  11. unrealon_browser/managers/profile.py +186 -158
  12. unrealon_browser/managers/stealth.py +58 -47
  13. unrealon_driver/__init__.py +8 -21
  14. unrealon_driver/exceptions.py +5 -0
  15. unrealon_driver/html_analyzer/__init__.py +32 -0
  16. unrealon_driver/{parser/managers/html.py → html_analyzer/cleaner.py} +330 -405
  17. unrealon_driver/html_analyzer/config.py +64 -0
  18. unrealon_driver/html_analyzer/manager.py +247 -0
  19. unrealon_driver/html_analyzer/models.py +115 -0
  20. unrealon_driver/html_analyzer/websocket_analyzer.py +157 -0
  21. unrealon_driver/models/__init__.py +31 -0
  22. unrealon_driver/models/websocket.py +98 -0
  23. unrealon_driver/parser/__init__.py +4 -23
  24. unrealon_driver/parser/cli_manager.py +6 -5
  25. unrealon_driver/parser/daemon_manager.py +242 -66
  26. unrealon_driver/parser/managers/__init__.py +0 -21
  27. unrealon_driver/parser/managers/config.py +15 -3
  28. unrealon_driver/parser/parser_manager.py +225 -395
  29. unrealon_driver/smart_logging/__init__.py +24 -0
  30. unrealon_driver/smart_logging/models.py +44 -0
  31. unrealon_driver/smart_logging/smart_logger.py +406 -0
  32. unrealon_driver/smart_logging/unified_logger.py +525 -0
  33. unrealon_driver/websocket/__init__.py +31 -0
  34. unrealon_driver/websocket/client.py +249 -0
  35. unrealon_driver/websocket/config.py +188 -0
  36. unrealon_driver/websocket/manager.py +90 -0
  37. unrealon-1.1.1.dist-info/METADATA +0 -722
  38. unrealon-1.1.1.dist-info/RECORD +0 -82
  39. unrealon_bridge/__init__.py +0 -114
  40. unrealon_bridge/cli.py +0 -316
  41. unrealon_bridge/client/__init__.py +0 -93
  42. unrealon_bridge/client/base.py +0 -78
  43. unrealon_bridge/client/commands.py +0 -89
  44. unrealon_bridge/client/connection.py +0 -90
  45. unrealon_bridge/client/events.py +0 -65
  46. unrealon_bridge/client/health.py +0 -38
  47. unrealon_bridge/client/html_parser.py +0 -146
  48. unrealon_bridge/client/logging.py +0 -139
  49. unrealon_bridge/client/proxy.py +0 -70
  50. unrealon_bridge/client/scheduler.py +0 -450
  51. unrealon_bridge/client/session.py +0 -70
  52. unrealon_bridge/configs/__init__.py +0 -14
  53. unrealon_bridge/configs/bridge_config.py +0 -212
  54. unrealon_bridge/configs/bridge_config.yaml +0 -39
  55. unrealon_bridge/models/__init__.py +0 -138
  56. unrealon_bridge/models/base.py +0 -28
  57. unrealon_bridge/models/command.py +0 -41
  58. unrealon_bridge/models/events.py +0 -40
  59. unrealon_bridge/models/html_parser.py +0 -79
  60. unrealon_bridge/models/logging.py +0 -55
  61. unrealon_bridge/models/parser.py +0 -63
  62. unrealon_bridge/models/proxy.py +0 -41
  63. unrealon_bridge/models/requests.py +0 -95
  64. unrealon_bridge/models/responses.py +0 -88
  65. unrealon_bridge/models/scheduler.py +0 -592
  66. unrealon_bridge/models/session.py +0 -28
  67. unrealon_bridge/server/__init__.py +0 -91
  68. unrealon_bridge/server/base.py +0 -171
  69. unrealon_bridge/server/handlers/__init__.py +0 -23
  70. unrealon_bridge/server/handlers/command.py +0 -110
  71. unrealon_bridge/server/handlers/html_parser.py +0 -139
  72. unrealon_bridge/server/handlers/logging.py +0 -95
  73. unrealon_bridge/server/handlers/parser.py +0 -95
  74. unrealon_bridge/server/handlers/proxy.py +0 -75
  75. unrealon_bridge/server/handlers/scheduler.py +0 -545
  76. unrealon_bridge/server/handlers/session.py +0 -66
  77. unrealon_driver/browser/__init__.py +0 -8
  78. unrealon_driver/browser/config.py +0 -74
  79. unrealon_driver/browser/manager.py +0 -416
  80. unrealon_driver/parser/managers/browser.py +0 -51
  81. unrealon_driver/parser/managers/logging.py +0 -609
  82. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/WHEEL +0 -0
  83. {unrealon-1.1.1.dist-info → unrealon-1.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,592 +0,0 @@
1
- """
2
- Task Scheduler Models - UnrealOn RPC v2.0
3
-
4
- Pydantic v2 models for hybrid RPC + Redis Queue task scheduling system.
5
- Provides complete type safety for scheduled tasks, cron expressions, and task execution.
6
-
7
- COMPLIANCE: 100% Pydantic v2, no Dict[str, Any], strict typing everywhere.
8
- """
9
-
10
- import uuid
11
- from enum import Enum
12
- from typing import Optional, List, Union
13
- from datetime import datetime, timedelta
14
- from pydantic import BaseModel, Field, ConfigDict, field_validator, model_validator
15
- from typing_extensions import Annotated
16
-
17
- from .base import BaseParserModel
18
-
19
-
20
class TaskStatus(str, Enum):
    """Lifecycle states of a scheduled task.

    The enum mixes in ``str``, so every member compares equal to its
    lowercase string value.
    """

    # Created and waiting to be executed.
    PENDING = "pending"
    # Added to the Redis queue.
    QUEUED = "queued"
    # Currently executing.
    RUNNING = "running"
    # Finished successfully.
    COMPLETED = "completed"
    # Finished with an error.
    FAILED = "failed"
    # Cancelled by a user.
    CANCELLED = "cancelled"
    # Exceeded its timeout.
    TIMEOUT = "timeout"
    # Failed and is going to be retried.
    RETRY = "retry"
31
-
32
-
33
class TaskPriority(int, Enum):
    """Integer priority levels used for queue ordering.

    ``CRITICAL`` carries the smallest number and ``BULK`` the largest.
    """

    # System-critical work.
    CRITICAL = 1
    # High-priority work.
    HIGH = 3
    # Default level.
    NORMAL = 5
    # Background work.
    LOW = 7
    # Bulk processing.
    BULK = 9
41
-
42
-
43
class ScheduleType(str, Enum):
    """How a task is scheduled for execution."""

    # Run right away via RPC.
    IMMEDIATE = "immediate"
    # Run after a delay via the queue.
    DELAYED = "delayed"
    # Run repeatedly on a cron schedule.
    RECURRING = "recurring"
    # Run once a condition is met.
    CONDITIONAL = "conditional"
50
-
51
-
52
class TaskExecutionMode(str, Enum):
    """Which transport (RPC, queue, or a mix) should execute a task."""

    # Always execute through RPC.
    RPC_ONLY = "rpc_only"
    # Always execute through the queue.
    QUEUE_ONLY = "queue_only"
    # Pick automatically based on parser status.
    HYBRID_AUTO = "hybrid_auto"
    # Try RPC first and fall back to the queue.
    HYBRID_PREFER_RPC = "hybrid_prefer_rpc"
    # Try the queue first and fall back to RPC.
    HYBRID_PREFER_QUEUE = "hybrid_prefer_queue"
60
-
61
-
62
class CronExpression(BaseModel):
    """Five-field cron expression together with the timezone it applies to.

    Only a shallow syntax check is performed: the expression must contain
    exactly five whitespace-separated fields, and any field that is a single
    plain integer must lie inside that field's allowed range.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",
        str_strip_whitespace=True,
    )

    expression: Annotated[str, Field(
        description="Cron expression (minute hour day month weekday)",
        examples=["0 2 * * *", "*/15 * * * *", "0 9 * * 1-5"],
    )]

    timezone: Annotated[str, Field(
        default="UTC",
        description="Timezone for cron execution",
        examples=["UTC", "America/New_York", "Asia/Seoul"],
    )]

    @field_validator('expression')
    @classmethod
    def validate_cron_expression(cls, v: str) -> str:
        """Check the field count and the range of plain-integer fields.

        Args:
            v: Candidate cron expression.

        Returns:
            The expression with surrounding whitespace removed.

        Raises:
            ValueError: If the expression is empty, does not have exactly
                five fields, or a plain-integer field is out of range.
        """
        if not v or not v.strip():
            raise ValueError("Cron expression cannot be empty")

        parts = v.split()
        if len(parts) != 5:
            raise ValueError("Cron expression must have exactly 5 parts: minute hour day month weekday")

        # (field name, lowest allowed, highest allowed). Day-of-month and
        # month start at 1; weekday accepts both 0 and 7.
        bounds = (
            ("minute", 0, 59),
            ("hour", 0, 23),
            ("day", 1, 31),
            ("month", 1, 12),
            ("weekday", 0, 7),
        )

        for part, (name, lowest, highest) in zip(parts, bounds):
            # Wildcards, steps, ranges and lists are not inspected here.
            if part == "*" or any(marker in part for marker in "/-,"):
                continue

            try:
                val = int(part)
            except ValueError:
                # Non-integer tokens (presumably names such as MON or JAN)
                # are accepted unchanged, exactly as before.
                continue

            # The range check lives outside the try block so its ValueError
            # is never confused with an int() parsing failure.
            if val < lowest or val > highest:
                raise ValueError(f"Invalid {name} value: {val}")

        return v.strip()
114
-
115
-
116
class TaskParameters(BaseModel):
    """Command type, string-only arguments and timeout for one task."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    command_type: Annotated[
        str,
        Field(
            min_length=1,
            max_length=100,
            description="Type of command to execute",
            examples=["scrape", "parse", "daily_update", "cleanup"],
        ),
    ]

    parameters: Annotated[
        dict[str, str],
        Field(
            default_factory=dict,
            description="Command parameters (string values only for Redis compatibility)",
        ),
    ]

    # Upper bound of 86400 seconds is 24 hours.
    timeout: Annotated[
        int,
        Field(default=300, ge=1, le=86400, description="Task timeout in seconds"),
    ]

    @field_validator('parameters')
    @classmethod
    def validate_parameters(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject anything that is not a ``dict`` of ``str`` keys to ``str`` values.

        Raises:
            ValueError: If ``v`` is not a dict, or a key or value is not a string.
        """
        if not isinstance(v, dict):
            raise ValueError("Parameters must be a dictionary")

        for name, item in v.items():
            if not isinstance(name, str):
                raise ValueError(f"Parameter key must be string, got {type(name)}")
            if not isinstance(item, str):
                raise ValueError(f"Parameter value must be string, got {type(item)} for key '{name}'")

        return v
157
-
158
-
159
class TaskRetryConfig(BaseModel):
    """Retry policy for a task: attempt limit, delays and backoff switch."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    max_retries: Annotated[
        int,
        Field(default=3, ge=0, le=10, description="Maximum number of retry attempts"),
    ]

    retry_delay: Annotated[
        int,
        Field(default=60, ge=1, le=3600, description="Initial retry delay in seconds"),
    ]

    exponential_backoff: Annotated[
        bool,
        Field(default=True, description="Use exponential backoff for retry delays"),
    ]

    max_retry_delay: Annotated[
        int,
        Field(default=3600, ge=60, le=86400, description="Maximum retry delay in seconds"),
    ]
192
-
193
-
194
class ScheduledTask(BaseParserModel):
    """Definition and mutable runtime state of a single scheduled task.

    The model holds what to run (``task_parameters``), where to run it
    (``parser_type`` / ``parser_id``), when to run it (``schedule_type`` with
    ``scheduled_at`` or ``cron_schedule``) and the bookkeeping that the
    ``mark_*`` helpers update.

    NOTE: timestamps come from ``datetime.utcnow()``, which returns naive
    datetimes and is deprecated since Python 3.12. It is kept so that stored
    values stay comparable with existing data.
    """

    # Task identification
    task_id: Annotated[str, Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique task identifier",
    )]

    task_name: Annotated[str, Field(
        min_length=1,
        max_length=200,
        description="Human-readable task name",
    )]

    # Parser targeting
    parser_type: Annotated[str, Field(
        min_length=1,
        max_length=100,
        description="Target parser type",
        examples=["encar_parser", "autotrader_parser"],
    )]

    parser_id: Annotated[Optional[str], Field(
        default=None,
        description="Specific parser ID (optional, for targeting specific instance)",
    )]

    # Task execution
    task_parameters: TaskParameters
    execution_mode: TaskExecutionMode = TaskExecutionMode.HYBRID_AUTO
    priority: TaskPriority = TaskPriority.NORMAL

    # Scheduling
    schedule_type: ScheduleType
    scheduled_at: Annotated[Optional[datetime], Field(
        default=None,
        description="When to execute (for delayed tasks)",
    )]

    cron_schedule: Annotated[Optional[CronExpression], Field(
        default=None,
        description="Cron schedule (for recurring tasks)",
    )]

    # Status and tracking
    status: TaskStatus = TaskStatus.PENDING
    created_at: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Task creation timestamp",
    )]

    updated_at: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Last update timestamp",
    )]

    # Execution tracking
    started_at: Annotated[Optional[datetime], Field(
        default=None,
        description="Task execution start time",
    )]

    completed_at: Annotated[Optional[datetime], Field(
        default=None,
        description="Task completion time",
    )]

    # Retry configuration
    retry_config: TaskRetryConfig = Field(default_factory=TaskRetryConfig)
    retry_count: Annotated[int, Field(
        default=0,
        ge=0,
        description="Current retry attempt count",
    )]

    # Results and errors
    last_error: Annotated[Optional[str], Field(
        default=None,
        description="Last error message if failed",
    )]

    execution_log: Annotated[List[str], Field(
        default_factory=list,
        description="Execution log entries",
    )]

    # Metadata
    tags: Annotated[List[str], Field(
        default_factory=list,
        description="Task tags for filtering and organization",
    )]

    metadata: Annotated[dict[str, str], Field(
        default_factory=dict,
        description="Additional task metadata (string values only)",
    )]

    @model_validator(mode='after')
    def validate_schedule_consistency(self) -> 'ScheduledTask':
        """Check that the scheduling fields match ``schedule_type``.

        Raises:
            ValueError: If a delayed task lacks ``scheduled_at``, a recurring
                task lacks ``cron_schedule``, or an immediate task sets either.
        """
        if self.schedule_type == ScheduleType.DELAYED:
            if not self.scheduled_at:
                raise ValueError("Delayed tasks must have scheduled_at timestamp")

        elif self.schedule_type == ScheduleType.RECURRING:
            if not self.cron_schedule:
                raise ValueError("Recurring tasks must have cron_schedule")

        elif self.schedule_type == ScheduleType.IMMEDIATE:
            if self.scheduled_at or self.cron_schedule:
                raise ValueError("Immediate tasks cannot have scheduling configuration")

        return self

    @field_validator('metadata')
    @classmethod
    def validate_metadata(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject metadata that is not a ``dict`` of ``str`` to ``str``.

        Raises:
            ValueError: If ``v`` is not a dict, or a key or value is not a string.
        """
        if not isinstance(v, dict):
            raise ValueError("Metadata must be a dictionary")

        for key, value in v.items():
            if not isinstance(key, str):
                raise ValueError(f"Metadata key must be string, got {type(key)}")
            if not isinstance(value, str):
                raise ValueError(f"Metadata value must be string, got {type(value)} for key '{key}'")

        return v

    def add_log_entry(self, message: str) -> None:
        """Append a timestamped line to ``execution_log`` and bump ``updated_at``.

        The clock is read once so the log prefix and ``updated_at`` agree.
        """
        now = datetime.utcnow()
        self.execution_log.append(f"[{now.isoformat()}] {message}")
        self.updated_at = now

    def mark_started(self) -> None:
        """Set status to RUNNING, record ``started_at`` and log the start."""
        self.status = TaskStatus.RUNNING
        self.started_at = datetime.utcnow()
        # add_log_entry refreshes updated_at; no separate write is needed.
        self.add_log_entry("Task execution started")

    def mark_completed(self, result_message: Optional[str] = None) -> None:
        """Set status to COMPLETED, record ``completed_at`` and log the outcome.

        Args:
            result_message: Log text; a default success message is used when
                it is ``None`` or empty.
        """
        self.status = TaskStatus.COMPLETED
        self.completed_at = datetime.utcnow()
        self.add_log_entry(result_message or "Task completed successfully")

    def mark_failed(self, error_message: str) -> None:
        """Set status to FAILED, remember ``error_message`` and log it."""
        self.status = TaskStatus.FAILED
        self.last_error = error_message
        self.add_log_entry(f"Task failed: {error_message}")

    def increment_retry(self) -> None:
        """Increase ``retry_count`` by one, set status to RETRY and log it."""
        self.retry_count += 1
        self.status = TaskStatus.RETRY
        self.add_log_entry(f"Retry attempt {self.retry_count}/{self.retry_config.max_retries}")

    def can_retry(self) -> bool:
        """Return True when the task is FAILED or TIMEOUT and has retries left."""
        return (
            self.status in [TaskStatus.FAILED, TaskStatus.TIMEOUT] and
            self.retry_count < self.retry_config.max_retries
        )

    def get_next_retry_delay(self) -> int:
        """Return the delay before the next retry, in seconds.

        Returns:
            ``0`` when ``can_retry()`` is false. Otherwise the configured
            ``retry_delay``, doubled once per retry already made and capped
            at ``max_retry_delay`` when exponential backoff is enabled.
        """
        if not self.can_retry():
            return 0

        base_delay = self.retry_config.retry_delay

        if self.retry_config.exponential_backoff:
            delay = base_delay * (2 ** self.retry_count)
            return min(delay, self.retry_config.max_retry_delay)

        return base_delay
380
-
381
-
382
class TaskQueue(BaseModel):
    """Settings for a Redis-backed task queue: connection, workers, timeouts."""

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    queue_name: Annotated[
        str,
        Field(min_length=1, max_length=100, description="Redis queue name"),
    ]

    redis_url: Annotated[
        str,
        Field(
            description="Redis connection URL",
            examples=["redis://localhost:6379/1"],
        ),
    ]

    max_workers: Annotated[
        int,
        Field(default=4, ge=1, le=20, description="Maximum concurrent workers"),
    ]

    worker_timeout: Annotated[
        int,
        Field(default=3600, ge=60, le=86400, description="Worker timeout in seconds"),
    ]

    visibility_timeout: Annotated[
        int,
        Field(default=300, ge=30, le=3600, description="Task visibility timeout in seconds"),
    ]

    dead_letter_queue: Annotated[
        Optional[str],
        Field(default=None, description="Dead letter queue name for failed tasks"),
    ]
426
-
427
-
428
class TaskExecutionResult(BaseParserModel):
    """Outcome of one task execution: success flag, timing, data and error."""

    task_id: Annotated[str, Field(description="Task identifier")]

    success: Annotated[bool, Field(description="Execution success status")]

    execution_time: Annotated[
        float,
        Field(ge=0.0, description="Execution time in seconds"),
    ]

    result_data: Annotated[
        dict[str, str],
        Field(default_factory=dict, description="Task result data (string values only)"),
    ]

    error_message: Annotated[
        Optional[str],
        Field(default=None, description="Error message if failed"),
    ]

    retry_count: Annotated[
        int,
        Field(default=0, ge=0, description="Number of retries performed"),
    ]

    executed_at: Annotated[
        datetime,
        Field(default_factory=datetime.utcnow, description="Execution timestamp"),
    ]

    executed_by: Annotated[
        Optional[str],
        Field(default=None, description="Parser ID that executed the task"),
    ]

    @field_validator('result_data')
    @classmethod
    def validate_result_data(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject result data that is not a ``dict`` of ``str`` to ``str``.

        Raises:
            ValueError: If ``v`` is not a dict, or a key or value is not a string.
        """
        if not isinstance(v, dict):
            raise ValueError("Result data must be a dictionary")

        for name, item in v.items():
            if not isinstance(name, str):
                raise ValueError(f"Result key must be string, got {type(name)}")
            if not isinstance(item, str):
                raise ValueError(f"Result value must be string, got {type(item)} for key '{name}'")

        return v
480
-
481
-
482
class ParserStatus(BaseModel):
    """Liveness and load information reported for one parser instance."""

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",
    )

    parser_id: Annotated[str, Field(description="Parser identifier")]

    parser_type: Annotated[str, Field(description="Parser type")]

    status: Annotated[str, Field(
        description="Parser status",
        pattern="^(online|offline|connecting|disconnecting)$",
    )]

    last_seen: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Last heartbeat timestamp",
    )]

    capabilities: Annotated[List[str], Field(
        default_factory=list,
        description="Parser capabilities",
    )]

    current_tasks: Annotated[int, Field(
        default=0,
        ge=0,
        description="Number of currently executing tasks",
    )]

    max_concurrent_tasks: Annotated[int, Field(
        default=5,
        ge=1,
        le=50,
        description="Maximum concurrent tasks this parser can handle",
    )]

    def is_online(self, timeout_seconds: int = 300) -> bool:
        """Return True when status is "online" and the heartbeat is recent.

        Args:
            timeout_seconds: Maximum allowed age of ``last_seen`` in seconds.

        Returns:
            False if ``status`` is not ``"online"`` or ``last_seen`` is older
            than ``timeout_seconds``; True otherwise.
        """
        if self.status != "online":
            return False

        # last_seen is naive when produced by the default factory but can be
        # timezone-aware when parsed from input carrying a UTC offset.
        # Subtracting a naive and an aware datetime raises TypeError, so the
        # current time is built to match last_seen.
        if self.last_seen.utcoffset() is None:
            now = datetime.utcnow()
        else:
            now = datetime.now(self.last_seen.tzinfo)

        return (now - self.last_seen).total_seconds() <= timeout_seconds

    def can_accept_task(self) -> bool:
        """Return True when the parser is online and below its task limit."""
        return (
            self.is_online() and
            self.current_tasks < self.max_concurrent_tasks
        )
536
-
537
-
538
- # Custom exceptions for scheduler
539
class SchedulerError(Exception):
    """Root of the scheduler exception hierarchy.

    Attributes:
        message: Human-readable error text (also the exception's only arg).
        task_id: Identifier of the task involved, or ``None``.
        details: Extra string key/value context; an empty dict when omitted.
    """

    def __init__(self, message: str, task_id: Optional[str] = None, details: Optional[dict[str, str]] = None):
        super().__init__(message)
        self.message = message
        self.task_id = task_id
        # A falsy ``details`` (None or empty) is replaced by a fresh dict.
        self.details = details if details else {}
547
-
548
-
549
class TaskValidationError(SchedulerError):
    """Scheduler error subtype for task validation failures."""
552
-
553
-
554
class TaskExecutionError(SchedulerError):
    """Scheduler error subtype for task execution failures."""
557
-
558
-
559
class ParserUnavailableError(SchedulerError):
    """Scheduler error subtype for a parser that is unavailable for a task."""
562
-
563
-
564
class QueueError(SchedulerError):
    """Scheduler error subtype for Redis queue operation failures."""
567
-
568
-
569
- # Export all models and exceptions
570
# Public names of this module, in order: enums, models, exceptions.
__all__ = [
    "TaskStatus", "TaskPriority", "ScheduleType", "TaskExecutionMode",
    "CronExpression", "TaskParameters", "TaskRetryConfig", "ScheduledTask",
    "TaskQueue", "TaskExecutionResult", "ParserStatus",
    "SchedulerError", "TaskValidationError", "TaskExecutionError",
    "ParserUnavailableError", "QueueError",
]
@@ -1,28 +0,0 @@
1
- """
2
- Session-related models.
3
-
4
- Contains models for parser session management and tracking.
5
- """
6
-
7
- from typing import Optional, Dict, Literal
8
- from datetime import datetime
9
- from pydantic import Field
10
- from typing_extensions import Annotated
11
-
12
- from .base import BaseParserModel
13
-
14
-
15
class ParserSession(BaseParserModel):
    """Record of one parser session: identity, state, timing and counters."""

    # Identity
    session_id: Annotated[
        str,
        Field(min_length=1, description="Unique session identifier"),
    ]
    parser_id: Annotated[
        str,
        Field(min_length=1, description="Parser ID"),
    ]
    session_type: Annotated[
        str,
        Field(min_length=1, description="Type of session (scraping, parsing, etc.)"),
    ]

    # State
    status: Literal["active", "paused", "completed", "failed", "cancelled"] = "active"
    metadata: Dict[str, str] = Field(default_factory=dict, description="Session metadata")

    # Timing; started_at defaults to datetime.now() at creation.
    started_at: datetime = Field(default_factory=datetime.now)
    ended_at: Optional[datetime] = None
    duration_seconds: Optional[Annotated[int, Field(ge=0)]] = None

    # Non-negative counters
    commands_executed: Annotated[int, Field(ge=0)] = 0
    data_processed: Annotated[int, Field(ge=0)] = 0
    errors_count: Annotated[int, Field(ge=0)] = 0
@@ -1,91 +0,0 @@
1
- """
2
- Parser Bridge Server - Modular implementation with composition.
3
-
4
- Clean architecture with separated handlers and no inheritance hell.
5
- """
6
-
7
- from typing import Callable
8
- from unrealon_rpc.logging import get_logger
9
-
10
- from .base import ParserBridgeServerBase
11
- from .handlers import ParserHandlers, SessionHandlers, CommandHandlers, ProxyHandlers, HTMLParserHandlers, LoggingHandlers, SchedulerHandlers
12
-
13
- from ..models import ParserSystemStats
14
-
15
- logger = get_logger(__name__)
16
-
17
-
18
class ParserBridgeServer(
    ParserBridgeServerBase,
    ParserHandlers,
    SessionHandlers,
    CommandHandlers,
    ProxyHandlers,
    HTMLParserHandlers,
    LoggingHandlers,
    SchedulerHandlers
):
    """
    Parser bridge server assembled from the base server and handler mixins.

    On construction it registers RPC methods in these namespaces:
    - parser.* (registration, status, health, sessions, command execution, logging)
    - command.* (create, status)
    - proxy.* (allocate, release, check)
    - html_parser.* (parse)
    - scheduler.* (tasks, parser status, stats)
    """

    def __init__(self, redis_url: str = "redis://localhost:6379/0", rpc_channel: str = "parser_rpc", pubsub_prefix: str = "parser", **kwargs):
        """
        Initialise the base server, then register every RPC method.

        Args:
            redis_url: Redis connection URL
            rpc_channel: RPC channel name
            pubsub_prefix: PubSub channel prefix
            **kwargs: Extra keyword arguments forwarded to the parent initializer
        """
        super().__init__(redis_url, rpc_channel, pubsub_prefix, **kwargs)
        self._register_rpc_methods()

    def _register_rpc_methods(self) -> None:
        """Bind each RPC method name to the handler attribute that serves it."""
        # (RPC method name, handler attribute), kept in registration order.
        routes = (
            # Parser management
            ("parser.register", "handle_parser_register"),
            ("parser.get_status", "handle_parser_get_status"),
            ("parser.list", "handle_parser_list"),
            ("parser.get_health", "handle_parser_get_health"),
            # Session management
            ("parser.start_session", "handle_session_start"),
            ("parser.end_session", "handle_session_end"),
            # Command management
            ("parser.execute_command", "handle_command_execute"),
            ("command.create", "handle_command_create"),
            ("command.get_status", "handle_command_get_status"),
            # Proxy management
            ("proxy.allocate", "handle_proxy_allocate"),
            ("proxy.release", "handle_proxy_release"),
            ("proxy.check", "handle_proxy_check"),
            # HTML parser management
            ("html_parser.parse", "handle_html_parse"),
            # Parser logging
            ("parser.log", "handle_parser_log"),
            # Scheduler management
            ("scheduler.create_task", "handle_scheduler_create_task"),
            ("scheduler.list_tasks", "handle_scheduler_list_tasks"),
            ("scheduler.get_task", "handle_scheduler_get_task"),
            ("scheduler.cancel_task", "handle_scheduler_cancel_task"),
            ("scheduler.update_parser_status", "handle_scheduler_update_parser_status"),
            ("scheduler.get_parser_status", "handle_scheduler_get_parser_status"),
            ("scheduler.get_stats", "handle_scheduler_get_stats"),
        )

        for rpc_name, handler_attr in routes:
            # Resolve the handler only when it is registered, one at a time.
            self.parser_rpc.register_method(rpc_name, getattr(self, handler_attr))
89
-
90
-
91
- __all__ = ["ParserBridgeServer"]