unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.1.dist-info/METADATA +722 -0
- unrealon-1.1.1.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
- unrealon-1.1.1.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -0,0 +1,592 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Task Scheduler Models - UnrealOn RPC v2.0
|
|
3
|
+
|
|
4
|
+
Pydantic v2 models for hybrid RPC + Redis Queue task scheduling system.
|
|
5
|
+
Provides complete type safety for scheduled tasks, cron expressions, and task execution.
|
|
6
|
+
|
|
7
|
+
COMPLIANCE: 100% Pydantic v2, no Dict[str, Any], strict typing everywhere.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import uuid
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Optional, List, Union
|
|
13
|
+
from datetime import datetime, timedelta
|
|
14
|
+
from pydantic import BaseModel, Field, ConfigDict, field_validator, model_validator
|
|
15
|
+
from typing_extensions import Annotated
|
|
16
|
+
|
|
17
|
+
from .base import BaseParserModel
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TaskStatus(str, Enum):
    """Lifecycle states a scheduled task can be in.

    Members are plain strings (the class mixes in ``str``), so they compare
    equal to their lowercase values.
    """

    # Created and waiting for execution.
    PENDING = "pending"
    # Added to the Redis queue.
    QUEUED = "queued"
    # Currently executing.
    RUNNING = "running"
    # Finished successfully.
    COMPLETED = "completed"
    # Finished with an error.
    FAILED = "failed"
    # Cancelled by the user.
    CANCELLED = "cancelled"
    # Exceeded its timeout.
    TIMEOUT = "timeout"
    # Failed and is going to be retried.
    RETRY = "retry"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class TaskPriority(int, Enum):
    """Priority levels used to order tasks in the queue.

    Members are integers (the class mixes in ``int``); the most urgent level
    has the smallest value.
    """

    # System critical tasks.
    CRITICAL = 1
    # High priority tasks.
    HIGH = 3
    # Default priority.
    NORMAL = 5
    # Background tasks.
    LOW = 7
    # Bulk processing tasks.
    BULK = 9
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ScheduleType(str, Enum):
    """How a task is scheduled for execution.

    Members are plain strings (the class mixes in ``str``).
    """

    # Execute right away via RPC.
    IMMEDIATE = "immediate"
    # Execute after a delay via the queue.
    DELAYED = "delayed"
    # Execute repeatedly on a cron schedule.
    RECURRING = "recurring"
    # Execute once a condition is met.
    CONDITIONAL = "conditional"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class TaskExecutionMode(str, Enum):
    """Which transport (RPC or queue) a task should be executed through.

    Members are plain strings (the class mixes in ``str``).
    """

    # Force RPC execution.
    RPC_ONLY = "rpc_only"
    # Force queue execution.
    QUEUE_ONLY = "queue_only"
    # Pick automatically based on parser status.
    HYBRID_AUTO = "hybrid_auto"
    # Prefer RPC, fall back to the queue.
    HYBRID_PREFER_RPC = "hybrid_prefer_rpc"
    # Prefer the queue, fall back to RPC.
    HYBRID_PREFER_QUEUE = "hybrid_prefer_queue"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class CronExpression(BaseModel):
    """Five-field cron expression together with the timezone it applies to.

    The ``expression`` field is checked on construction and on assignment
    (``validate_assignment=True``); unknown fields are rejected
    (``extra="forbid"``) and string values are whitespace-stripped.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",
        str_strip_whitespace=True
    )

    expression: Annotated[str, Field(
        description="Cron expression (minute hour day month weekday)",
        examples=["0 2 * * *", "*/15 * * * *", "0 9 * * 1-5"]
    )]

    timezone: Annotated[str, Field(
        default="UTC",
        description="Timezone for cron execution",
        examples=["UTC", "America/New_York", "Asia/Seoul"]
    )]

    @field_validator('expression')
    @classmethod
    def validate_cron_expression(cls, v: str) -> str:
        """Perform a basic sanity check of a cron expression.

        Only plain integer fields are range-checked. ``*`` and any field
        containing ``/``, ``-`` or ``,`` (steps, ranges, lists) are accepted
        without inspection, and so are non-numeric tokens.

        Args:
            v: Candidate cron expression.

        Returns:
            The expression with surrounding whitespace removed.

        Raises:
            ValueError: If the expression is empty, does not consist of
                exactly five whitespace-separated fields, or a plain integer
                field is outside the allowed range for its position.
        """
        expression = v.strip() if v else ""
        if not expression:
            raise ValueError("Cron expression cannot be empty")

        parts = expression.split()
        if len(parts) != 5:
            raise ValueError("Cron expression must have exactly 5 parts: minute hour day month weekday")

        # (field name, lowest value, highest value) in positional order.
        # Day-of-month and month are 1-based; weekday allows both 0 and 7.
        bounds = (
            ("minute", 0, 59),
            ("hour", 0, 23),
            ("day", 1, 31),
            ("month", 1, 12),
            ("weekday", 0, 7),
        )

        for part, (name, min_val, max_val) in zip(parts, bounds):
            # Wildcards, steps, ranges and lists are not inspected here.
            if part == "*" or any(c in part for c in ("/", "-", ",")):
                continue

            try:
                val = int(part)
            except ValueError:
                # Non-numeric tokens have always been let through by this
                # basic check (presumably to allow names such as MON/JAN -
                # TODO confirm); keep accepting them.
                continue

            if val < min_val or val > max_val:
                raise ValueError(f"Invalid {name} value: {val}")

        return expression
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class TaskParameters(BaseModel):
    """Command description for a task: what to run, with which arguments,
    and how long it may take.

    Assignments are re-validated and unknown fields are rejected.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    # Name of the command; 1-100 characters.
    command_type: Annotated[str, Field(
        min_length=1,
        max_length=100,
        description="Type of command to execute",
        examples=["scrape", "parse", "daily_update", "cleanup"]
    )]

    # String-to-string mapping; defaults to an empty dict.
    parameters: Annotated[dict[str, str], Field(
        default_factory=dict,
        description="Command parameters (string values only for Redis compatibility)"
    )]

    # Seconds; between 1 and 86400 (24 hours), default 300.
    timeout: Annotated[int, Field(
        default=300,
        ge=1,
        le=86400,  # 24 hours max
        description="Task timeout in seconds"
    )]

    @field_validator('parameters')
    @classmethod
    def validate_parameters(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject anything that is not a ``dict`` of ``str`` keys to ``str`` values.

        Args:
            v: Value supplied for ``parameters``.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not a dict, or any key or value is not a
                string.
        """
        if not isinstance(v, dict):
            raise ValueError("Parameters must be a dictionary")

        for name, item in v.items():
            if not isinstance(name, str):
                raise ValueError(f"Parameter key must be string, got {type(name)}")
            if not isinstance(item, str):
                raise ValueError(f"Parameter value must be string, got {type(item)} for key '{name}'")

        return v
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class TaskRetryConfig(BaseModel):
    """Retry policy settings: attempt limit, delay, backoff switch and delay cap.

    Assignments are re-validated and unknown fields are rejected.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    # Allowed range 0-10, default 3.
    max_retries: Annotated[int, Field(
        default=3,
        ge=0,
        le=10,
        description="Maximum number of retry attempts"
    )]

    # Seconds; allowed range 1-3600, default 60.
    retry_delay: Annotated[int, Field(
        default=60,
        ge=1,
        le=3600,
        description="Initial retry delay in seconds"
    )]

    # Enabled by default.
    exponential_backoff: Annotated[bool, Field(
        default=True,
        description="Use exponential backoff for retry delays"
    )]

    # Seconds; allowed range 60-86400, default 3600.
    max_retry_delay: Annotated[int, Field(
        default=3600,
        ge=60,
        le=86400,
        description="Maximum retry delay in seconds"
    )]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class ScheduledTask(BaseParserModel):
    """A task definition plus its scheduling, status, retry and log state.

    Besides the declarative fields, the class offers small helpers that move
    the task through its lifecycle (``mark_started``, ``mark_completed``,
    ``mark_failed``, ``increment_retry``) and that answer retry questions
    (``can_retry``, ``get_next_retry_delay``).
    """

    # --- Identification ---
    # Defaults to a freshly generated UUID4 string.
    task_id: Annotated[str, Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique task identifier"
    )]

    task_name: Annotated[str, Field(
        min_length=1,
        max_length=200,
        description="Human-readable task name"
    )]

    # --- Parser targeting ---
    parser_type: Annotated[str, Field(
        min_length=1,
        max_length=100,
        description="Target parser type",
        examples=["encar_parser", "autotrader_parser"]
    )]

    parser_id: Annotated[Optional[str], Field(
        default=None,
        description="Specific parser ID (optional, for targeting specific instance)"
    )]

    # --- Execution ---
    task_parameters: TaskParameters
    execution_mode: TaskExecutionMode = TaskExecutionMode.HYBRID_AUTO
    priority: TaskPriority = TaskPriority.NORMAL

    # --- Scheduling ---
    schedule_type: ScheduleType
    scheduled_at: Annotated[Optional[datetime], Field(
        default=None,
        description="When to execute (for delayed tasks)"
    )]

    cron_schedule: Annotated[Optional[CronExpression], Field(
        default=None,
        description="Cron schedule (for recurring tasks)"
    )]

    # --- Status and tracking ---
    # Timestamps default to datetime.utcnow(), i.e. naive datetimes.
    status: TaskStatus = TaskStatus.PENDING
    created_at: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Task creation timestamp"
    )]

    updated_at: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Last update timestamp"
    )]

    # --- Execution tracking ---
    started_at: Annotated[Optional[datetime], Field(
        default=None,
        description="Task execution start time"
    )]

    completed_at: Annotated[Optional[datetime], Field(
        default=None,
        description="Task completion time"
    )]

    # --- Retry state ---
    retry_config: TaskRetryConfig = Field(default_factory=TaskRetryConfig)
    retry_count: Annotated[int, Field(
        default=0,
        ge=0,
        description="Current retry attempt count"
    )]

    # --- Results and errors ---
    last_error: Annotated[Optional[str], Field(
        default=None,
        description="Last error message if failed"
    )]

    execution_log: Annotated[List[str], Field(
        default_factory=list,
        description="Execution log entries"
    )]

    # --- Metadata ---
    tags: Annotated[List[str], Field(
        default_factory=list,
        description="Task tags for filtering and organization"
    )]

    metadata: Annotated[dict[str, str], Field(
        default_factory=dict,
        description="Additional task metadata (string values only)"
    )]

    @model_validator(mode='after')
    def validate_schedule_consistency(self) -> 'ScheduledTask':
        """Check that the scheduling fields match ``schedule_type``.

        Delayed tasks need ``scheduled_at``, recurring tasks need
        ``cron_schedule``, and immediate tasks must have neither. Other
        schedule types are not checked.

        Returns:
            The validated instance.

        Raises:
            ValueError: If the combination of fields is inconsistent.
        """
        kind = self.schedule_type

        if kind == ScheduleType.DELAYED and not self.scheduled_at:
            raise ValueError("Delayed tasks must have scheduled_at timestamp")

        if kind == ScheduleType.RECURRING and not self.cron_schedule:
            raise ValueError("Recurring tasks must have cron_schedule")

        if kind == ScheduleType.IMMEDIATE and (self.scheduled_at or self.cron_schedule):
            raise ValueError("Immediate tasks cannot have scheduling configuration")

        return self

    @field_validator('metadata')
    @classmethod
    def validate_metadata(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject anything that is not a ``dict`` of ``str`` keys to ``str`` values.

        Args:
            v: Value supplied for ``metadata``.

        Returns:
            ``v`` unchanged.

        Raises:
            ValueError: If ``v`` is not a dict, or any key or value is not a
                string.
        """
        if not isinstance(v, dict):
            raise ValueError("Metadata must be a dictionary")

        for name, item in v.items():
            if not isinstance(name, str):
                raise ValueError(f"Metadata key must be string, got {type(name)}")
            if not isinstance(item, str):
                raise ValueError(f"Metadata value must be string, got {type(item)} for key '{name}'")

        return v

    def add_log_entry(self, message: str) -> None:
        """Append ``message`` to ``execution_log`` prefixed with a UTC ISO
        timestamp in square brackets, then refresh ``updated_at``.
        """
        self.execution_log.append(f"[{datetime.utcnow().isoformat()}] {message}")
        self.updated_at = datetime.utcnow()

    def mark_started(self) -> None:
        """Set status to RUNNING, stamp ``started_at`` and log the start."""
        self.status = TaskStatus.RUNNING
        self.started_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        self.add_log_entry("Task execution started")

    def mark_completed(self, result_message: Optional[str] = None) -> None:
        """Set status to COMPLETED, stamp ``completed_at`` and log the outcome.

        Args:
            result_message: Text to log; a falsy value (``None`` or an empty
                string) is replaced by a default success message.
        """
        self.status = TaskStatus.COMPLETED
        self.completed_at = datetime.utcnow()
        self.updated_at = datetime.utcnow()
        self.add_log_entry(result_message or "Task completed successfully")

    def mark_failed(self, error_message: str) -> None:
        """Set status to FAILED, remember the error in ``last_error`` and log it.

        Args:
            error_message: Description of the failure.
        """
        self.status = TaskStatus.FAILED
        self.last_error = error_message
        self.updated_at = datetime.utcnow()
        self.add_log_entry(f"Task failed: {error_message}")

    def increment_retry(self) -> None:
        """Bump ``retry_count`` by one, set status to RETRY and log the attempt."""
        self.retry_count += 1
        self.status = TaskStatus.RETRY
        self.updated_at = datetime.utcnow()
        self.add_log_entry(f"Retry attempt {self.retry_count}/{self.retry_config.max_retries}")

    def can_retry(self) -> bool:
        """Return True when the task is FAILED or TIMEOUT and has used fewer
        retries than ``retry_config.max_retries`` allows.
        """
        if self.status not in (TaskStatus.FAILED, TaskStatus.TIMEOUT):
            return False
        return self.retry_count < self.retry_config.max_retries

    def get_next_retry_delay(self) -> int:
        """Return the delay in seconds before the next retry.

        Returns:
            ``0`` when the task cannot be retried. Otherwise the configured
            ``retry_delay``; with exponential backoff enabled it is doubled
            once per retry already made and capped at ``max_retry_delay``.
        """
        if not self.can_retry():
            return 0

        policy = self.retry_config
        if not policy.exponential_backoff:
            return policy.retry_delay

        backed_off = policy.retry_delay * (2 ** self.retry_count)
        return min(backed_off, policy.max_retry_delay)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class TaskQueue(BaseModel):
    """Settings for one Redis-backed task queue.

    Assignments are re-validated (``validate_assignment=True``) and unknown
    fields are rejected (``extra="forbid"``). ``queue_name`` and
    ``redis_url`` are required; every other field has a default.
    """

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Queue identity and connection.
    queue_name: Annotated[str, Field(min_length=1, max_length=100, description="Redis queue name")]

    redis_url: Annotated[str, Field(
        description="Redis connection URL",
        examples=["redis://localhost:6379/1"],
    )]

    # Worker pool limits: 1-20 workers, 60 s - 24 h timeout.
    max_workers: Annotated[int, Field(default=4, ge=1, le=20, description="Maximum concurrent workers")]

    worker_timeout: Annotated[int, Field(default=3600, ge=60, le=86400, description="Worker timeout in seconds")]

    # Visibility timeout: 30 s - 1 h.
    visibility_timeout: Annotated[int, Field(
        default=300,
        ge=30,
        le=3600,
        description="Task visibility timeout in seconds",
    )]

    # Optional queue for failed tasks; None means no dead letter queue is configured.
    dead_letter_queue: Annotated[Optional[str], Field(
        default=None,
        description="Dead letter queue name for failed tasks",
    )]
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class TaskExecutionResult(BaseParserModel):
    """Outcome of a single task execution.

    ``task_id``, ``success`` and ``execution_time`` are required; the
    remaining fields default to "empty" values. ``result_data`` is limited
    to a flat ``str -> str`` mapping, which ``validate_result_data``
    enforces explicitly.
    """

    task_id: Annotated[str, Field(description="Task identifier")]

    success: Annotated[bool, Field(description="Execution success status")]

    execution_time: Annotated[float, Field(ge=0.0, description="Execution time in seconds")]

    result_data: Annotated[dict[str, str], Field(
        default_factory=dict,
        description="Task result data (string values only)",
    )]

    error_message: Annotated[Optional[str], Field(default=None, description="Error message if failed")]

    retry_count: Annotated[int, Field(default=0, ge=0, description="Number of retries performed")]

    # Defaults to the naive UTC time at which the model instance is created.
    executed_at: Annotated[datetime, Field(default_factory=datetime.utcnow, description="Execution timestamp")]

    executed_by: Annotated[Optional[str], Field(default=None, description="Parser ID that executed the task")]

    @field_validator('result_data')
    @classmethod
    def validate_result_data(cls, v: dict[str, str]) -> dict[str, str]:
        """Reject result payloads that are not a ``str -> str`` dictionary.

        Args:
            v: Candidate value for ``result_data``.

        Returns:
            ``v`` unchanged when every key and value is a string.

        Raises:
            ValueError: If ``v`` is not a dict, or if a key or value is not
                a string. Each entry's key is checked before its value.
        """
        if isinstance(v, dict):
            for key, value in v.items():
                if not isinstance(key, str):
                    raise ValueError(f"Result key must be string, got {type(key)}")
                if not isinstance(value, str):
                    raise ValueError(f"Result value must be string, got {type(value)} for key '{key}'")
            return v

        raise ValueError("Result data must be a dictionary")
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class ParserStatus(BaseModel):
    """Parser online/offline status tracking.

    Holds the last reported state of a parser together with its load, and
    offers helpers to decide whether it is reachable and has spare capacity.
    Assignments are re-validated and unknown fields are rejected.
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid"
    )

    parser_id: Annotated[str, Field(description="Parser identifier")]

    parser_type: Annotated[str, Field(description="Parser type")]

    # Restricted by the pattern to: online, offline, connecting, disconnecting.
    status: Annotated[str, Field(
        description="Parser status",
        pattern="^(online|offline|connecting|disconnecting)$"
    )]

    # Defaults to the naive UTC time at which the model instance is created.
    last_seen: Annotated[datetime, Field(
        default_factory=datetime.utcnow,
        description="Last heartbeat timestamp"
    )]

    capabilities: Annotated[List[str], Field(
        default_factory=list,
        description="Parser capabilities"
    )]

    current_tasks: Annotated[int, Field(
        default=0,
        ge=0,
        description="Number of currently executing tasks"
    )]

    max_concurrent_tasks: Annotated[int, Field(
        default=5,
        ge=1,
        le=50,
        description="Maximum concurrent tasks this parser can handle"
    )]

    def is_online(self, timeout_seconds: int = 300) -> bool:
        """Check if parser is considered online.

        Args:
            timeout_seconds: Maximum age of ``last_seen``, in seconds, for
                the parser to still count as online.

        Returns:
            ``True`` when ``status`` is ``"online"`` and the last heartbeat
            is no older than ``timeout_seconds``.
        """
        if self.status != "online":
            return False

        last_seen = self.last_seen
        if last_seen.tzinfo is not None and last_seen.utcoffset() is not None:
            # ``last_seen`` is timezone-aware (e.g. parsed from an ISO string
            # carrying an offset). Subtracting it from a naive ``utcnow()``
            # would raise TypeError, so compare against an aware "now".
            now = datetime.now(last_seen.tzinfo)
        else:
            # Naive timestamps are treated as UTC, matching the field default.
            now = datetime.utcnow()

        return (now - last_seen).total_seconds() <= timeout_seconds

    def can_accept_task(self) -> bool:
        """Check if parser can accept new tasks.

        Returns:
            ``True`` when the parser is online (default timeout) and
            ``current_tasks`` is below ``max_concurrent_tasks``.
        """
        return (
            self.is_online() and
            self.current_tasks < self.max_concurrent_tasks
        )
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
# Custom exceptions for scheduler
|
|
539
|
+
class SchedulerError(Exception):
    """Root of the scheduler exception hierarchy.

    Attributes:
        message: Human-readable error text (also passed to ``Exception``).
        task_id: Identifier of the task involved, if any.
        details: Extra string key/value context; never ``None``.
    """

    def __init__(self, message: str, task_id: Optional[str] = None, details: Optional[dict[str, str]] = None):
        super().__init__(message)
        self.message = message
        self.task_id = task_id
        # A missing or empty mapping is replaced by a fresh dict per instance.
        self.details = details if details else {}
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
class TaskValidationError(SchedulerError):
    """Scheduler error for task validation failures.

    Adds no behaviour of its own; it exists so callers can catch this
    category separately from other ``SchedulerError`` subclasses.
    """
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
class TaskExecutionError(SchedulerError):
    """Scheduler error for failures during task execution.

    Adds no behaviour of its own; it exists so callers can catch this
    category separately from other ``SchedulerError`` subclasses.
    """
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
class ParserUnavailableError(SchedulerError):
    """Scheduler error for a parser that is not available to run a task.

    Adds no behaviour of its own; it exists so callers can catch this
    category separately from other ``SchedulerError`` subclasses.
    """
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
class QueueError(SchedulerError):
    """Scheduler error for failed Redis queue operations.

    Adds no behaviour of its own; it exists so callers can catch this
    category separately from other ``SchedulerError`` subclasses.
    """
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
# Export all models and exceptions
|
|
570
|
+
# Public names of this module, grouped by kind. Order is preserved.
__all__ = [
    # Enums
    "TaskStatus", "TaskPriority", "ScheduleType", "TaskExecutionMode",

    # Models
    "CronExpression", "TaskParameters", "TaskRetryConfig", "ScheduledTask",
    "TaskQueue", "TaskExecutionResult", "ParserStatus",

    # Exceptions
    "SchedulerError", "TaskValidationError", "TaskExecutionError",
    "ParserUnavailableError", "QueueError",
]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session-related models.
|
|
3
|
+
|
|
4
|
+
Contains models for parser session management and tracking.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Dict, Literal
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
from typing_extensions import Annotated
|
|
11
|
+
|
|
12
|
+
from .base import BaseParserModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ParserSession(BaseParserModel):
    """State and counters for one parser session.

    ``session_id``, ``parser_id`` and ``session_type`` are required
    non-empty strings. A new session starts as ``"active"`` with zeroed
    counters and no end time.
    """

    # Identity
    session_id: Annotated[str, Field(min_length=1, description="Unique session identifier")]
    parser_id: Annotated[str, Field(min_length=1, description="Parser ID")]
    session_type: Annotated[str, Field(min_length=1, description="Type of session (scraping, parsing, etc.)")]

    # Lifecycle
    status: Literal["active", "paused", "completed", "failed", "cancelled"] = "active"
    metadata: Annotated[Dict[str, str], Field(default_factory=dict, description="Session metadata")]
    # NOTE(review): the default is local naive time (datetime.now), whereas the
    # scheduler models default to datetime.utcnow - confirm this is intended.
    started_at: Annotated[datetime, Field(default_factory=datetime.now)]
    ended_at: Optional[datetime] = None
    duration_seconds: Optional[Annotated[int, Field(ge=0)]] = None

    # Non-negative activity counters
    commands_executed: Annotated[int, Field(ge=0)] = 0
    data_processed: Annotated[int, Field(ge=0)] = 0
    errors_count: Annotated[int, Field(ge=0)] = 0
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser Bridge Server - modular implementation assembled from handler classes.
|
|
3
|
+
|
|
4
|
+
Each handler group lives in its own class; ParserBridgeServer combines them via multiple inheritance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Callable
|
|
8
|
+
from unrealon_rpc.logging import get_logger
|
|
9
|
+
|
|
10
|
+
from .base import ParserBridgeServerBase
|
|
11
|
+
from .handlers import ParserHandlers, SessionHandlers, CommandHandlers, ProxyHandlers, HTMLParserHandlers, LoggingHandlers, SchedulerHandlers
|
|
12
|
+
|
|
13
|
+
from ..models import ParserSystemStats
|
|
14
|
+
|
|
15
|
+
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ParserBridgeServer(
    ParserBridgeServerBase,
    ParserHandlers,
    SessionHandlers,
    CommandHandlers,
    ProxyHandlers,
    HTMLParserHandlers,
    LoggingHandlers,
    SchedulerHandlers,
):
    """
    Parser bridge server with every handler group wired in.

    Inherits the base server plus one handler class per area and registers
    their ``handle_*`` methods as RPC endpoints:
    - Parser registration, status, listing and health
    - Session start and end
    - Command execution, creation and status
    - Proxy allocation, release and check
    - HTML parsing
    - Parser logging
    - Scheduler tasks, parser status and statistics
    """

    def __init__(self, redis_url: str = "redis://localhost:6379/0", rpc_channel: str = "parser_rpc", pubsub_prefix: str = "parser", **kwargs):
        """
        Set up the base server, then register all RPC methods.

        Args:
            redis_url: Redis connection URL
            rpc_channel: RPC channel name
            pubsub_prefix: PubSub channel prefix
            **kwargs: Extra keyword arguments forwarded unchanged to the
                parent initializer
        """
        super().__init__(redis_url, rpc_channel, pubsub_prefix, **kwargs)
        self._register_rpc_methods()

    def _register_rpc_methods(self) -> None:
        """Bind each RPC method name to its handler on ``self.parser_rpc``."""
        register = self.parser_rpc.register_method

        # Parser management
        register("parser.register", self.handle_parser_register)
        register("parser.get_status", self.handle_parser_get_status)
        register("parser.list", self.handle_parser_list)
        register("parser.get_health", self.handle_parser_get_health)

        # Session management
        register("parser.start_session", self.handle_session_start)
        register("parser.end_session", self.handle_session_end)

        # Command management
        register("parser.execute_command", self.handle_command_execute)
        register("command.create", self.handle_command_create)
        register("command.get_status", self.handle_command_get_status)

        # Proxy management
        register("proxy.allocate", self.handle_proxy_allocate)
        register("proxy.release", self.handle_proxy_release)
        register("proxy.check", self.handle_proxy_check)

        # HTML Parser management
        register("html_parser.parse", self.handle_html_parse)

        # Parser Logging
        register("parser.log", self.handle_parser_log)

        # Scheduler management
        register("scheduler.create_task", self.handle_scheduler_create_task)
        register("scheduler.list_tasks", self.handle_scheduler_list_tasks)
        register("scheduler.get_task", self.handle_scheduler_get_task)
        register("scheduler.cancel_task", self.handle_scheduler_cancel_task)
        register("scheduler.update_parser_status", self.handle_scheduler_update_parser_status)
        register("scheduler.get_parser_status", self.handle_scheduler_get_parser_status)
        register("scheduler.get_stats", self.handle_scheduler_get_stats)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Only the assembled server class is exported from this module.
__all__ = ["ParserBridgeServer"]
|