unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.1.dist-info/METADATA +722 -0
- unrealon-1.1.1.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
- unrealon-1.1.1.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Daemon Manager - Base class for parser daemons
|
|
3
|
+
|
|
4
|
+
Strict Pydantic v2 compliance and type safety
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import signal
|
|
9
|
+
import time
|
|
10
|
+
from datetime import datetime, timedelta
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional, Dict, Any
|
|
13
|
+
from pydantic import BaseModel, Field
|
|
14
|
+
|
|
15
|
+
from .parser_manager import ParserManager, ParserManagerConfig
|
|
16
|
+
from .managers import ParserConfig, LoggingConfig, HTMLCleaningConfig, BrowserConfig
|
|
17
|
+
|
|
18
|
+
# RPC removed - all commands go through WebSocket bridge
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DaemonStatus(BaseModel):
    """Point-in-time snapshot of a parser daemon.

    The first four fields are required; the scheduling and counter fields
    fall back to "no schedule, nothing run yet" defaults.
    """

    # --- required: identity and lifecycle ---
    running: bool = Field(..., description="Whether daemon is running")
    parser_id: str = Field(..., description="Parser identifier")
    started_at: datetime = Field(..., description="Daemon start time")
    uptime_seconds: float = Field(..., description="Uptime in seconds")

    # --- optional: scheduling ---
    schedule_enabled: bool = Field(False, description="Whether scheduling is active")
    next_run_at: Optional[datetime] = Field(None, description="Next scheduled run")

    # --- optional: run counters ---
    total_runs: int = Field(0, description="Total completed runs")
    successful_runs: int = Field(0, description="Successful runs")
    failed_runs: int = Field(0, description="Failed runs")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DaemonManager(ParserManager):
    """Base daemon manager with scheduling and status display.

    Builds the parser, logging, HTML and browser configs, hands them to
    ``ParserManager``, and adds a polling loop that optionally triggers
    ``_execute_run`` on a fixed interval while printing a live status block.
    Subclasses are expected to override ``_execute_run``.
    """

    def __init__(self, parser_name: str, parser_type: str, system_dir: str,
                 bridge_enabled: bool = False, websocket_url: str = "ws://localhost:8000/ws"):
        """Assemble the manager configuration and install signal handlers.

        Args:
            parser_name: Name passed to both ``ParserConfig`` and ``LoggingConfig``.
            parser_type: Parser type passed to ``ParserConfig``.
            system_dir: Directory path; wrapped in ``Path`` for ``ParserConfig``.
            bridge_enabled: Forwarded to ``ParserManagerConfig``.
            websocket_url: Accepted for interface compatibility.
        """
        # NOTE(review): websocket_url is not forwarded to any config object
        # here, so the value passed by callers currently has no effect in this
        # constructor - confirm whether it should reach ParserConfig.
        parser_config = ParserConfig(
            parser_name=parser_name,
            parser_type=parser_type,
            system_dir=Path(system_dir)
        )
        logging_config = LoggingConfig(parser_name=parser_name)

        # HTML cleaning and browser use their own defaults
        html_config = HTMLCleaningConfig()
        browser_config = BrowserConfig()

        manager_config = ParserManagerConfig(
            parser_config=parser_config,
            logging_config=logging_config,
            html_config=html_config,
            browser_config=browser_config,
            bridge_enabled=bridge_enabled
        )

        super().__init__(manager_config)

        # Daemon state
        self.running = False
        self.started_at: Optional[datetime] = None
        self.next_run_at: Optional[datetime] = None

        # Statistics
        self.total_runs = 0
        self.successful_runs = 0
        self.failed_runs = 0

        # Setup signal handlers.
        # NOTE: signal.signal() raises ValueError when called outside the main
        # thread, so instances must be constructed on the main thread.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        # RPC removed - commands come through WebSocket bridge

    def _signal_handler(self, signum: int, frame) -> None:
        """Handle SIGINT/SIGTERM by asking the daemon loop to stop."""
        self.logger.info(f"🛑 Received signal {signum}, shutting down...")
        # The loop polls this flag, so it exits on its next iteration.
        self.running = False

    # RPC methods removed - commands handled through WebSocket bridge

    async def start_daemon(self, schedule_enabled: bool = False, interval_minutes: Optional[int] = None) -> bool:
        """Initialize the parser and run the daemon loop until stopped.

        Args:
            schedule_enabled: Whether to schedule periodic runs.
            interval_minutes: Minutes between runs; scheduling is only armed
                when this is truthy and ``schedule_enabled`` is set.

        Returns:
            True when the loop exited normally, False when initialization or
            the loop raised an exception (the error is logged).
        """
        try:
            self.logger.info("🚀 Starting daemon...")
            self.running = True
            self.started_at = datetime.now()

            # Initialize parser
            await self.initialize()

            # RPC server removed - using WebSocket bridge

            # Calculate next run if scheduling enabled
            if schedule_enabled and interval_minutes:
                self._calculate_next_run(interval_minutes)

            # Blocks until self.running is cleared
            await self._daemon_loop(schedule_enabled, interval_minutes)

            return True

        except Exception as e:
            self.logger.error(f"❌ Daemon startup failed: {e}")
            return False
        finally:
            # Whatever the exit path, the daemon is no longer running; without
            # this a failed startup left get_status() reporting running=True.
            self.running = False
            await self.cleanup()

    def _calculate_next_run(self, interval_minutes: int) -> None:
        """Set ``next_run_at`` to ``interval_minutes`` from now."""
        self.next_run_at = datetime.now() + timedelta(minutes=interval_minutes)

    async def _daemon_loop(self, schedule_enabled: bool, interval_minutes: Optional[int]) -> None:
        """Poll roughly every 0.1 s while ``self.running`` is set.

        Each iteration refreshes the status display at most once per second
        and executes a run when ``next_run_at`` has been reached. Errors inside
        an iteration are logged and followed by a one-second pause.
        """
        self.logger.info("🔄 Daemon loop started")

        if schedule_enabled and self.next_run_at:
            self.logger.info(f"⏰ Next run: {self.next_run_at.strftime('%Y-%m-%d %H:%M:%S')}")
        else:
            self.logger.info("📋 Manual mode")

        last_status_update = time.time()

        while self.running:
            try:
                current_time = time.time()

                # Update status every second
                if current_time - last_status_update >= 1.0:
                    self._display_status(schedule_enabled)
                    last_status_update = current_time

                # Check for scheduled run
                if self._should_run_now():
                    await self._execute_run()
                    if interval_minutes:
                        self._calculate_next_run(interval_minutes)

                await asyncio.sleep(0.1)

            except Exception as e:
                self.logger.error(f"❌ Daemon loop error: {e}")
                await asyncio.sleep(1)

    def _display_status(self, schedule_enabled: bool) -> None:
        """Print a three-line live status block to stdout.

        Does nothing once the daemon has been asked to stop.
        """
        if not self.running:
            return

        # Clear previous lines: erase-line + cursor-up, three times, so the
        # three status lines printed below overwrite the previous block.
        print("\033[2K\033[1A" * 3, end="")

        now = datetime.now()
        uptime = (now - self.started_at).total_seconds() if self.started_at else 0

        print(f"🕐 {now.strftime('%H:%M:%S')} | ⏱️ Uptime: {int(uptime//3600):02d}:{int((uptime%3600)//60):02d}:{int(uptime%60):02d}")

        # Schedule status
        if self.next_run_at and schedule_enabled:
            seconds_until = (self.next_run_at - now).total_seconds()
            if seconds_until > 0:
                hours = int(seconds_until // 3600)
                minutes = int((seconds_until % 3600) // 60)
                seconds = int(seconds_until % 60)
                print(f"⏰ Next run in: {hours:02d}:{minutes:02d}:{seconds:02d} | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
            else:
                print(f"🚀 Running now... | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
        else:
            print(f"📋 Manual mode | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")

        status = "🟢 RUNNING" if self.running else "🔴 STOPPED"
        # system_dir is set on the ParserConfig built in __init__, so read it
        # through parser_config, the same path get_status() uses.
        print(f"{status} | 💾 System: {self.config.parser_config.system_dir}")

    def _should_run_now(self) -> bool:
        """Return True when a run is scheduled and its time has arrived."""
        if not self.next_run_at:
            return False
        return datetime.now() >= self.next_run_at

    async def _execute_run(self) -> None:
        """Execute a parsing run - override in subclass.

        The default implementation parses a placeholder URL and updates the
        run counters; any exception is logged and counted as a failed run.
        """
        self.logger.info("🚀 Starting parsing run...")

        # Count the attempt before doing any work so that total_runs always
        # equals successful_runs + failed_runs, including when parsing raises.
        self.total_runs += 1

        try:
            # Default implementation - override in subclass
            result = await self.parse_url("https://example.com")

            # NOTE(review): success is compared against the string "true";
            # confirm parse_url reports it as a string rather than a bool.
            if result.get("success") == "true":
                self.successful_runs += 1
                self.logger.info("✅ Run completed successfully")
            else:
                self.failed_runs += 1
                self.logger.error("❌ Run failed")

        except Exception as e:
            self.failed_runs += 1
            self.logger.error(f"❌ Run exception: {e}")

    def get_status(self) -> DaemonStatus:
        """Build a ``DaemonStatus`` snapshot from the current state.

        When the daemon has never been started, ``started_at`` falls back to
        the current time and the uptime is zero.
        """
        now = datetime.now()
        uptime = (now - self.started_at).total_seconds() if self.started_at else 0.0

        return DaemonStatus(
            running=self.running,
            parser_id=self.config.parser_config.parser_name,
            started_at=self.started_at or now,
            uptime_seconds=uptime,
            # Scheduling counts as enabled whenever a next run is armed
            schedule_enabled=bool(self.next_run_at),
            next_run_at=self.next_run_at,
            total_runs=self.total_runs,
            successful_runs=self.successful_runs,
            failed_runs=self.failed_runs
        )

    async def cleanup(self) -> None:
        """Cleanup daemon resources by delegating to the parent cleanup."""
        # RPC server removed - only parent cleanup needed
        await super().cleanup()
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser Managers - Specialized management components
|
|
3
|
+
|
|
4
|
+
All managers follow strict Pydantic v2 compliance and CRITICAL_REQUIREMENTS.md
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .config import ConfigManager, ParserConfig
|
|
8
|
+
from .result import ResultManager, ParseResult, ParseMetrics, OperationStatus
|
|
9
|
+
from .error import ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity
|
|
10
|
+
from .logging import LoggingManager, LoggingConfig, LogLevel, LogContext
|
|
11
|
+
from .html import HTMLManager, HTMLCleaningConfig, HTMLCleaningStats
|
|
12
|
+
from .browser import BrowserManager, BrowserConfig, BrowserStats
|
|
13
|
+
|
|
14
|
+
# Public API of the managers package, grouped by the module each name comes from.
__all__ = [
    # .config
    "ConfigManager",
    "ParserConfig",
    # .result
    "ResultManager",
    "ParseResult",
    "ParseMetrics",
    "OperationStatus",
    # .error
    "ErrorManager",
    "RetryConfig",
    "ErrorInfo",
    "ErrorSeverity",
    # .logging
    "LoggingManager",
    "LoggingConfig",
    "LogLevel",
    "LogContext",
    # .html
    "HTMLManager",
    "HTMLCleaningConfig",
    "HTMLCleaningStats",
    # .browser
    "BrowserManager",
    "BrowserConfig",
    "BrowserStats",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Browser Manager - Wrapper over unrealon_driver.browser
|
|
3
|
+
|
|
4
|
+
Simple wrapper that inherits from the main BrowserManager
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Dict, Any
|
|
8
|
+
from pydantic import BaseModel, Field, ConfigDict
|
|
9
|
+
|
|
10
|
+
from unrealon_driver.browser import BrowserManager as BaseBrowserManager, BrowserConfig as BaseBrowserConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BrowserConfig(BaseBrowserConfig):
    """Browser configuration used by the parser manager.

    Adds no fields of its own; it only sets the model options below on top of
    the base browser configuration.
    """

    # Validate on attribute assignment and reject unknown fields.
    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True,
    )
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BrowserStats(BaseModel):
    """Counters and timings describing browser usage.

    Every field starts at zero and is constrained to be non-negative.
    """

    # Validate on attribute assignment and reject unknown fields.
    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True,
    )

    # Page loading
    pages_visited: int = Field(0, ge=0)
    total_load_time: float = Field(0.0, ge=0.0)
    average_load_time: float = Field(0.0, ge=0.0)

    # Artifacts and errors
    screenshots_taken: int = Field(0, ge=0)
    cookies_saved: int = Field(0, ge=0)
    errors_count: int = Field(0, ge=0)

    # Session
    session_duration: float = Field(0.0, ge=0.0)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BrowserManager(BaseBrowserManager):
    """
    🌐 Browser Manager - parser-side wrapper over the base browser manager

    Keeps a BrowserStats record next to the inherited behaviour and tags the
    inherited health report as coming from the parser manager.
    """

    def __init__(self, config: BrowserConfig) -> None:
        """Initialise the base manager, then start with zeroed statistics."""
        super().__init__(config)
        self._stats = BrowserStats()

    def get_stats(self) -> BrowserStats:
        """Return the statistics object held by this manager."""
        return self._stats

    async def health_check(self) -> Dict[str, Any]:
        """Return the base health report with ``parser_manager`` set to True."""
        inherited = await super().health_check()
        # Copy first so the mapping returned by the base class is not mutated.
        report = {**inherited}
        report["parser_manager"] = True
        return report
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Config Manager - Type-safe configuration management with Pydantic v2
|
|
3
|
+
|
|
4
|
+
Strict compliance with CRITICAL_REQUIREMENTS.md:
|
|
5
|
+
- No Dict[str, Any] usage
|
|
6
|
+
- Complete type annotations
|
|
7
|
+
- Pydantic v2 models everywhere
|
|
8
|
+
- No mutable defaults
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Optional, List
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from pydantic import BaseModel, Field, ConfigDict, field_validator
|
|
14
|
+
import uuid
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ParserConfig(BaseModel):
    """
    Parser configuration with smart defaults and strict typing

    Zero configuration approach - everything has sensible defaults
    """
    # NOTE: building an instance has a filesystem side effect - see
    # model_post_init, which creates system_dir and screenshots_dir.
    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",
        str_strip_whitespace=True
    )

    # Parser identity
    parser_id: str = Field(
        default_factory=lambda: f"parser_{uuid.uuid4().hex[:8]}",
        description="Unique parser identifier"
    )
    parser_name: str = Field(
        default="UnrealOn Parser",
        description="Human-readable parser name"
    )
    parser_type: str = Field(
        default="generic",
        description="Parser type for classification"
    )

    # Connection settings
    websocket_url: str = Field(
        default="ws://localhost:8002/ws",
        description="WebSocket bridge URL"
    )
    api_key: Optional[str] = Field(
        default=None,
        description="API key for authentication"
    )

    # Browser settings
    headless: bool = Field(
        default=True,
        description="Run browser in headless mode"
    )
    stealth_mode: bool = Field(
        default=True,
        description="Enable stealth mode"
    )
    user_agent: Optional[str] = Field(
        default=None,
        description="Custom user agent"
    )

    # HTML cleaning settings
    aggressive_cleaning: bool = Field(
        default=True,
        description="Enable aggressive HTML cleaning"
    )
    preserve_js_data: bool = Field(
        default=True,
        description="Preserve JavaScript data during cleaning"
    )

    # Timeouts (in milliseconds), each bounded to 1 second .. 5 minutes
    page_timeout: int = Field(
        default=30000,
        ge=1000,
        le=300000,
        description="Page load timeout in milliseconds"
    )
    navigation_timeout: int = Field(
        default=30000,
        ge=1000,
        le=300000,
        description="Navigation timeout in milliseconds"
    )

    # Directories (filled in by model_post_init when left as None)
    system_dir: Optional[Path] = Field(
        default=None,
        description="System directory for logs and data"
    )
    screenshots_dir: Optional[Path] = Field(
        default=None,
        description="Screenshots directory"
    )

    # Development settings
    debug: bool = Field(
        default=False,
        description="Enable debug mode"
    )
    save_html: bool = Field(
        default=False,
        description="Save HTML files for debugging"
    )
    save_screenshots: bool = Field(
        default=False,
        description="Save screenshots for debugging"
    )

    @field_validator('parser_name')
    @classmethod
    def validate_parser_name(cls, v: str) -> str:
        """Reject blank parser names and return the name stripped of whitespace.

        Raises:
            ValueError: if ``v`` is empty or whitespace only.
        """
        stripped = v.strip()
        if not stripped:
            raise ValueError("Parser name cannot be empty")
        return stripped

    @field_validator('parser_type')
    @classmethod
    def validate_parser_type(cls, v: str) -> str:
        """Ensure the parser type is one of the supported classifications.

        Raises:
            ValueError: if ``v`` is not a supported type; the message lists
                the accepted values in alphabetical order.
        """
        allowed_types = {
            "generic", "ecommerce", "news", "jobs",
            "real_estate", "social_media", "reviews",
            "events", "directory"
        }
        if v not in allowed_types:
            # Sort before joining: set iteration order for strings varies
            # between interpreter runs, which made the message unstable.
            raise ValueError(
                f"Parser type must be one of: {', '.join(sorted(allowed_types))}"
            )
        return v

    @field_validator('websocket_url')
    @classmethod
    def validate_websocket_url(cls, v: str) -> str:
        """Check that the URL uses the ``ws://`` or ``wss://`` scheme.

        Only the prefix is checked; the rest of the URL is not parsed.

        Raises:
            ValueError: if ``v`` starts with neither scheme.
        """
        if not v.startswith(('ws://', 'wss://')):
            raise ValueError("WebSocket URL must start with ws:// or wss://")
        return v

    def model_post_init(self, __context) -> None:
        """Fill in default directories and create them on disk.

        ``system_dir`` defaults to ``<current working directory>/system`` and
        ``screenshots_dir`` to ``<system_dir>/screenshots``. Both directories
        are created (including parents) if they do not exist yet.
        """
        # Setup system directory if not provided
        if self.system_dir is None:
            self.system_dir = Path.cwd() / "system"

        # Setup screenshots directory if not provided
        if self.screenshots_dir is None:
            self.screenshots_dir = self.system_dir / "screenshots"

        # Create directories
        self.system_dir.mkdir(parents=True, exist_ok=True)
        self.screenshots_dir.mkdir(parents=True, exist_ok=True)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class ConfigManager:
    """
    🔧 Config Manager - Type-safe configuration management

    Holds one active ``ParserConfig`` plus a set of named, in-memory
    profiles. Every change goes through ``ParserConfig.model_validate``, so
    the active configuration is always a fully validated model.

    Features:
    - Pydantic v2 validation
    - Loading from environment variables (``load_from_env``)
    - Configuration profiles (kept in memory only)
    - Export to dict / ``KEY=value`` lines
    """

    def __init__(self, config: Optional[ParserConfig] = None):
        """Start with *config*, or with a default ``ParserConfig`` if omitted."""
        self._config: ParserConfig = config or ParserConfig()
        self._profiles: dict[str, ParserConfig] = {}
        self._current_profile: Optional[str] = None

    @property
    def config(self) -> ParserConfig:
        """Get current configuration"""
        return self._config

    def update_config(self, **kwargs: object) -> None:
        """Replace the active configuration with a copy that has *kwargs* applied.

        The merged data is re-validated as a whole; a validation failure
        propagates and leaves the previous configuration in place.
        """
        current_data = self._config.model_dump()
        current_data.update(kwargs)
        self._config = ParserConfig.model_validate(current_data)

    def load_from_dict(self, config_dict: dict[str, object]) -> None:
        """Replace the active configuration with one validated from *config_dict*.

        Fields missing from the dictionary take their ``ParserConfig``
        defaults; they are not carried over from the current configuration.
        """
        self._config = ParserConfig.model_validate(config_dict)

    def load_from_env(self, prefix: str = "PARSER_") -> None:
        """Overlay the active configuration with ``<prefix>*`` environment variables.

        The prefix is stripped and the remainder lower-cased to get the field
        name. Boolean fields are true for ``true``/``1``/``yes``/``on``
        (case-insensitive) and false for anything else; timeout fields are
        converted with ``int`` and directory fields with ``Path``. Every other
        value is passed through as a string.

        Raises:
            ValueError: if a timeout variable is not an integer.
            pydantic.ValidationError: if the merged data is invalid, including
                a prefixed variable that does not name a config field.
        """
        import os

        bool_keys = {
            'headless', 'stealth_mode', 'aggressive_cleaning',
            'preserve_js_data', 'debug', 'save_html', 'save_screenshots',
        }
        int_keys = {'page_timeout', 'navigation_timeout'}
        path_keys = {'system_dir', 'screenshots_dir'}
        truthy = ('true', '1', 'yes', 'on')

        env_config: dict[str, object] = {}
        for key, value in os.environ.items():
            if not key.startswith(prefix):
                continue
            config_key = key[len(prefix):].lower()

            # Convert string values to appropriate types
            if config_key in bool_keys:
                env_config[config_key] = value.lower() in truthy
            elif config_key in int_keys:
                env_config[config_key] = int(value)
            elif config_key in path_keys:
                env_config[config_key] = Path(value)
            else:
                env_config[config_key] = value

        if env_config:
            current_data = self._config.model_dump()
            current_data.update(env_config)
            self._config = ParserConfig.model_validate(current_data)

    def save_profile(self, name: str) -> None:
        """Store a copy of the active configuration under *name*.

        Raises:
            ValueError: if *name* is empty or whitespace only.
        """
        if not name.strip():
            raise ValueError("Profile name cannot be empty")
        # Re-validate from a dump so the profile is independent of later
        # changes to the active configuration object.
        self._profiles[name] = ParserConfig.model_validate(self._config.model_dump())

    def load_profile(self, name: str) -> None:
        """Make a copy of the saved profile *name* the active configuration.

        Raises:
            ValueError: if no profile with that name exists.
        """
        if name not in self._profiles:
            raise ValueError(f"Profile '{name}' not found")
        self._config = ParserConfig.model_validate(self._profiles[name].model_dump())
        self._current_profile = name

    def get_profiles(self) -> List[str]:
        """Get list of available profiles"""
        return list(self._profiles)

    def delete_profile(self, name: str) -> None:
        """Remove the saved profile *name*.

        If it was the current profile, the current-profile marker is cleared;
        the active configuration itself is left unchanged.

        Raises:
            ValueError: if no profile with that name exists.
        """
        if name not in self._profiles:
            raise ValueError(f"Profile '{name}' not found")
        del self._profiles[name]
        if self._current_profile == name:
            self._current_profile = None

    def get_current_profile(self) -> Optional[str]:
        """Get current profile name"""
        return self._current_profile

    def validate_config(self) -> List[str]:
        """Check the active configuration and return human-readable issues.

        Probes write access to ``system_dir`` by creating and removing a
        ``.test`` file there, and flags timeouts below 5000 ms. An empty list
        means no issues were found.
        """
        issues: List[str] = []

        # Check directory permissions
        try:
            test_file = self._config.system_dir / ".test"
            test_file.touch()
            test_file.unlink()
        except PermissionError:
            issues.append(f"No write permission for system directory: {self._config.system_dir}")
        except Exception as e:
            # Best-effort probe: any other failure is reported, not raised.
            issues.append(f"System directory issue: {e}")

        # Check timeouts are reasonable
        if self._config.page_timeout < 5000:
            issues.append("Page timeout is very low (< 5 seconds)")
        if self._config.navigation_timeout < 5000:
            issues.append("Navigation timeout is very low (< 5 seconds)")

        return issues

    def to_dict(self) -> dict[str, object]:
        """Export the configuration as a JSON-compatible dictionary.

        Values keep their JSON types (bool, int, str, or ``None``); paths are
        rendered as strings.
        """
        return self._config.model_dump(mode='json')

    def to_env_format(self, prefix: str = "PARSER_") -> List[str]:
        """Export the configuration as ``<prefix><FIELD>=<value>`` lines.

        Fields whose value is ``None`` are omitted. Writing them as the text
        ``None`` would make ``load_from_env`` read them back as the literal
        string ``"None"`` instead of an unset value.
        """
        return [
            f"{prefix}{key.upper()}={value}"
            for key, value in self.to_dict().items()
            if value is not None
        ]
|