unrealon 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.0.dist-info/METADATA +164 -0
- unrealon-1.1.0.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
- unrealon-1.1.0.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modern Browser Manager built on Playwright
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from typing import Optional, Dict, Any, List
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from playwright.async_api import async_playwright, Browser, BrowserContext, Page
|
|
14
|
+
except ImportError:
|
|
15
|
+
async_playwright = None
|
|
16
|
+
Browser = None
|
|
17
|
+
BrowserContext = None
|
|
18
|
+
Page = None
|
|
19
|
+
|
|
20
|
+
from unrealon_rpc.logging import get_logger
|
|
21
|
+
|
|
22
|
+
from .config import BrowserConfig
|
|
23
|
+
from ..exceptions import BrowserError
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BrowserManager:
|
|
27
|
+
"""
|
|
28
|
+
🌐 Modern Browser Manager v4.0
|
|
29
|
+
|
|
30
|
+
Simplified browser automation built on Playwright with stealth capabilities.
|
|
31
|
+
Designed for the new architecture where complex automation is simplified.
|
|
32
|
+
|
|
33
|
+
Features:
|
|
34
|
+
- 🎭 Stealth Mode: Anti-detection by default
|
|
35
|
+
- 🍪 Cookie Persistence: Automatic cookie management
|
|
36
|
+
- 📸 Screenshots: Debug-friendly screenshot capture
|
|
37
|
+
- ⚡ Performance: Optimized for speed and reliability
|
|
38
|
+
- 🔧 Zero Config: Works out of the box
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
def __init__(self, config: BrowserConfig):
|
|
42
|
+
"""
|
|
43
|
+
Initialize browser manager
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
config: Browser configuration
|
|
47
|
+
"""
|
|
48
|
+
if async_playwright is None:
|
|
49
|
+
raise BrowserError(
|
|
50
|
+
"Playwright is not installed. Install it with: pip install playwright && playwright install"
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
self.config = config
|
|
54
|
+
self.logger = get_logger()
|
|
55
|
+
|
|
56
|
+
# Browser components
|
|
57
|
+
self._playwright = None
|
|
58
|
+
self._browser: Optional[Browser] = None
|
|
59
|
+
self._context: Optional[BrowserContext] = None
|
|
60
|
+
self._page: Optional[Page] = None
|
|
61
|
+
|
|
62
|
+
# State
|
|
63
|
+
self._is_initialized = False
|
|
64
|
+
self._session_id = str(uuid.uuid4())
|
|
65
|
+
|
|
66
|
+
# ==========================================
|
|
67
|
+
# LIFECYCLE MANAGEMENT
|
|
68
|
+
# ==========================================
|
|
69
|
+
|
|
70
|
+
async def initialize(self) -> None:
|
|
71
|
+
"""Initialize browser components"""
|
|
72
|
+
if self._is_initialized:
|
|
73
|
+
return
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
self.logger.info("Initializing browser manager...")
|
|
77
|
+
|
|
78
|
+
# Start Playwright
|
|
79
|
+
self._playwright = await async_playwright().start()
|
|
80
|
+
|
|
81
|
+
# Launch browser
|
|
82
|
+
browser_args = self._get_browser_args()
|
|
83
|
+
|
|
84
|
+
if self.config.browser_type == "chromium":
|
|
85
|
+
self._browser = await self._playwright.chromium.launch(**browser_args)
|
|
86
|
+
elif self.config.browser_type == "firefox":
|
|
87
|
+
self._browser = await self._playwright.firefox.launch(**browser_args)
|
|
88
|
+
elif self.config.browser_type == "webkit":
|
|
89
|
+
self._browser = await self._playwright.webkit.launch(**browser_args)
|
|
90
|
+
else:
|
|
91
|
+
raise BrowserError(f"Unsupported browser type: {self.config.browser_type}")
|
|
92
|
+
|
|
93
|
+
# Create context
|
|
94
|
+
context_args = self._get_context_args()
|
|
95
|
+
self._context = await self._browser.new_context(**context_args)
|
|
96
|
+
|
|
97
|
+
# Load cookies if available
|
|
98
|
+
await self._load_cookies()
|
|
99
|
+
|
|
100
|
+
# Create page
|
|
101
|
+
self._page = await self._context.new_page()
|
|
102
|
+
|
|
103
|
+
# Setup stealth mode
|
|
104
|
+
if self.config.stealth_mode:
|
|
105
|
+
await self._setup_stealth()
|
|
106
|
+
|
|
107
|
+
# Set timeouts
|
|
108
|
+
self._page.set_default_timeout(self.config.page_timeout)
|
|
109
|
+
self._page.set_default_navigation_timeout(self.config.navigation_timeout)
|
|
110
|
+
|
|
111
|
+
self._is_initialized = True
|
|
112
|
+
self.logger.info(f"Browser initialized: {self.config.browser_type}")
|
|
113
|
+
|
|
114
|
+
except Exception as e:
|
|
115
|
+
await self.cleanup()
|
|
116
|
+
raise BrowserError(f"Failed to initialize browser: {e}")
|
|
117
|
+
|
|
118
|
+
async def cleanup(self) -> None:
|
|
119
|
+
"""Clean up browser resources"""
|
|
120
|
+
self.logger.info("Cleaning up browser resources...")
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
# Save cookies
|
|
124
|
+
if self._context and self.config.persist_cookies:
|
|
125
|
+
await self._save_cookies()
|
|
126
|
+
|
|
127
|
+
# Close page
|
|
128
|
+
if self._page:
|
|
129
|
+
await self._page.close()
|
|
130
|
+
self._page = None
|
|
131
|
+
|
|
132
|
+
# Close context
|
|
133
|
+
if self._context:
|
|
134
|
+
await self._context.close()
|
|
135
|
+
self._context = None
|
|
136
|
+
|
|
137
|
+
# Close browser
|
|
138
|
+
if self._browser:
|
|
139
|
+
await self._browser.close()
|
|
140
|
+
self._browser = None
|
|
141
|
+
|
|
142
|
+
# Stop Playwright
|
|
143
|
+
if self._playwright:
|
|
144
|
+
await self._playwright.stop()
|
|
145
|
+
self._playwright = None
|
|
146
|
+
|
|
147
|
+
self._is_initialized = False
|
|
148
|
+
self.logger.info("Browser cleanup completed")
|
|
149
|
+
|
|
150
|
+
except Exception as e:
|
|
151
|
+
self.logger.error(f"Error during browser cleanup: {e}")
|
|
152
|
+
|
|
153
|
+
# ==========================================
|
|
154
|
+
# HIGH-LEVEL METHODS
|
|
155
|
+
# ==========================================
|
|
156
|
+
|
|
157
|
+
async def get_html(self, url: str, wait_for: Optional[str] = None) -> str:
|
|
158
|
+
"""
|
|
159
|
+
Get HTML content from URL
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
url: Target URL
|
|
163
|
+
wait_for: Optional CSS selector to wait for
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
HTML content as string
|
|
167
|
+
"""
|
|
168
|
+
await self._ensure_initialized()
|
|
169
|
+
|
|
170
|
+
try:
|
|
171
|
+
self.logger.info(f"Navigating to: {url}")
|
|
172
|
+
|
|
173
|
+
# Navigate to URL
|
|
174
|
+
await self._page.goto(url, wait_until="domcontentloaded")
|
|
175
|
+
|
|
176
|
+
# Wait for specific element if requested
|
|
177
|
+
if wait_for:
|
|
178
|
+
await self._page.wait_for_selector(wait_for, timeout=self.config.element_timeout)
|
|
179
|
+
|
|
180
|
+
# Get HTML content
|
|
181
|
+
html = await self._page.content()
|
|
182
|
+
|
|
183
|
+
# Save screenshot if debugging
|
|
184
|
+
if self.config.save_screenshots:
|
|
185
|
+
await self._save_screenshot(f"get_html_{url.replace('/', '_')}")
|
|
186
|
+
|
|
187
|
+
self.logger.info(f"Retrieved HTML content: {len(html)} characters")
|
|
188
|
+
return html
|
|
189
|
+
|
|
190
|
+
except Exception as e:
|
|
191
|
+
if self.config.save_screenshots:
|
|
192
|
+
await self._save_screenshot(f"error_{url.replace('/', '_')}")
|
|
193
|
+
raise BrowserError(f"Failed to get HTML from {url}: {e}")
|
|
194
|
+
|
|
195
|
+
async def extract_elements(
|
|
196
|
+
self,
|
|
197
|
+
url: str,
|
|
198
|
+
selector: str,
|
|
199
|
+
attribute: Optional[str] = None
|
|
200
|
+
) -> List[str]:
|
|
201
|
+
"""
|
|
202
|
+
Extract elements from URL using CSS selector
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
url: Target URL
|
|
206
|
+
selector: CSS selector
|
|
207
|
+
attribute: Optional attribute to extract (default: text content)
|
|
208
|
+
|
|
209
|
+
Returns:
|
|
210
|
+
List of extracted values
|
|
211
|
+
"""
|
|
212
|
+
await self._ensure_initialized()
|
|
213
|
+
|
|
214
|
+
try:
|
|
215
|
+
self.logger.info(f"Extracting elements from: {url}")
|
|
216
|
+
|
|
217
|
+
# Navigate to URL
|
|
218
|
+
await self._page.goto(url, wait_until="domcontentloaded")
|
|
219
|
+
|
|
220
|
+
# Wait for elements
|
|
221
|
+
await self._page.wait_for_selector(selector, timeout=self.config.element_timeout)
|
|
222
|
+
|
|
223
|
+
# Extract elements
|
|
224
|
+
if attribute:
|
|
225
|
+
elements = await self._page.eval_on_selector_all(
|
|
226
|
+
selector,
|
|
227
|
+
f"elements => elements.map(el => el.getAttribute('{attribute}'))"
|
|
228
|
+
)
|
|
229
|
+
else:
|
|
230
|
+
elements = await self._page.eval_on_selector_all(
|
|
231
|
+
selector,
|
|
232
|
+
"elements => elements.map(el => el.textContent.trim())"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# Filter out empty values
|
|
236
|
+
elements = [el for el in elements if el and el.strip()]
|
|
237
|
+
|
|
238
|
+
self.logger.info(f"Extracted {len(elements)} elements")
|
|
239
|
+
return elements
|
|
240
|
+
|
|
241
|
+
except Exception as e:
|
|
242
|
+
raise BrowserError(f"Failed to extract elements from {url}: {e}")
|
|
243
|
+
|
|
244
|
+
async def screenshot(self, filename: Optional[str] = None) -> Path:
|
|
245
|
+
"""
|
|
246
|
+
Take screenshot of current page
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
filename: Optional filename (auto-generated if not provided)
|
|
250
|
+
|
|
251
|
+
Returns:
|
|
252
|
+
Path to screenshot file
|
|
253
|
+
"""
|
|
254
|
+
await self._ensure_initialized()
|
|
255
|
+
|
|
256
|
+
if not filename:
|
|
257
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
258
|
+
filename = f"screenshot_{timestamp}.png"
|
|
259
|
+
|
|
260
|
+
if not filename.endswith('.png'):
|
|
261
|
+
filename += '.png'
|
|
262
|
+
|
|
263
|
+
screenshot_path = self.config.screenshots_dir / filename
|
|
264
|
+
|
|
265
|
+
try:
|
|
266
|
+
await self._page.screenshot(path=str(screenshot_path), full_page=True)
|
|
267
|
+
self.logger.info(f"Screenshot saved: {screenshot_path}")
|
|
268
|
+
return screenshot_path
|
|
269
|
+
|
|
270
|
+
except Exception as e:
|
|
271
|
+
raise BrowserError(f"Failed to take screenshot: {e}")
|
|
272
|
+
|
|
273
|
+
# ==========================================
|
|
274
|
+
# UTILITY METHODS
|
|
275
|
+
# ==========================================
|
|
276
|
+
|
|
277
|
+
async def health_check(self) -> Dict[str, Any]:
|
|
278
|
+
"""Browser health check"""
|
|
279
|
+
return {
|
|
280
|
+
"status": "healthy" if self._is_initialized else "not_initialized",
|
|
281
|
+
"browser_type": self.config.browser_type,
|
|
282
|
+
"session_id": self._session_id,
|
|
283
|
+
"stealth_mode": self.config.stealth_mode,
|
|
284
|
+
"headless": self.config.headless,
|
|
285
|
+
"initialized": self._is_initialized
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
# ==========================================
|
|
289
|
+
# PRIVATE METHODS
|
|
290
|
+
# ==========================================
|
|
291
|
+
|
|
292
|
+
async def _ensure_initialized(self) -> None:
|
|
293
|
+
"""Ensure browser is initialized"""
|
|
294
|
+
if not self._is_initialized:
|
|
295
|
+
await self.initialize()
|
|
296
|
+
|
|
297
|
+
def _get_browser_args(self) -> Dict[str, Any]:
|
|
298
|
+
"""Get browser launch arguments"""
|
|
299
|
+
args = {
|
|
300
|
+
"headless": self.config.headless,
|
|
301
|
+
"args": self.config.extra_args.copy()
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
# Add stealth arguments
|
|
305
|
+
if self.config.stealth_mode:
|
|
306
|
+
args["args"].extend([
|
|
307
|
+
"--no-first-run",
|
|
308
|
+
"--no-default-browser-check",
|
|
309
|
+
"--disable-blink-features=AutomationControlled",
|
|
310
|
+
"--disable-web-security",
|
|
311
|
+
"--disable-features=VizDisplayCompositor"
|
|
312
|
+
])
|
|
313
|
+
|
|
314
|
+
# Add performance arguments
|
|
315
|
+
if self.config.disable_images:
|
|
316
|
+
args["args"].append("--disable-images")
|
|
317
|
+
|
|
318
|
+
return args
|
|
319
|
+
|
|
320
|
+
def _get_context_args(self) -> Dict[str, Any]:
|
|
321
|
+
"""Get browser context arguments"""
|
|
322
|
+
args = {
|
|
323
|
+
"viewport": {
|
|
324
|
+
"width": self.config.viewport_width,
|
|
325
|
+
"height": self.config.viewport_height
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
# User agent
|
|
330
|
+
if self.config.user_agent:
|
|
331
|
+
args["user_agent"] = self.config.user_agent
|
|
332
|
+
|
|
333
|
+
# Proxy
|
|
334
|
+
if self.config.proxy_url:
|
|
335
|
+
proxy_config = {"server": self.config.proxy_url}
|
|
336
|
+
if self.config.proxy_username:
|
|
337
|
+
proxy_config["username"] = self.config.proxy_username
|
|
338
|
+
if self.config.proxy_password:
|
|
339
|
+
proxy_config["password"] = self.config.proxy_password
|
|
340
|
+
args["proxy"] = proxy_config
|
|
341
|
+
|
|
342
|
+
# Disable resources
|
|
343
|
+
if self.config.disable_javascript:
|
|
344
|
+
args["java_script_enabled"] = False
|
|
345
|
+
|
|
346
|
+
return args
|
|
347
|
+
|
|
348
|
+
async def _setup_stealth(self) -> None:
|
|
349
|
+
"""Setup stealth mode"""
|
|
350
|
+
# Add stealth scripts
|
|
351
|
+
await self._page.add_init_script("""
|
|
352
|
+
// Remove webdriver property
|
|
353
|
+
Object.defineProperty(navigator, 'webdriver', {
|
|
354
|
+
get: () => undefined,
|
|
355
|
+
});
|
|
356
|
+
|
|
357
|
+
// Mock plugins
|
|
358
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
359
|
+
get: () => [1, 2, 3, 4, 5],
|
|
360
|
+
});
|
|
361
|
+
|
|
362
|
+
// Mock languages
|
|
363
|
+
Object.defineProperty(navigator, 'languages', {
|
|
364
|
+
get: () => ['en-US', 'en'],
|
|
365
|
+
});
|
|
366
|
+
""")
|
|
367
|
+
|
|
368
|
+
async def _load_cookies(self) -> None:
|
|
369
|
+
"""Load cookies from file"""
|
|
370
|
+
if not self.config.persist_cookies or not self.config.cookies_file:
|
|
371
|
+
return
|
|
372
|
+
|
|
373
|
+
try:
|
|
374
|
+
if self.config.cookies_file.exists():
|
|
375
|
+
with open(self.config.cookies_file, 'r') as f:
|
|
376
|
+
cookies = json.load(f)
|
|
377
|
+
await self._context.add_cookies(cookies)
|
|
378
|
+
self.logger.info(f"Loaded {len(cookies)} cookies")
|
|
379
|
+
except Exception as e:
|
|
380
|
+
self.logger.warning(f"Failed to load cookies: {e}")
|
|
381
|
+
|
|
382
|
+
async def _save_cookies(self) -> None:
|
|
383
|
+
"""Save cookies to file"""
|
|
384
|
+
if not self.config.persist_cookies or not self.config.cookies_file:
|
|
385
|
+
return
|
|
386
|
+
|
|
387
|
+
try:
|
|
388
|
+
cookies = await self._context.cookies()
|
|
389
|
+
with open(self.config.cookies_file, 'w') as f:
|
|
390
|
+
json.dump(cookies, f, indent=2)
|
|
391
|
+
self.logger.info(f"Saved {len(cookies)} cookies")
|
|
392
|
+
except Exception as e:
|
|
393
|
+
self.logger.warning(f"Failed to save cookies: {e}")
|
|
394
|
+
|
|
395
|
+
async def _save_screenshot(self, name: str) -> None:
|
|
396
|
+
"""Save debug screenshot"""
|
|
397
|
+
try:
|
|
398
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
399
|
+
filename = f"{name}_{timestamp}.png"
|
|
400
|
+
await self.screenshot(filename)
|
|
401
|
+
except Exception as e:
|
|
402
|
+
self.logger.warning(f"Failed to save debug screenshot: {e}")
|
|
403
|
+
|
|
404
|
+
# ==========================================
|
|
405
|
+
# CONTEXT MANAGER SUPPORT
|
|
406
|
+
# ==========================================
|
|
407
|
+
|
|
408
|
+
async def __aenter__(self):
|
|
409
|
+
"""Async context manager entry"""
|
|
410
|
+
await self.initialize()
|
|
411
|
+
return self
|
|
412
|
+
|
|
413
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Leave ``async with``: await ``self.cleanup()`` and do not suppress errors."""
    await self.cleanup()
    # A falsy return lets any exception from the ``async with`` body propagate.
    return False
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""
|
|
2
|
+
UnrealOn Driver exceptions
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ParserError(Exception):
    """Root of the exception hierarchy defined in this module."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BrowserError(ParserError):
    """Raised for browser-related failures; a ``ParserError`` subclass."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HTMLCleaningError(ParserError):
    """Raised for HTML cleaning failures; a ``ParserError`` subclass."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ConfigurationError(ParserError):
    """Raised for configuration failures; a ``ParserError`` subclass."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ConnectionError(ParserError):
    """Raised for connection failures; a ``ParserError`` subclass.

    Note: the name is the same as Python's built-in ``ConnectionError``, so
    importing it into a module hides the built-in there. This class derives
    from ``ParserError`` and is therefore not an ``OSError``.
    """
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parser management system with specialized managers
|
|
3
|
+
|
|
4
|
+
Strict Pydantic v2 compliance and type safety
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .parser_manager import ParserManager, ParserManagerConfig, ParserStats, get_parser_manager, quick_parse
|
|
8
|
+
from .daemon_manager import DaemonManager, DaemonStatus
|
|
9
|
+
from .cli_manager import CLIManager
|
|
10
|
+
from .managers import (
|
|
11
|
+
ConfigManager, ParserConfig,
|
|
12
|
+
ResultManager, ParseResult, ParseMetrics, OperationStatus,
|
|
13
|
+
ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity,
|
|
14
|
+
LoggingManager, LoggingConfig, LogLevel, LogContext,
|
|
15
|
+
HTMLManager, HTMLCleaningConfig, HTMLCleaningStats,
|
|
16
|
+
BrowserManager, BrowserConfig, BrowserStats
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Public names re-exported by this package, grouped by the module they come from.
__all__ = [
    # Main Parser Manager
    "ParserManager", "ParserManagerConfig", "ParserStats",
    "get_parser_manager", "quick_parse",

    # Daemon Manager
    "DaemonManager", "DaemonStatus",

    # CLI Manager
    "CLIManager",

    # Individual Managers
    "ConfigManager", "ParserConfig",
    "ResultManager", "ParseResult", "ParseMetrics", "OperationStatus",
    "ErrorManager", "RetryConfig", "ErrorInfo", "ErrorSeverity",
    "LoggingManager", "LoggingConfig", "LogLevel", "LogContext",
    "HTMLManager", "HTMLCleaningConfig", "HTMLCleaningStats",
    "BrowserManager", "BrowserConfig", "BrowserStats",
]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI Manager - Base class for parser CLI interfaces
|
|
3
|
+
|
|
4
|
+
Strict Pydantic v2 compliance and type safety
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional, Any, Dict
|
|
11
|
+
import click
|
|
12
|
+
|
|
13
|
+
from .parser_manager import ParserManager, ParserManagerConfig
|
|
14
|
+
from .managers import ParserConfig, LoggingConfig, HTMLCleaningConfig, BrowserConfig
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CLIManager(ParserManager):
    """``ParserManager`` subclass that backs command-line entry points.

    Every ``run_*`` coroutine awaits ``initialize()``, does its work while
    reporting through ``click.echo``, converts any exception into a ``False``
    result, and always awaits ``cleanup()`` before returning.
    """

    def __init__(self, parser_name: str, parser_type: str, system_dir: str,
                 bridge_enabled: bool = False, websocket_url: str = "ws://localhost:8000/ws"):
        """Build a ``ParserManagerConfig`` and pass it to the base class.

        Args:
            parser_name: Used for both the parser config and the logging config.
            parser_type: Stored in the parser config.
            system_dir: Converted to ``Path`` and stored in the parser config.
            bridge_enabled: Forwarded to ``ParserManagerConfig``.
            websocket_url: Accepted, but not read anywhere in this constructor.
        """
        # Keyword arguments are evaluated left to right, so the sub-configs are
        # still constructed in the order parser, logging, HTML, browser.
        super().__init__(
            ParserManagerConfig(
                parser_config=ParserConfig(
                    parser_name=parser_name,
                    parser_type=parser_type,
                    system_dir=Path(system_dir),
                ),
                logging_config=LoggingConfig(parser_name=parser_name),
                html_config=HTMLCleaningConfig(),
                browser_config=BrowserConfig(),
                bridge_enabled=bridge_enabled,
            )
        )

    async def run_parse_command(self, urls: Optional[List[str]] = None) -> bool:
        """Parse each URL in turn and report how many succeeded.

        Returns:
            ``True`` when at least one result has ``"success"`` equal to the
            string ``"true"``; ``False`` when ``urls`` is empty/``None``, when
            nothing succeeded, or when an exception was raised.
        """
        try:
            await self.initialize()

            if not urls:
                click.echo("❌ No URLs provided", err=True)
                return False

            click.echo(f"🚀 Parsing {len(urls)} URLs...")
            # Sequential on purpose: one awaited parse per URL, in order.
            outcomes = [await self.parse_url(target) for target in urls]

            succeeded = len([o for o in outcomes if o.get("success") == "true"])
            click.echo(f"✅ Parse completed: {succeeded}/{len(outcomes)} URLs successful")
            return succeeded > 0

        except Exception as e:
            click.echo(f"❌ Parse error: {e}", err=True)
            return False
        finally:
            await self.cleanup()

    async def run_test_command(self) -> bool:
        """Smoke-test initialization and HTML cleaning.

        Returns:
            ``True`` when ``initialize()`` and ``clean_html()`` both complete
            without raising, ``False`` otherwise.
        """
        try:
            click.echo("🧪 Running test...")

            await self.initialize()
            click.echo("✅ Parser initialization: OK")

            # Run a tiny fixed document through the HTML cleaner.
            sample = "<html><body><h1>Test</h1></body></html>"
            cleaned = await self.clean_html(sample)
            click.echo(f"✅ HTML cleaning: OK ({len(sample)} → {len(cleaned)} chars)")

            click.echo("✅ All tests passed!")
            return True

        except Exception as e:
            click.echo(f"❌ Test failed: {e}", err=True)
            return False
        finally:
            await self.cleanup()

    async def run_quick_command(self, urls: List[str]) -> bool:
        """Parse each URL in turn under the "quick parse" banner.

        Returns:
            ``True`` when at least one result has ``"success"`` equal to the
            string ``"true"``; ``False`` otherwise, including on any exception.
        """
        try:
            click.echo(f"⚡ Quick parse of {len(urls)} URLs...")

            await self.initialize()
            # Sequential on purpose: one awaited parse per URL, in order.
            outcomes = [await self.parse_url(target) for target in urls]

            succeeded = len([o for o in outcomes if o.get("success") == "true"])
            click.echo(f"✅ Quick parse completed: {succeeded}/{len(outcomes)} URLs successful")

            return succeeded > 0

        except Exception as e:
            click.echo(f"❌ Quick parse error: {e}", err=True)
            return False
        finally:
            await self.cleanup()

    def show_status(self, config_data: Dict[str, Any]) -> None:
        """Print a short status summary taken from ``self.config``.

        ``config_data`` is accepted but not read by this method.
        """
        cfg = self.config
        bridge_on = cfg.bridge_enabled
        if bridge_on:
            bridge_label = 'Enabled'
        else:
            bridge_label = 'Disabled'

        click.echo("📊 Parser Status")
        click.echo("=" * 40)
        click.echo(f"Parser Name: {cfg.parser_name}")
        click.echo(f"Parser Type: {cfg.parser_type}")
        click.echo(f"System Dir: {cfg.system_dir}")
        click.echo(f"Bridge: {bridge_label}")
        if bridge_on:
            click.echo(f" URL: {cfg.websocket_url}")

    @staticmethod
    def create_config_file(config_path: Path, create_func) -> None:
        """Call ``create_func(config_path)`` and report the outcome.

        Failures are printed to stderr and not propagated.
        """
        try:
            create_func(config_path)
            click.echo(f"✅ Configuration file created: {config_path}")
            click.echo(" Edit the file to customize your parser settings")
        except Exception as e:
            click.echo(f"❌ Failed to create configuration: {e}", err=True)

    @staticmethod
    def run_async_command(coro):
        """Run ``coro`` with ``asyncio.run`` and exit the process.

        Exits with status 0 when the coroutine's result is truthy, 1 otherwise.
        """
        exit_code = 0 if asyncio.run(coro) else 1
        sys.exit(exit_code)
|