unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.1.dist-info/METADATA +722 -0
- unrealon-1.1.1.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
- unrealon-1.1.1.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -1,638 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
🚀 Revolutionary Parser Class - UnrealOn Driver v3.0
|
|
3
|
-
|
|
4
|
-
Zero-configuration web automation with AI-first design and multiple execution modes.
|
|
5
|
-
Built from scratch for modern web automation without legacy complexity.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import asyncio
|
|
9
|
-
import os
|
|
10
|
-
import sys
|
|
11
|
-
from datetime import datetime, timezone
|
|
12
|
-
from typing import Any, Dict, List, Optional, Union, Callable
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
|
|
15
|
-
# Core exceptions
|
|
16
|
-
from .exceptions import ParserError, ConfigurationError
|
|
17
|
-
|
|
18
|
-
# Service integrations
|
|
19
|
-
from unrealon_driver.src.services.browser_service import BrowserService
|
|
20
|
-
from unrealon_driver.src.services.llm import LLMService
|
|
21
|
-
from unrealon_driver.src.services.llm.browser_llm_service import BrowserLLMService
|
|
22
|
-
from unrealon_driver.src.services.websocket_service import WebSocketService
|
|
23
|
-
from unrealon_driver.src.logging import DriverLogger, ensure_driver_logger
|
|
24
|
-
from unrealon_driver.src.services.metrics_service import MetricsService
|
|
25
|
-
|
|
26
|
-
# Configuration system
|
|
27
|
-
from unrealon_driver.src.config.auto_config import AutoConfig
|
|
28
|
-
|
|
29
|
-
# Execution modes
|
|
30
|
-
from unrealon_driver.src.execution.test_mode import TestMode
|
|
31
|
-
from unrealon_driver.src.execution.daemon_mode import DaemonMode
|
|
32
|
-
from unrealon_driver.src.execution.scheduled_mode import ScheduledMode
|
|
33
|
-
from unrealon_driver.src.execution.interactive_mode import InteractiveMode
|
|
34
|
-
|
|
35
|
-
# Type-safe execution configuration
|
|
36
|
-
from unrealon_driver.src.dto.execution import (
|
|
37
|
-
ParserTestConfig,
|
|
38
|
-
DaemonModeConfig,
|
|
39
|
-
ScheduledModeConfig,
|
|
40
|
-
InteractiveModeConfig,
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class Parser:
|
|
45
|
-
"""
|
|
46
|
-
🚀 Revolutionary Parser Class
|
|
47
|
-
|
|
48
|
-
Zero-configuration web automation with AI-first design.
|
|
49
|
-
|
|
50
|
-
Features:
|
|
51
|
-
- 🎯 Zero Configuration: Everything works out of the box
|
|
52
|
-
- 🤖 AI-First Design: LLM integration as core feature
|
|
53
|
-
- 🔌 Multiple Execution Modes: test, daemon, scheduled, interactive
|
|
54
|
-
- 🌐 Smart Browser: Intelligent automation with stealth
|
|
55
|
-
- ⏰ Human-Readable Scheduling: "30m", "1h", "daily"
|
|
56
|
-
- 📊 Built-in Monitoring: Enterprise observability
|
|
57
|
-
|
|
58
|
-
Quick Start:
|
|
59
|
-
class MyParser(Parser):
|
|
60
|
-
async def parse(self):
|
|
61
|
-
# Simple browser extraction
|
|
62
|
-
return await self.browser.extract("https://example.com", ".item")
|
|
63
|
-
|
|
64
|
-
# AI-powered extraction (browser + LLM combined)
|
|
65
|
-
return await self.browser_llm.extract("https://example.com", schema={
|
|
66
|
-
"products": [{"name": "string", "price": "number"}]
|
|
67
|
-
})
|
|
68
|
-
|
|
69
|
-
# Development testing
|
|
70
|
-
result = await MyParser().test()
|
|
71
|
-
|
|
72
|
-
# Production daemon (WebSocket service)
|
|
73
|
-
await MyParser().daemon()
|
|
74
|
-
|
|
75
|
-
# Automated scheduling
|
|
76
|
-
await MyParser().schedule(every="30m")
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
def __init__(
|
|
80
|
-
self,
|
|
81
|
-
parser_id: Optional[str] = None,
|
|
82
|
-
parser_name: Optional[str] = None,
|
|
83
|
-
config: Optional[AutoConfig] = None,
|
|
84
|
-
**kwargs,
|
|
85
|
-
):
|
|
86
|
-
"""
|
|
87
|
-
Initialize parser with zero configuration.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
parser_id: Unique identifier (auto-generated if not provided)
|
|
91
|
-
parser_name: Human-readable name (auto-generated if not provided)
|
|
92
|
-
config: Optional configuration override
|
|
93
|
-
**kwargs: Additional configuration options
|
|
94
|
-
"""
|
|
95
|
-
# Auto-generate identifiers
|
|
96
|
-
self.parser_id = parser_id or self._generate_parser_id()
|
|
97
|
-
self.parser_name = parser_name or self._generate_parser_name()
|
|
98
|
-
|
|
99
|
-
# Initialize auto-configuration
|
|
100
|
-
self._config: AutoConfig = AutoConfig.create_development(
|
|
101
|
-
self.parser_id, config=config
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
# Service initialization (lazy-loaded)
|
|
105
|
-
self._browser: BrowserService = None
|
|
106
|
-
self._llm: LLMService = None
|
|
107
|
-
self._browser_llm: BrowserLLMService = None
|
|
108
|
-
self._websocket: WebSocketService = None
|
|
109
|
-
self._logger: DriverLogger = None
|
|
110
|
-
self._metrics: MetricsService = None
|
|
111
|
-
|
|
112
|
-
# Execution mode handlers
|
|
113
|
-
self._test_mode: TestMode = None
|
|
114
|
-
self._daemon_mode: DaemonMode = None
|
|
115
|
-
self._scheduled_mode: ScheduledMode = None
|
|
116
|
-
self._interactive_mode: InteractiveMode = None
|
|
117
|
-
|
|
118
|
-
# Runtime state
|
|
119
|
-
self._is_initialized = False
|
|
120
|
-
self._shutdown_event = asyncio.Event()
|
|
121
|
-
|
|
122
|
-
# ==========================================
|
|
123
|
-
# ZERO-CONFIG SERVICE PROPERTIES
|
|
124
|
-
# ==========================================
|
|
125
|
-
|
|
126
|
-
@property
|
|
127
|
-
def browser(self) -> BrowserService:
|
|
128
|
-
"""Smart browser service with zero configuration."""
|
|
129
|
-
if self._browser is None:
|
|
130
|
-
self._browser = BrowserService(
|
|
131
|
-
config=self._config.browser_config,
|
|
132
|
-
logger=self.logger,
|
|
133
|
-
metrics=self.metrics,
|
|
134
|
-
)
|
|
135
|
-
return self._browser
|
|
136
|
-
|
|
137
|
-
@property
|
|
138
|
-
def llm(self) -> LLMService:
|
|
139
|
-
"""AI-powered extraction service."""
|
|
140
|
-
if self._llm is None:
|
|
141
|
-
self._llm = LLMService(
|
|
142
|
-
config=self._config.llm_config,
|
|
143
|
-
logger=self.logger,
|
|
144
|
-
)
|
|
145
|
-
return self._llm
|
|
146
|
-
|
|
147
|
-
@property
|
|
148
|
-
def browser_llm(self) -> BrowserLLMService:
|
|
149
|
-
"""🔥 AI-powered browser service - auto-configured and ready to use."""
|
|
150
|
-
if self._browser_llm is None:
|
|
151
|
-
self._browser_llm = BrowserLLMService(
|
|
152
|
-
auto_config=self._config,
|
|
153
|
-
logger=self.logger,
|
|
154
|
-
metrics=self.metrics,
|
|
155
|
-
)
|
|
156
|
-
return self._browser_llm
|
|
157
|
-
|
|
158
|
-
@property
|
|
159
|
-
def websocket(self) -> WebSocketService:
|
|
160
|
-
"""WebSocket service for daemon mode."""
|
|
161
|
-
if self._websocket is None:
|
|
162
|
-
self._websocket = WebSocketService(
|
|
163
|
-
config=self._config.websocket_config,
|
|
164
|
-
logger=self.logger,
|
|
165
|
-
metrics=self.metrics,
|
|
166
|
-
parser_id=self.parser_id,
|
|
167
|
-
)
|
|
168
|
-
return self._websocket
|
|
169
|
-
|
|
170
|
-
@property
|
|
171
|
-
def logger(self) -> DriverLogger:
|
|
172
|
-
"""Enterprise logging service with SDK integration."""
|
|
173
|
-
if self._logger is None:
|
|
174
|
-
self._logger = ensure_driver_logger(
|
|
175
|
-
parser_id=self.parser_id,
|
|
176
|
-
parser_name=self.parser_name,
|
|
177
|
-
system_dir=str(self._config.system_dir) if self._config.system_dir else None,
|
|
178
|
-
)
|
|
179
|
-
return self._logger
|
|
180
|
-
|
|
181
|
-
@property
|
|
182
|
-
def metrics(self) -> MetricsService:
|
|
183
|
-
"""Built-in metrics and monitoring."""
|
|
184
|
-
if self._metrics is None:
|
|
185
|
-
self._metrics = MetricsService(
|
|
186
|
-
config=self._config.metrics_config, parser_id=self.parser_id
|
|
187
|
-
)
|
|
188
|
-
return self._metrics
|
|
189
|
-
|
|
190
|
-
# ==========================================
|
|
191
|
-
# CORE PARSING METHOD
|
|
192
|
-
# ==========================================
|
|
193
|
-
|
|
194
|
-
async def parse(self) -> dict:
|
|
195
|
-
"""
|
|
196
|
-
🎯 Main parsing method - OVERRIDE THIS
|
|
197
|
-
|
|
198
|
-
This is where you implement your parsing logic.
|
|
199
|
-
|
|
200
|
-
Returns:
|
|
201
|
-
Dictionary containing parsed data
|
|
202
|
-
|
|
203
|
-
Example:
|
|
204
|
-
async def parse(self):
|
|
205
|
-
# Simple extraction
|
|
206
|
-
headlines = await self.browser.extract(
|
|
207
|
-
"https://news.com",
|
|
208
|
-
".headline"
|
|
209
|
-
)
|
|
210
|
-
|
|
211
|
-
# AI-powered extraction
|
|
212
|
-
products = await self.llm.extract(html, schema={
|
|
213
|
-
"products": [{"name": "string", "price": "number"}]
|
|
214
|
-
})
|
|
215
|
-
|
|
216
|
-
return {"headlines": headlines, "products": products}
|
|
217
|
-
"""
|
|
218
|
-
raise NotImplementedError(
|
|
219
|
-
f"Parser '{self.parser_name}' must implement the parse() method. "
|
|
220
|
-
f"This is where you define your parsing logic."
|
|
221
|
-
)
|
|
222
|
-
|
|
223
|
-
# ==========================================
|
|
224
|
-
# EXECUTION MODES
|
|
225
|
-
# ==========================================
|
|
226
|
-
|
|
227
|
-
async def test(self, **kwargs) -> dict:
|
|
228
|
-
"""
|
|
229
|
-
🧪 Test Mode - Development and debugging
|
|
230
|
-
|
|
231
|
-
Single execution for development and testing.
|
|
232
|
-
|
|
233
|
-
Features:
|
|
234
|
-
- Detailed logging and debugging
|
|
235
|
-
- Error reporting with suggestions
|
|
236
|
-
- Performance metrics
|
|
237
|
-
- Results visualization
|
|
238
|
-
|
|
239
|
-
Args:
|
|
240
|
-
**kwargs: Test configuration options
|
|
241
|
-
|
|
242
|
-
Returns:
|
|
243
|
-
Parsed data with metadata
|
|
244
|
-
|
|
245
|
-
Example:
|
|
246
|
-
result = await parser.test()
|
|
247
|
-
print(result)
|
|
248
|
-
"""
|
|
249
|
-
if self._test_mode is None:
|
|
250
|
-
# Create type-safe test configuration
|
|
251
|
-
test_config = ParserTestConfig(
|
|
252
|
-
verbose=kwargs.get("verbose", False),
|
|
253
|
-
show_browser=kwargs.get("show_browser", False),
|
|
254
|
-
save_screenshots=kwargs.get("save_screenshots", False),
|
|
255
|
-
timeout_seconds=kwargs.get("timeout", 60),
|
|
256
|
-
)
|
|
257
|
-
|
|
258
|
-
self._test_mode = TestMode(parser=self, config=test_config)
|
|
259
|
-
|
|
260
|
-
return await self._test_mode.execute(**kwargs)
|
|
261
|
-
|
|
262
|
-
async def daemon(
|
|
263
|
-
self, server: Optional[str] = None, api_key: Optional[str] = None, **kwargs
|
|
264
|
-
) -> None:
|
|
265
|
-
"""
|
|
266
|
-
🔌 Daemon Mode - Production WebSocket service
|
|
267
|
-
|
|
268
|
-
Connects to UnrealOn server as persistent WebSocket service.
|
|
269
|
-
|
|
270
|
-
Features:
|
|
271
|
-
- Auto-connection with reconnection
|
|
272
|
-
- Command handling and response
|
|
273
|
-
- Health monitoring and reporting
|
|
274
|
-
- Graceful shutdown handling
|
|
275
|
-
- Load balancing support
|
|
276
|
-
|
|
277
|
-
Args:
|
|
278
|
-
server: WebSocket server URL (auto-detected if not provided)
|
|
279
|
-
api_key: Authentication key (auto-detected if not provided)
|
|
280
|
-
**kwargs: Daemon configuration options
|
|
281
|
-
|
|
282
|
-
Example:
|
|
283
|
-
# Auto-configured daemon
|
|
284
|
-
await parser.daemon()
|
|
285
|
-
|
|
286
|
-
# Custom server
|
|
287
|
-
await parser.daemon(
|
|
288
|
-
server="wss://my-server.com",
|
|
289
|
-
api_key="my_key"
|
|
290
|
-
)
|
|
291
|
-
"""
|
|
292
|
-
if self._daemon_mode is None:
|
|
293
|
-
self._daemon_mode = DaemonMode(
|
|
294
|
-
parser=self, config=self._config.daemon_config
|
|
295
|
-
)
|
|
296
|
-
|
|
297
|
-
await self._daemon_mode.start(server=server, api_key=api_key, **kwargs)
|
|
298
|
-
|
|
299
|
-
async def schedule(self, every: str, at: Optional[str] = None, **kwargs) -> None:
|
|
300
|
-
"""
|
|
301
|
-
⏰ Scheduled Mode - Automated recurring execution
|
|
302
|
-
|
|
303
|
-
Human-readable scheduling with enterprise monitoring.
|
|
304
|
-
|
|
305
|
-
Features:
|
|
306
|
-
- Natural language intervals ("30m", "1h", "daily")
|
|
307
|
-
- Smart load balancing with jitter
|
|
308
|
-
- Error recovery and retries
|
|
309
|
-
- Health monitoring and alerting
|
|
310
|
-
- Production-ready reliability
|
|
311
|
-
|
|
312
|
-
Args:
|
|
313
|
-
every: Human-readable interval ("30m", "1h", "daily", etc.)
|
|
314
|
-
at: Specific time for daily/weekly schedules ("09:00")
|
|
315
|
-
**kwargs: Scheduling configuration options
|
|
316
|
-
|
|
317
|
-
Examples:
|
|
318
|
-
# Every 30 minutes
|
|
319
|
-
await parser.schedule(every="30m")
|
|
320
|
-
|
|
321
|
-
# Daily at 9 AM
|
|
322
|
-
await parser.schedule(every="daily", at="09:00")
|
|
323
|
-
|
|
324
|
-
# Every hour with monitoring
|
|
325
|
-
await parser.schedule(
|
|
326
|
-
every="1h",
|
|
327
|
-
monitoring=True,
|
|
328
|
-
error_handling=True
|
|
329
|
-
)
|
|
330
|
-
"""
|
|
331
|
-
if self._scheduled_mode is None:
|
|
332
|
-
self._scheduled_mode = ScheduledMode(
|
|
333
|
-
parser=self, config=self._config.scheduled_config
|
|
334
|
-
)
|
|
335
|
-
|
|
336
|
-
await self._scheduled_mode.start(every=every, at=at, **kwargs)
|
|
337
|
-
|
|
338
|
-
async def interactive(self, **kwargs) -> None:
|
|
339
|
-
"""
|
|
340
|
-
🎮 Interactive Mode - Live development and debugging
|
|
341
|
-
|
|
342
|
-
Interactive shell for live development and testing.
|
|
343
|
-
|
|
344
|
-
Features:
|
|
345
|
-
- Live parser execution
|
|
346
|
-
- Real-time result inspection
|
|
347
|
-
- Dynamic configuration changes
|
|
348
|
-
- Browser debugging tools
|
|
349
|
-
- Performance profiling
|
|
350
|
-
|
|
351
|
-
Args:
|
|
352
|
-
**kwargs: Interactive mode options
|
|
353
|
-
|
|
354
|
-
Example:
|
|
355
|
-
await parser.interactive()
|
|
356
|
-
"""
|
|
357
|
-
if self._interactive_mode is None:
|
|
358
|
-
self._interactive_mode = InteractiveMode(
|
|
359
|
-
parser=self, config=self._config.interactive_config
|
|
360
|
-
)
|
|
361
|
-
|
|
362
|
-
await self._interactive_mode.start(**kwargs)
|
|
363
|
-
|
|
364
|
-
# ==========================================
|
|
365
|
-
# UTILITY METHODS
|
|
366
|
-
# ==========================================
|
|
367
|
-
|
|
368
|
-
def now(self) -> str:
|
|
369
|
-
"""Get current timestamp in ISO format."""
|
|
370
|
-
return datetime.now(timezone.utc).isoformat()
|
|
371
|
-
|
|
372
|
-
def get_system_info(self) -> dict:
|
|
373
|
-
"""Get system information for debugging."""
|
|
374
|
-
import psutil
|
|
375
|
-
import os
|
|
376
|
-
|
|
377
|
-
process = psutil.Process(os.getpid())
|
|
378
|
-
memory_mb = process.memory_info().rss / 1024 / 1024
|
|
379
|
-
|
|
380
|
-
return {
|
|
381
|
-
"parser_id": self.parser_id,
|
|
382
|
-
"parser_name": self.parser_name,
|
|
383
|
-
"python_version": sys.version,
|
|
384
|
-
"platform": sys.platform,
|
|
385
|
-
"working_directory": str(Path.cwd()),
|
|
386
|
-
"memory_usage_mb": round(memory_mb, 2),
|
|
387
|
-
"environment": dict(os.environ),
|
|
388
|
-
"config": self._config.model_dump(),
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
async def health_check(self) -> dict:
    """
    Comprehensive health check across every configured service.

    Each service attribute that is set (truthy) is probed in a fixed order:
    browser, llm, browser_llm, websocket, logger, metrics. A probe that
    raises is recorded as ``{"status": "error", "error": "<message>"}`` and
    does not stop the remaining probes.

    Returns:
        dict with the keys ``status`` ("healthy", or "degraded" when at
        least one probe raised), ``timestamp``, ``parser_id``, ``services``
        (one entry per probed service), ``system_info`` and, only when
        degraded, ``service_errors`` (list of "<service>: <message>").
    """
    # (service name, whether the result of health_check() must be awaited).
    # The logger and metrics health checks are called synchronously.
    probes = (
        ("browser", True),
        ("llm", True),
        ("browser_llm", True),
        ("websocket", True),
        ("logger", False),
        ("metrics", False),
    )

    health = {
        "status": "healthy",
        "timestamp": self.now(),
        "parser_id": self.parser_id,
        "services": {},
    }
    service_errors = []

    for name, needs_await in probes:
        # Looked up per iteration so each service is read right before its probe.
        service = getattr(self, f"_{name}")
        if not service:
            continue
        try:
            outcome = service.health_check()
            if needs_await:
                outcome = await outcome
            health["services"][name] = outcome
        except Exception as exc:
            # Best effort: one failing service must not hide the others.
            health["services"][name] = {"status": "error", "error": str(exc)}
            service_errors.append(f"{name}: {exc}")

    # Determine overall status
    if service_errors:
        health["status"] = "degraded"  # Instead of "unhealthy"
        health["service_errors"] = service_errors

    # Add system info as expected by tests
    health["system_info"] = {
        "parser_version": "3.0",
        "environment": getattr(self._config, "environment", "development"),
        "active_services": len(health["services"]),
    }

    return health
|
|
464
|
-
|
|
465
|
-
async def cleanup(self):
    """
    Clean up resources gracefully.

    Awaits ``cleanup()`` on every service attribute that is set (truthy),
    in the order browser, llm, browser_llm, websocket, logger, metrics.
    A failing service never prevents the others from being cleaned up:
    its error is collected and reported in a single warning at the end.
    Nothing is raised for service cleanup failures.
    """
    self.logger.info("Starting parser cleanup...")

    service_names = (
        "browser",
        "llm",
        "browser_llm",
        "websocket",
        "logger",
        "metrics",
    )

    # Cleanup services (gracefully handle errors)
    cleanup_errors = []
    for name in service_names:
        # Looked up per iteration so each service is read right before its cleanup.
        service = getattr(self, f"_{name}")
        if not service:
            continue
        try:
            await service.cleanup()
        except Exception as exc:
            cleanup_errors.append(f"{name}: {exc}")

    # Log cleanup errors but don't raise
    if cleanup_errors:
        self.logger.warning(f"Cleanup errors: {'; '.join(cleanup_errors)}")

    self.logger.info("Parser cleanup completed")
|
|
513
|
-
|
|
514
|
-
# ==========================================
|
|
515
|
-
# PRIVATE METHODS
|
|
516
|
-
# ==========================================
|
|
517
|
-
|
|
518
|
-
def _generate_parser_id(self) -> str:
    """
    Build a parser ID from the lowercased class name and the local time.

    The result has the form ``<classname>_<YYYYMMDD>_<HHMMSS>``.

    NOTE(review): another ``_generate_parser_id`` appears further down in
    this file; if both belong to the same class the later definition wins -
    confirm which one is intended.
    """
    prefix = self.__class__.__name__.lower()
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return "_".join((prefix, stamp))
|
|
523
|
-
|
|
524
|
-
def _generate_parser_name(self) -> str:
    """
    Generate a human-readable parser name from the class name.

    A trailing "Parser" suffix is dropped and the remaining CamelCase name
    is split into space-separated words. Runs of capitals are kept together
    as one word, so ``HTMLTableParser`` becomes "HTML Table" rather than
    "H T M L Table".

    Returns:
        The spaced name, or ``"UnrealOn Parser <last 8 chars of parser_id>"``
        when nothing is left after removing the suffix.
    """
    import re

    class_name = self.__class__.__name__
    if class_name.endswith("Parser"):
        class_name = class_name[:-6]  # Remove "Parser" suffix

    # Insert a space at word boundaries only: before a capital that follows
    # a lowercase letter or digit, and before the last capital of an acronym
    # when it starts a new word (e.g. "HTML|Table").
    name = re.sub(
        r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])", " ", class_name
    ).strip()
    return name if name else f"UnrealOn Parser {self.parser_id[-8:]}"
|
|
535
|
-
|
|
536
|
-
def __repr__(self) -> str:
    """Return a debug representation showing the class, parser ID and name."""
    cls_name = self.__class__.__name__
    return "<{}(id='{}', name='{}')>".format(
        cls_name, self.parser_id, self.parser_name
    )
|
|
538
|
-
|
|
539
|
-
def __str__(self) -> str:
    """Return the parser name followed by its ID in parentheses."""
    return "{} ({})".format(self.parser_name, self.parser_id)
|
|
541
|
-
|
|
542
|
-
# ==========================================
|
|
543
|
-
# CONTEXT MANAGER SUPPORT
|
|
544
|
-
# ==========================================
|
|
545
|
-
|
|
546
|
-
async def __aenter__(self):
    """Async context manager entry: initialize the parser and return it."""
    setup = self._initialize()
    await setup
    return self
|
|
550
|
-
|
|
551
|
-
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """
    Async context manager exit: run ``cleanup()`` and never suppress errors.

    Returns:
        False, so any exception raised inside the ``async with`` body
        keeps propagating to the caller.
    """
    teardown = self.cleanup()
    await teardown
    return False
|
|
556
|
-
|
|
557
|
-
async def _initialize(self):
    """
    Initialize parser for context manager usage.

    Logs once and sets ``_is_initialized``; repeated calls are no-ops.
    """
    if self._is_initialized:
        return

    self.logger.info(f"Initializing parser: {self.parser_name}")
    self._is_initialized = True
|
|
562
|
-
|
|
563
|
-
def _generate_parser_id(self) -> str:
    """
    Generate a unique parser ID.

    The ID has the form ``parser_<microsecond timestamp>_<8 hex chars>``,
    where the hex part comes from a random UUID.
    """
    import time
    import uuid

    micros = int(time.time() * 1000000)  # Microseconds for uniqueness
    token = uuid.uuid4().hex[:8]
    return "parser_{}_{}".format(micros, token)
|
|
571
|
-
|
|
572
|
-
def _generate_parser_name(self) -> str:
    """Generate the parser name from the last eight characters of its ID."""
    suffix = self.parser_id[-8:]
    return f"UnrealOn Parser {suffix}"
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
# ==========================================
|
|
578
|
-
# CONVENIENCE FUNCTIONS
|
|
579
|
-
# ==========================================
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
async def quick_extract(url: str, selector: str, **kwargs) -> List[str]:
    """
    🚀 Quick extraction without creating parser class

    Convenience function for simple one-off extractions. It defines a
    throwaway ``Parser`` subclass whose ``parse`` calls
    ``self.browser.extract(url, selector, **kwargs)``, runs it through
    ``test()`` and returns the ``"data"`` entry of the result.

    Args:
        url: Target URL
        selector: CSS selector
        **kwargs: Additional options passed to ``browser.extract``

    Returns:
        List of extracted text (an empty list when the result has no
        ``"data"`` key)

    Example:
        headlines = await quick_extract(
            "https://news.com",
            ".headline"
        )
    """

    # The class name is kept as-is because parser IDs/names may derive from it.
    class QuickParser(Parser):
        async def parse(self):
            return await self.browser.extract(url, selector, **kwargs)

    one_shot = QuickParser()
    outcome = await one_shot.test()
    return outcome.get("data", [])
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
async def quick_extract_with_ai(url: str, schema: dict, **kwargs) -> dict:
    """
    🤖 Quick AI extraction without creating parser class

    Convenience function for AI-powered extractions. It defines a throwaway
    ``Parser`` subclass whose ``parse`` fetches the page HTML through
    ``self.browser.get_html(url)`` and hands it to
    ``self.llm.extract(html, schema, **kwargs)``, runs it through ``test()``
    and returns the ``"data"`` entry of the result.

    Args:
        url: Target URL
        schema: Data schema for AI extraction
        **kwargs: Additional options passed to ``llm.extract``

    Returns:
        Structured data extracted by AI (an empty dict when the result has
        no ``"data"`` key)

    Example:
        products = await quick_extract_with_ai(
            "https://shop.com",
            schema={"products": [{"name": "string", "price": "number"}]}
        )
    """

    # The class name is kept as-is because parser IDs/names may derive from it.
    class QuickAIParser(Parser):
        async def parse(self):
            page_html = await self.browser.get_html(url)
            return await self.llm.extract(page_html, schema, **kwargs)

    one_shot = QuickAIParser()
    outcome = await one_shot.test()
    return outcome.get("data", {})
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Data Transfer Objects for UnrealOn Driver v3.0
|
|
3
|
-
|
|
4
|
-
Type-safe configuration and data models using Pydantic v2.
|
|
5
|
-
COMPLIANCE: 100% Pydantic v2 compliant.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from .cli import ParserInstanceConfig, create_parser_config
|
|
9
|
-
from .config import LogLevel
|
|
10
|
-
from .execution import (
|
|
11
|
-
ParserTestConfig,
|
|
12
|
-
DaemonModeConfig,
|
|
13
|
-
ScheduledModeConfig,
|
|
14
|
-
InteractiveModeConfig,
|
|
15
|
-
ExecutionResult,
|
|
16
|
-
ErrorInfo,
|
|
17
|
-
PerformanceMetrics,
|
|
18
|
-
ExecutionEnvironment,
|
|
19
|
-
ScheduledModeStatus,
|
|
20
|
-
DaemonCommandResult,
|
|
21
|
-
DaemonStatusResult,
|
|
22
|
-
DaemonHealthResult,
|
|
23
|
-
)
|
|
24
|
-
from .events import (
|
|
25
|
-
DriverEventType,
|
|
26
|
-
DriverEventContext,
|
|
27
|
-
DriverEventMetrics,
|
|
28
|
-
BROWSER_EVENTS,
|
|
29
|
-
PARSER_EVENTS,
|
|
30
|
-
LLM_EVENTS,
|
|
31
|
-
SCHEDULER_EVENTS,
|
|
32
|
-
WEBSOCKET_EVENTS,
|
|
33
|
-
METRICS_EVENTS,
|
|
34
|
-
ERROR_EVENTS,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
# Public API of this package: every name imported above is re-exported here.
__all__ = [
    # Re-exported from .cli and .config
    "ParserInstanceConfig",
    "create_parser_config",
    "LogLevel",
    # Execution models
    "ParserTestConfig",
    "DaemonModeConfig",
    "ScheduledModeConfig",
    "InteractiveModeConfig",
    "ExecutionResult",
    "ErrorInfo",
    "PerformanceMetrics",
    "ExecutionEnvironment",
    # Daemon models
    "ScheduledModeStatus",
    "DaemonCommandResult",
    "DaemonStatusResult",
    "DaemonHealthResult",
    # Event models
    "DriverEventType",
    "DriverEventContext",
    "DriverEventMetrics",
    "BROWSER_EVENTS",
    "PARSER_EVENTS",
    "LLM_EVENTS",
    "SCHEDULER_EVENTS",
    "WEBSOCKET_EVENTS",
    "METRICS_EVENTS",
    "ERROR_EVENTS",
]
|