unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.1.dist-info/METADATA +722 -0
- unrealon-1.1.1.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
- unrealon-1.1.1.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
LLM Services for UnrealOn Driver v3.0
|
|
3
|
-
|
|
4
|
-
Simple, clean LLM services following KISS principle.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from .llm import LLMService
|
|
8
|
-
from .browser_llm_service import BrowserLLMService, BrowserLLMConfig, ExtractionResult
|
|
9
|
-
|
|
10
|
-
__all__ = [
|
|
11
|
-
"LLMService",
|
|
12
|
-
"BrowserLLMService",
|
|
13
|
-
"BrowserLLMConfig",
|
|
14
|
-
"ExtractionResult"
|
|
15
|
-
]
|
|
@@ -1,363 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Browser LLM Service - UnrealOn Driver v3.0
|
|
3
|
-
|
|
4
|
-
Simple URL → Browser → HTML → LLM → Response workflow.
|
|
5
|
-
Just like the old driver but with v3.0 improvements.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import asyncio
|
|
9
|
-
import json
|
|
10
|
-
import re
|
|
11
|
-
import shutil
|
|
12
|
-
from datetime import datetime
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
from typing import Optional
|
|
15
|
-
from pydantic import BaseModel, Field, ConfigDict
|
|
16
|
-
from urllib.parse import urlparse
|
|
17
|
-
|
|
18
|
-
from unrealon_driver.src.services.browser_service import BrowserService
|
|
19
|
-
from unrealon_driver.src.services.llm.llm import LLMService
|
|
20
|
-
from unrealon_driver.src.dto.services import DriverBrowserConfig, LLMConfig
|
|
21
|
-
from unrealon_driver.src.config.auto_config import AutoConfig
|
|
22
|
-
from unrealon_driver.src.logging.driver_logger import DriverLogger
|
|
23
|
-
from unrealon_driver.src.services.metrics_service import MetricsService
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class BrowserLLMConfig(BaseModel):
|
|
27
|
-
"""Configuration for Browser LLM Service."""
|
|
28
|
-
|
|
29
|
-
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
30
|
-
|
|
31
|
-
# Browser settings
|
|
32
|
-
browser_config: DriverBrowserConfig = Field(
|
|
33
|
-
..., description="Browser configuration"
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
# LLM settings
|
|
37
|
-
llm_config: LLMConfig = Field(..., description="LLM configuration")
|
|
38
|
-
|
|
39
|
-
# Processing settings - removed dom_wait_seconds as unnecessary
|
|
40
|
-
|
|
41
|
-
# Output settings
|
|
42
|
-
save_results: bool = Field(
|
|
43
|
-
default=True, description="Save extraction results to files"
|
|
44
|
-
)
|
|
45
|
-
results_dir: Optional[str] = Field(
|
|
46
|
-
default=None, description="Directory for saving results"
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
class ExtractionResult(BaseModel):
|
|
51
|
-
"""Result of browser + LLM extraction operation."""
|
|
52
|
-
|
|
53
|
-
model_config = ConfigDict(validate_assignment=True, extra="forbid")
|
|
54
|
-
|
|
55
|
-
# Core data
|
|
56
|
-
data: dict = Field(..., description="Extracted structured data")
|
|
57
|
-
url: str = Field(..., description="Source URL")
|
|
58
|
-
extraction_id: str = Field(..., description="Unique extraction identifier")
|
|
59
|
-
timestamp: datetime = Field(
|
|
60
|
-
default_factory=datetime.utcnow, description="Extraction timestamp"
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# Performance metrics
|
|
64
|
-
total_duration_seconds: float = Field(
|
|
65
|
-
..., ge=0, description="Total operation duration"
|
|
66
|
-
)
|
|
67
|
-
browser_duration_seconds: float = Field(
|
|
68
|
-
..., ge=0, description="Browser operation duration"
|
|
69
|
-
)
|
|
70
|
-
llm_duration_seconds: float = Field(
|
|
71
|
-
..., ge=0, description="LLM processing duration"
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
# Content metrics
|
|
75
|
-
html_size_bytes: int = Field(..., ge=0, description="HTML content size")
|
|
76
|
-
success: bool = Field(..., description="Whether extraction was successful")
|
|
77
|
-
|
|
78
|
-
# NEW: Additional data for comprehensive saving like html_processor_demo
|
|
79
|
-
original_html: str = Field(default="", description="Original HTML content")
|
|
80
|
-
cleaned_html: str = Field(default="", description="Cleaned HTML content")
|
|
81
|
-
|
|
82
|
-
# File paths (if saved)
|
|
83
|
-
result_file_path: Optional[str] = Field(
|
|
84
|
-
default=None, description="Path to saved result file"
|
|
85
|
-
)
|
|
86
|
-
original_html_path: Optional[str] = Field(
|
|
87
|
-
default=None, description="Path to saved original HTML"
|
|
88
|
-
)
|
|
89
|
-
cleaned_html_path: Optional[str] = Field(
|
|
90
|
-
default=None, description="Path to saved cleaned HTML"
|
|
91
|
-
)
|
|
92
|
-
markdown_docs_path: Optional[str] = Field(
|
|
93
|
-
default=None, description="Path to saved markdown documentation"
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
class BrowserLLMService:
|
|
98
|
-
"""
|
|
99
|
-
🌐 Browser + LLM Service - Simple Integration
|
|
100
|
-
|
|
101
|
-
Simple URL → Browser → HTML → LLM → Data workflow:
|
|
102
|
-
|
|
103
|
-
Main methods:
|
|
104
|
-
- extract_listing(url) - for search results, catalogs
|
|
105
|
-
- extract_details(url) - for product pages, articles
|
|
106
|
-
|
|
107
|
-
Example:
|
|
108
|
-
service = BrowserLLMService(config)
|
|
109
|
-
result = await service.extract_listing("https://amazon.com/s?k=laptop")
|
|
110
|
-
result = await service.extract_details("https://amazon.com/dp/B123456")
|
|
111
|
-
"""
|
|
112
|
-
|
|
113
|
-
def __init__(
|
|
114
|
-
self,
|
|
115
|
-
config=None,
|
|
116
|
-
auto_config: AutoConfig = None,
|
|
117
|
-
logger: DriverLogger = None,
|
|
118
|
-
metrics: MetricsService = None,
|
|
119
|
-
):
|
|
120
|
-
"""
|
|
121
|
-
Initialize Browser + LLM service.
|
|
122
|
-
|
|
123
|
-
Args:
|
|
124
|
-
config: BrowserLLMConfig (legacy method)
|
|
125
|
-
auto_config: AutoConfig with ready browser/llm configs (NEW SIMPLE METHOD!)
|
|
126
|
-
logger: Logger instance
|
|
127
|
-
metrics: Metrics service
|
|
128
|
-
"""
|
|
129
|
-
self.logger = logger
|
|
130
|
-
self.metrics = metrics
|
|
131
|
-
|
|
132
|
-
# 🔥 NEW SIMPLE METHOD: Use AutoConfig directly!
|
|
133
|
-
if auto_config:
|
|
134
|
-
self.config = BrowserLLMConfig(
|
|
135
|
-
browser_config=auto_config.browser_config,
|
|
136
|
-
llm_config=auto_config.llm_config,
|
|
137
|
-
save_results=True,
|
|
138
|
-
results_dir=str(auto_config.system_dir / "results"),
|
|
139
|
-
)
|
|
140
|
-
elif config:
|
|
141
|
-
# Legacy method for backward compatibility
|
|
142
|
-
self.config = config
|
|
143
|
-
else:
|
|
144
|
-
raise ValueError("Either config or auto_config must be provided")
|
|
145
|
-
|
|
146
|
-
# Initialize component services
|
|
147
|
-
self.browser_service = BrowserService(
|
|
148
|
-
config=self.config.browser_config,
|
|
149
|
-
logger=logger,
|
|
150
|
-
metrics=metrics,
|
|
151
|
-
)
|
|
152
|
-
|
|
153
|
-
self.llm_service = LLMService(config=self.config.llm_config, logger=logger)
|
|
154
|
-
|
|
155
|
-
# Setup results directory
|
|
156
|
-
if self.config.save_results and self.config.results_dir:
|
|
157
|
-
self.results_dir = Path(self.config.results_dir)
|
|
158
|
-
self.results_dir.mkdir(parents=True, exist_ok=True)
|
|
159
|
-
else:
|
|
160
|
-
self.results_dir = None
|
|
161
|
-
|
|
162
|
-
if self.logger:
|
|
163
|
-
self.logger.info("🌐 BrowserLLMService initialized successfully")
|
|
164
|
-
|
|
165
|
-
async def extract_listing(self, url: str) -> ExtractionResult:
|
|
166
|
-
"""Extract listing data from URL (e.g., search results, category pages)."""
|
|
167
|
-
return await self._extract_from_url(url, "listing")
|
|
168
|
-
|
|
169
|
-
async def extract_details(self, url: str) -> ExtractionResult:
|
|
170
|
-
"""Extract detail data from URL (e.g., product page, item details)."""
|
|
171
|
-
return await self._extract_from_url(url, "details")
|
|
172
|
-
|
|
173
|
-
async def _extract_from_url(self, url: str, page_type: str) -> ExtractionResult:
|
|
174
|
-
"""
|
|
175
|
-
Private method: Extract structured data from URL using Browser → LLM workflow.
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
url: Target URL to extract from
|
|
179
|
-
page_type: "listing" or "details" for proper LLM routing
|
|
180
|
-
|
|
181
|
-
Returns:
|
|
182
|
-
ExtractionResult with data and metadata
|
|
183
|
-
"""
|
|
184
|
-
extraction_id = f"extract_{int(datetime.utcnow().timestamp())}"
|
|
185
|
-
start_time = datetime.utcnow()
|
|
186
|
-
|
|
187
|
-
if self.logger:
|
|
188
|
-
self.logger.info(f"🌐 Extracting {page_type} from: {url}")
|
|
189
|
-
|
|
190
|
-
try:
|
|
191
|
-
# Step 1: Browser → HTML
|
|
192
|
-
browser_start = datetime.utcnow()
|
|
193
|
-
html_content = await self.browser_service.get_html(url)
|
|
194
|
-
browser_duration = (datetime.utcnow() - browser_start).total_seconds()
|
|
195
|
-
|
|
196
|
-
# Step 2: LLM processing
|
|
197
|
-
llm_start = datetime.utcnow()
|
|
198
|
-
if page_type == "listing":
|
|
199
|
-
extracted_data = await self.llm_service.process_listing(html_content)
|
|
200
|
-
else:
|
|
201
|
-
extracted_data = await self.llm_service.process_details(html_content)
|
|
202
|
-
llm_duration = (datetime.utcnow() - llm_start).total_seconds()
|
|
203
|
-
|
|
204
|
-
# Step 3: Get cleaned HTML from LLM service's processor
|
|
205
|
-
cleaned_html = ""
|
|
206
|
-
try:
|
|
207
|
-
if page_type == "listing" and self.llm_service.listing_processor:
|
|
208
|
-
processor = self.llm_service.listing_processor
|
|
209
|
-
elif self.llm_service.details_processor:
|
|
210
|
-
processor = self.llm_service.details_processor
|
|
211
|
-
else:
|
|
212
|
-
processor = None
|
|
213
|
-
|
|
214
|
-
if processor and hasattr(processor, "cleaner"):
|
|
215
|
-
cleaned_html, _ = processor.cleaner.clean_html(
|
|
216
|
-
html_content, preserve_js_data=True, aggressive_cleaning=True
|
|
217
|
-
)
|
|
218
|
-
except Exception as e:
|
|
219
|
-
if self.logger:
|
|
220
|
-
self.logger.warning(f"⚠️ Could not get cleaned HTML: {e}")
|
|
221
|
-
|
|
222
|
-
cleaned_html = html_content # Fallback to original
|
|
223
|
-
|
|
224
|
-
# Step 4: Create result
|
|
225
|
-
total_duration = (datetime.utcnow() - start_time).total_seconds()
|
|
226
|
-
|
|
227
|
-
result = ExtractionResult(
|
|
228
|
-
data=extracted_data,
|
|
229
|
-
url=url,
|
|
230
|
-
extraction_id=extraction_id,
|
|
231
|
-
total_duration_seconds=total_duration,
|
|
232
|
-
browser_duration_seconds=browser_duration,
|
|
233
|
-
llm_duration_seconds=llm_duration,
|
|
234
|
-
html_size_bytes=len(html_content.encode()),
|
|
235
|
-
success=True,
|
|
236
|
-
# NEW: Additional data
|
|
237
|
-
original_html=html_content,
|
|
238
|
-
cleaned_html=cleaned_html,
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
# Step 4: Save results if configured
|
|
242
|
-
if self.config.save_results and self.results_dir:
|
|
243
|
-
await self._save_extraction_result(result)
|
|
244
|
-
|
|
245
|
-
if self.logger:
|
|
246
|
-
self.logger.info(
|
|
247
|
-
f"✅ {page_type.title()} extraction completed in {total_duration:.2f}s"
|
|
248
|
-
)
|
|
249
|
-
|
|
250
|
-
return result
|
|
251
|
-
|
|
252
|
-
except Exception as e:
|
|
253
|
-
if self.logger:
|
|
254
|
-
self.logger.error(f"❌ {page_type.title()} extraction failed: {e}")
|
|
255
|
-
|
|
256
|
-
# Create failed result
|
|
257
|
-
total_duration = (datetime.utcnow() - start_time).total_seconds()
|
|
258
|
-
return ExtractionResult(
|
|
259
|
-
data={},
|
|
260
|
-
url=url,
|
|
261
|
-
extraction_id=extraction_id,
|
|
262
|
-
total_duration_seconds=total_duration,
|
|
263
|
-
browser_duration_seconds=0,
|
|
264
|
-
llm_duration_seconds=0,
|
|
265
|
-
html_size_bytes=0,
|
|
266
|
-
success=False,
|
|
267
|
-
original_html="",
|
|
268
|
-
cleaned_html="",
|
|
269
|
-
)
|
|
270
|
-
|
|
271
|
-
async def _save_extraction_result(self, result: ExtractionResult) -> None:
|
|
272
|
-
"""Save comprehensive extraction results to files (JSON, HTML, MD) like html_processor_demo."""
|
|
273
|
-
if not self.results_dir:
|
|
274
|
-
return
|
|
275
|
-
|
|
276
|
-
# Create listing-specific folder and clear old results
|
|
277
|
-
listing_folder = self._create_listing_folder(result.url)
|
|
278
|
-
|
|
279
|
-
# Determine page type for filenames
|
|
280
|
-
page_type = "listing" if "listing" in result.extraction_id else "details"
|
|
281
|
-
base_filename = f"{result.extraction_id}_{page_type}"
|
|
282
|
-
|
|
283
|
-
# 1. Save main result as JSON
|
|
284
|
-
result_file = listing_folder / f"{base_filename}.json"
|
|
285
|
-
with open(result_file, "w", encoding="utf-8") as f:
|
|
286
|
-
# Create clean data without huge HTML content for JSON
|
|
287
|
-
clean_data = result.model_dump()
|
|
288
|
-
# Don't save HTML content in JSON (too large)
|
|
289
|
-
clean_data["original_html"] = f"<saved to {base_filename}_original.html>"
|
|
290
|
-
clean_data["cleaned_html"] = f"<saved to {base_filename}_cleaned.html>"
|
|
291
|
-
json.dump(clean_data, f, ensure_ascii=False, indent=2, default=str)
|
|
292
|
-
result.result_file_path = str(result_file)
|
|
293
|
-
|
|
294
|
-
# 2. Save original HTML
|
|
295
|
-
if result.original_html:
|
|
296
|
-
original_html_file = listing_folder / f"{base_filename}_original.html"
|
|
297
|
-
with open(original_html_file, "w", encoding="utf-8") as f:
|
|
298
|
-
f.write(result.original_html)
|
|
299
|
-
result.original_html_path = str(original_html_file)
|
|
300
|
-
|
|
301
|
-
# 3. Save cleaned HTML
|
|
302
|
-
if result.cleaned_html:
|
|
303
|
-
cleaned_html_file = listing_folder / f"{base_filename}_cleaned.html"
|
|
304
|
-
with open(cleaned_html_file, "w", encoding="utf-8") as f:
|
|
305
|
-
f.write(result.cleaned_html)
|
|
306
|
-
result.cleaned_html_path = str(cleaned_html_file)
|
|
307
|
-
|
|
308
|
-
# 4. Generate and save markdown documentation (like html_processor_demo)
|
|
309
|
-
result_dict = result.data if isinstance(result.data, dict) else {}
|
|
310
|
-
self._save_markdown_documentation(result_dict, f"{base_filename}_documentation")
|
|
311
|
-
|
|
312
|
-
if self.logger:
|
|
313
|
-
self.logger.info(f"💾 Comprehensive results saved to: {listing_folder}")
|
|
314
|
-
self.logger.info(f"📊 JSON: {result_file.name}")
|
|
315
|
-
self.logger.info(
|
|
316
|
-
f"🌐 HTML: {base_filename}_original.html, {base_filename}_cleaned.html"
|
|
317
|
-
)
|
|
318
|
-
self.logger.info(f"📝 Docs: {markdown_file.name}")
|
|
319
|
-
|
|
320
|
-
def _create_listing_folder(self, url: str) -> Path:
|
|
321
|
-
"""Create folder for listing based on URL and clear if exists."""
|
|
322
|
-
# Simple folder name from URL host
|
|
323
|
-
host = urlparse(url).netloc.replace("www.", "")
|
|
324
|
-
folder_name = re.sub(r"[^\w\-_]", "_", host) or "listing"
|
|
325
|
-
|
|
326
|
-
# Create folder path
|
|
327
|
-
listing_folder = self.results_dir / folder_name
|
|
328
|
-
|
|
329
|
-
# Clear folder if exists (new LLM cycle)
|
|
330
|
-
if listing_folder.exists():
|
|
331
|
-
if self.logger:
|
|
332
|
-
self.logger.info(f"🗑️ Clearing existing folder: {listing_folder}")
|
|
333
|
-
shutil.rmtree(listing_folder)
|
|
334
|
-
|
|
335
|
-
# Create fresh folder
|
|
336
|
-
listing_folder.mkdir(parents=True, exist_ok=True)
|
|
337
|
-
|
|
338
|
-
if self.logger:
|
|
339
|
-
self.logger.info(f"📁 Created listing folder: {listing_folder}")
|
|
340
|
-
|
|
341
|
-
return listing_folder
|
|
342
|
-
|
|
343
|
-
def _save_markdown_documentation(self, result_dict: dict, filename: str) -> Path:
    """Write the extraction documentation text to a markdown file.

    The text is read from ``result_dict["extraction_result"]["documentation"]``.
    A missing or ``None`` value at either level produces an empty file
    instead of raising, and a non-string value is converted with ``str()``.

    Args:
        result_dict: Result payload that may contain an
            ``"extraction_result"`` mapping.
        filename: File name without extension; ``.md`` is appended and the
            file is created directly under ``self.results_dir``.

    Returns:
        Path of the markdown file that was written.
    """
    extraction_result = result_dict.get("extraction_result") or {}
    if not isinstance(extraction_result, dict):
        extraction_result = {}

    documentation = extraction_result.get("documentation") or ""
    if not isinstance(documentation, str):
        documentation = str(documentation)

    filepath = self.results_dir / f"{filename}.md"
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(documentation)

    # Report through the service logger when one is configured, like the
    # other methods of this class; fall back to stdout otherwise.
    message = f"Markdown documentation saved to: {filepath}"
    if self.logger:
        self.logger.info(message)
    else:
        print(message)

    return filepath
|
|
353
|
-
|
|
354
|
-
async def cleanup(self):
    """Clean up service resources.

    Runs the browser service cleanup and then the LLM service cleanup. The
    LLM cleanup is executed even when the browser cleanup raises, so one
    failing component cannot leave the other un-released; the browser
    error still propagates to the caller afterwards. The completion
    message is logged only when both cleanups finish without raising.
    """
    try:
        await self.browser_service.cleanup()
    finally:
        # Always release the LLM side, even if the browser side failed.
        await self.llm_service.cleanup()

    if self.logger:
        self.logger.info("🌐 BrowserLLMService cleanup completed")
|
|
361
|
-
|
|
362
|
-
def __repr__(self) -> str:
    """Return a short debug representation that includes the parser id."""
    parser_id = self.config.browser_config.parser_id
    return f"<BrowserLLMService(parser_id={parser_id})>"
|
|
@@ -1,195 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
🤖 LLM Service - UnrealOn Driver v3.0
|
|
3
|
-
|
|
4
|
-
Simple wrapper around UnrealOn LLM for HTML processing.
|
|
5
|
-
Just pass HTML and get parsed results.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from typing import Optional
|
|
9
|
-
from pydantic import BaseModel, Field, ConfigDict
|
|
10
|
-
|
|
11
|
-
from unrealon_llm.src.provider import UnrealOnLLM
|
|
12
|
-
from unrealon_sdk.src.enterprise.logging.development import get_development_logger
|
|
13
|
-
from unrealon_sdk.src.dto.logging import SDKContext, SDKEventType
|
|
14
|
-
|
|
15
|
-
from unrealon_driver.src.dto.services import LLMConfig
|
|
16
|
-
from unrealon_driver.src.core.exceptions import create_llm_error
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class LLMService:
    """
    Simple LLM service for HTML processing.

    Two main methods:
    - process_listing(html) - for catalog/listing pages
    - process_details(html) - for product/detail pages

    Both return plain dicts. When the service is created without an API key
    no processors are built and both methods return fixed mock payloads.

    Example:
        llm = LLMService(config)
        result = await llm.process_listing(html_content)
        result = await llm.process_details(html_content)
    """

    # Daily spending cap handed to every processor this service creates.
    DEFAULT_DAILY_COST_LIMIT = 1.0

    def __init__(self, config: "LLMConfig", logger=None):
        """
        Initialize LLM service.

        Args:
            config: LLMConfig with API key and settings
            logger: Optional logger
        """
        self.config = config
        self.logger = logger

        # Development logger integration
        self.dev_logger = get_development_logger()

        # Processors are created eagerly so configuration problems surface
        # at construction time rather than on first use.
        if config.api_key:
            processor_kwargs = {
                "openrouter_api_key": config.api_key,
                "default_model": config.model,
                "daily_cost_limit": self.DEFAULT_DAILY_COST_LIMIT,
                "enable_caching": config.enable_caching,
            }
            self.listing_processor = UnrealOnLLM.create_listing_processor(
                **processor_kwargs
            )
            self.details_processor = UnrealOnLLM.create_details_processor(
                **processor_kwargs
            )
        else:
            # No API key (e.g. tests): the process_* methods return mock data.
            self.listing_processor = None
            self.details_processor = None

        if self.logger:
            self.logger.info(f"🤖 LLM service initialized with {config.provider}")

        # Log initialization with development logger
        if self.dev_logger:
            self.dev_logger.log_info(
                SDKEventType.COMPONENT_CREATED,
                "LLM service initialized",
                context=SDKContext(
                    component_name="LLM",
                    layer_name="UnrealOn_Driver",
                    metadata={
                        "provider": config.provider,
                        "model": config.model,
                        "cost_tracking": config.enable_cost_tracking,
                    },
                ),
            )

    async def process_listing(self, html: str) -> dict:
        """
        Process listing/catalog page HTML.

        Args:
            html: Raw HTML content

        Returns:
            Extracted data as dict. Without a listing processor a fixed mock
            payload is returned. When processing fails with a dict-validation
            message, a fallback dict describing the error is returned
            instead of raising.

        Raises:
            The exception built by ``create_llm_error`` for any other
            failure, chained to the original exception.
        """
        try:
            if self.logger:
                self.logger.info("🔍 Processing listing page")

            if not self.listing_processor:
                return {"test_data": "mock_listing_result"}

            result = await self.listing_processor.extract_patterns(html)

            if self.logger:
                self.logger.info("✅ Listing processing complete")

            return self._convert_result(result)

        except Exception as e:
            if self.logger:
                self.logger.error(f"❌ Listing processing failed: {e}")

            # Fallback: if LLM output validation fails, return a basic structure
            message = str(e)
            if (
                "Input should be a valid dictionary" in message
                or "must be a mapping" in message
            ):
                return {
                    "extracted_data": "LLM validation failed - Claude returned list instead of dict",
                    "error": "LLM_VALIDATION_ERROR",
                    "raw_error": message,
                    "extraction_result": {
                        "selectors": {},
                        "documentation": "Extraction failed due to LLM format validation",
                        "detected_item_type": "validation_error",
                    },
                }

            raise create_llm_error(
                f"Listing processing failed: {e}",
                provider=self.config.provider,
                model=self.config.model,
                # ``html or ""`` keeps a None input from raising here and
                # masking the original error.
                input_size=len(html or ""),
            ) from e

    async def process_details(self, html: str) -> dict:
        """
        Process detail/product page HTML.

        Args:
            html: Raw HTML content

        Returns:
            Extracted data as dict. Without a details processor a fixed mock
            payload is returned.

        Raises:
            The exception built by ``create_llm_error`` for any failure,
            chained to the original exception.
        """
        try:
            if self.logger:
                self.logger.info("🔍 Processing details page")

            if not self.details_processor:
                return {"test_data": "mock_details_result"}

            result = await self.details_processor.extract_patterns(html)

            if self.logger:
                self.logger.info("✅ Details processing complete")

            return self._convert_result(result)

        except Exception as e:
            if self.logger:
                self.logger.error(f"❌ Details processing failed: {e}")
            raise create_llm_error(
                f"Details processing failed: {e}",
                provider=self.config.provider,
                model=self.config.model,
                input_size=len(html or ""),
            ) from e

    def _convert_result(self, result) -> dict:
        """Convert LLM result to simple dict.

        A truthy ``result`` is converted with its ``model_dump()`` method.
        A falsy result, or a failing conversion, yields a dict with a single
        ``"extracted_data"`` message instead of raising.
        """
        if not result:
            return {"extracted_data": "No extraction result found"}
        try:
            return result.model_dump()
        except Exception as e:
            # Best effort by design: report the problem in the payload, but
            # leave a trace in the log so the failure is not silent.
            if self.logger:
                self.logger.error(f"❌ LLM result conversion failed: {e}")
            return {"extracted_data": f"Error converting result: {e}"}

    async def cleanup(self):
        """Clean up LLM resources.

        Closes the ``llm_client`` of each processor that has one. Each client
        is closed independently, so a failure while closing one does not
        prevent the other from being closed. Errors are logged, never raised;
        the completion message is logged only when no close failed.
        """
        failed = False
        processors = (
            getattr(self, "listing_processor", None),
            getattr(self, "details_processor", None),
        )
        for processor in processors:
            client = getattr(processor, "llm_client", None)
            if not client:
                continue
            try:
                await client.close()
            except Exception as e:
                failed = True
                if self.logger:
                    self.logger.error(f"❌ LLM cleanup error: {e}")

        if self.logger and not failed:
            self.logger.info("🤖 LLM service cleanup completed")

    def __repr__(self) -> str:
        """Return a short debug representation with provider and model."""
        return f"<LLMService(provider={self.config.provider}, model={self.config.model})>"
|