unrealon 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unrealon/__init__.py +23 -21
- unrealon-1.1.0.dist-info/METADATA +164 -0
- unrealon-1.1.0.dist-info/RECORD +82 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
- unrealon-1.1.0.dist-info/entry_points.txt +9 -0
- {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
- unrealon_bridge/__init__.py +114 -0
- unrealon_bridge/cli.py +316 -0
- unrealon_bridge/client/__init__.py +93 -0
- unrealon_bridge/client/base.py +78 -0
- unrealon_bridge/client/commands.py +89 -0
- unrealon_bridge/client/connection.py +90 -0
- unrealon_bridge/client/events.py +65 -0
- unrealon_bridge/client/health.py +38 -0
- unrealon_bridge/client/html_parser.py +146 -0
- unrealon_bridge/client/logging.py +139 -0
- unrealon_bridge/client/proxy.py +70 -0
- unrealon_bridge/client/scheduler.py +450 -0
- unrealon_bridge/client/session.py +70 -0
- unrealon_bridge/configs/__init__.py +14 -0
- unrealon_bridge/configs/bridge_config.py +212 -0
- unrealon_bridge/configs/bridge_config.yaml +39 -0
- unrealon_bridge/models/__init__.py +138 -0
- unrealon_bridge/models/base.py +28 -0
- unrealon_bridge/models/command.py +41 -0
- unrealon_bridge/models/events.py +40 -0
- unrealon_bridge/models/html_parser.py +79 -0
- unrealon_bridge/models/logging.py +55 -0
- unrealon_bridge/models/parser.py +63 -0
- unrealon_bridge/models/proxy.py +41 -0
- unrealon_bridge/models/requests.py +95 -0
- unrealon_bridge/models/responses.py +88 -0
- unrealon_bridge/models/scheduler.py +592 -0
- unrealon_bridge/models/session.py +28 -0
- unrealon_bridge/server/__init__.py +91 -0
- unrealon_bridge/server/base.py +171 -0
- unrealon_bridge/server/handlers/__init__.py +23 -0
- unrealon_bridge/server/handlers/command.py +110 -0
- unrealon_bridge/server/handlers/html_parser.py +139 -0
- unrealon_bridge/server/handlers/logging.py +95 -0
- unrealon_bridge/server/handlers/parser.py +95 -0
- unrealon_bridge/server/handlers/proxy.py +75 -0
- unrealon_bridge/server/handlers/scheduler.py +545 -0
- unrealon_bridge/server/handlers/session.py +66 -0
- unrealon_browser/__init__.py +61 -18
- unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
- unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
- unrealon_browser/{src/core → core}/browser_manager.py +2 -2
- unrealon_browser/{src/managers → managers}/captcha.py +1 -1
- unrealon_browser/{src/managers → managers}/cookies.py +1 -1
- unrealon_browser/managers/logger_bridge.py +231 -0
- unrealon_browser/{src/managers → managers}/profile.py +1 -1
- unrealon_driver/__init__.py +73 -19
- unrealon_driver/browser/__init__.py +8 -0
- unrealon_driver/browser/config.py +74 -0
- unrealon_driver/browser/manager.py +416 -0
- unrealon_driver/exceptions.py +28 -0
- unrealon_driver/parser/__init__.py +55 -0
- unrealon_driver/parser/cli_manager.py +141 -0
- unrealon_driver/parser/daemon_manager.py +227 -0
- unrealon_driver/parser/managers/__init__.py +46 -0
- unrealon_driver/parser/managers/browser.py +51 -0
- unrealon_driver/parser/managers/config.py +281 -0
- unrealon_driver/parser/managers/error.py +412 -0
- unrealon_driver/parser/managers/html.py +732 -0
- unrealon_driver/parser/managers/logging.py +609 -0
- unrealon_driver/parser/managers/result.py +321 -0
- unrealon_driver/parser/parser_manager.py +628 -0
- unrealon/sdk_config.py +0 -88
- unrealon-1.0.9.dist-info/METADATA +0 -810
- unrealon-1.0.9.dist-info/RECORD +0 -246
- unrealon_browser/pyproject.toml +0 -182
- unrealon_browser/src/__init__.py +0 -62
- unrealon_browser/src/managers/logger_bridge.py +0 -395
- unrealon_driver/README.md +0 -204
- unrealon_driver/pyproject.toml +0 -187
- unrealon_driver/src/__init__.py +0 -90
- unrealon_driver/src/cli/__init__.py +0 -10
- unrealon_driver/src/cli/main.py +0 -66
- unrealon_driver/src/cli/simple.py +0 -510
- unrealon_driver/src/config/__init__.py +0 -11
- unrealon_driver/src/config/auto_config.py +0 -478
- unrealon_driver/src/core/__init__.py +0 -18
- unrealon_driver/src/core/exceptions.py +0 -289
- unrealon_driver/src/core/parser.py +0 -638
- unrealon_driver/src/dto/__init__.py +0 -66
- unrealon_driver/src/dto/cli.py +0 -119
- unrealon_driver/src/dto/config.py +0 -18
- unrealon_driver/src/dto/events.py +0 -237
- unrealon_driver/src/dto/execution.py +0 -313
- unrealon_driver/src/dto/services.py +0 -311
- unrealon_driver/src/execution/__init__.py +0 -23
- unrealon_driver/src/execution/daemon_mode.py +0 -317
- unrealon_driver/src/execution/interactive_mode.py +0 -88
- unrealon_driver/src/execution/modes.py +0 -45
- unrealon_driver/src/execution/scheduled_mode.py +0 -209
- unrealon_driver/src/execution/test_mode.py +0 -250
- unrealon_driver/src/logging/__init__.py +0 -24
- unrealon_driver/src/logging/driver_logger.py +0 -512
- unrealon_driver/src/services/__init__.py +0 -24
- unrealon_driver/src/services/browser_service.py +0 -726
- unrealon_driver/src/services/llm/__init__.py +0 -15
- unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
- unrealon_driver/src/services/llm/llm.py +0 -195
- unrealon_driver/src/services/logger_service.py +0 -232
- unrealon_driver/src/services/metrics_service.py +0 -185
- unrealon_driver/src/services/scheduler_service.py +0 -489
- unrealon_driver/src/services/websocket_service.py +0 -362
- unrealon_driver/src/utils/__init__.py +0 -16
- unrealon_driver/src/utils/service_factory.py +0 -317
- unrealon_driver/src/utils/time_formatter.py +0 -338
- unrealon_llm/README.md +0 -44
- unrealon_llm/__init__.py +0 -26
- unrealon_llm/pyproject.toml +0 -154
- unrealon_llm/src/__init__.py +0 -228
- unrealon_llm/src/cli/__init__.py +0 -0
- unrealon_llm/src/core/__init__.py +0 -11
- unrealon_llm/src/core/smart_client.py +0 -438
- unrealon_llm/src/dto/__init__.py +0 -155
- unrealon_llm/src/dto/models/__init__.py +0 -0
- unrealon_llm/src/dto/models/config.py +0 -343
- unrealon_llm/src/dto/models/core.py +0 -328
- unrealon_llm/src/dto/models/enums.py +0 -123
- unrealon_llm/src/dto/models/html_analysis.py +0 -345
- unrealon_llm/src/dto/models/statistics.py +0 -473
- unrealon_llm/src/dto/models/translation.py +0 -383
- unrealon_llm/src/dto/models/type_conversion.py +0 -462
- unrealon_llm/src/dto/schemas/__init__.py +0 -0
- unrealon_llm/src/exceptions.py +0 -392
- unrealon_llm/src/llm_config/__init__.py +0 -20
- unrealon_llm/src/llm_config/logging_config.py +0 -178
- unrealon_llm/src/llm_logging/__init__.py +0 -42
- unrealon_llm/src/llm_logging/llm_events.py +0 -107
- unrealon_llm/src/llm_logging/llm_logger.py +0 -466
- unrealon_llm/src/managers/__init__.py +0 -15
- unrealon_llm/src/managers/cache_manager.py +0 -67
- unrealon_llm/src/managers/cost_manager.py +0 -107
- unrealon_llm/src/managers/request_manager.py +0 -298
- unrealon_llm/src/modules/__init__.py +0 -0
- unrealon_llm/src/modules/html_processor/__init__.py +0 -25
- unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
- unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
- unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
- unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
- unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
- unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
- unrealon_llm/src/modules/html_processor/processor.py +0 -102
- unrealon_llm/src/modules/llm/__init__.py +0 -0
- unrealon_llm/src/modules/translator/__init__.py +0 -0
- unrealon_llm/src/provider.py +0 -116
- unrealon_llm/src/utils/__init__.py +0 -95
- unrealon_llm/src/utils/common.py +0 -64
- unrealon_llm/src/utils/data_extractor.py +0 -188
- unrealon_llm/src/utils/html_cleaner.py +0 -767
- unrealon_llm/src/utils/language_detector.py +0 -308
- unrealon_llm/src/utils/models_cache.py +0 -592
- unrealon_llm/src/utils/smart_counter.py +0 -229
- unrealon_llm/src/utils/token_counter.py +0 -189
- unrealon_sdk/README.md +0 -25
- unrealon_sdk/__init__.py +0 -30
- unrealon_sdk/pyproject.toml +0 -231
- unrealon_sdk/src/__init__.py +0 -150
- unrealon_sdk/src/cli/__init__.py +0 -12
- unrealon_sdk/src/cli/commands/__init__.py +0 -22
- unrealon_sdk/src/cli/commands/benchmark.py +0 -42
- unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
- unrealon_sdk/src/cli/commands/health.py +0 -46
- unrealon_sdk/src/cli/commands/integration.py +0 -498
- unrealon_sdk/src/cli/commands/reports.py +0 -43
- unrealon_sdk/src/cli/commands/security.py +0 -36
- unrealon_sdk/src/cli/commands/server.py +0 -483
- unrealon_sdk/src/cli/commands/servers.py +0 -56
- unrealon_sdk/src/cli/commands/tests.py +0 -55
- unrealon_sdk/src/cli/main.py +0 -126
- unrealon_sdk/src/cli/utils/reporter.py +0 -519
- unrealon_sdk/src/clients/openapi.yaml +0 -3347
- unrealon_sdk/src/clients/python_http/__init__.py +0 -3
- unrealon_sdk/src/clients/python_http/api_config.py +0 -228
- unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
- unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
- unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
- unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
- unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
- unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
- unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
- unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
- unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
- unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
- unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
- unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
- unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
- unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
- unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
- unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
- unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
- unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
- unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
- unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
- unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
- unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
- unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
- unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
- unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
- unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
- unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
- unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
- unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
- unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
- unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
- unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
- unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
- unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
- unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
- unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
- unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
- unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
- unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
- unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
- unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
- unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
- unrealon_sdk/src/clients/python_websocket/client.py +0 -490
- unrealon_sdk/src/clients/python_websocket/events.py +0 -732
- unrealon_sdk/src/clients/python_websocket/example.py +0 -136
- unrealon_sdk/src/clients/python_websocket/types.py +0 -871
- unrealon_sdk/src/core/__init__.py +0 -64
- unrealon_sdk/src/core/client.py +0 -556
- unrealon_sdk/src/core/config.py +0 -465
- unrealon_sdk/src/core/exceptions.py +0 -239
- unrealon_sdk/src/core/metadata.py +0 -191
- unrealon_sdk/src/core/models.py +0 -142
- unrealon_sdk/src/core/types.py +0 -68
- unrealon_sdk/src/dto/__init__.py +0 -268
- unrealon_sdk/src/dto/authentication.py +0 -108
- unrealon_sdk/src/dto/cache.py +0 -208
- unrealon_sdk/src/dto/common.py +0 -19
- unrealon_sdk/src/dto/concurrency.py +0 -393
- unrealon_sdk/src/dto/events.py +0 -108
- unrealon_sdk/src/dto/health.py +0 -339
- unrealon_sdk/src/dto/load_balancing.py +0 -336
- unrealon_sdk/src/dto/logging.py +0 -230
- unrealon_sdk/src/dto/performance.py +0 -165
- unrealon_sdk/src/dto/rate_limiting.py +0 -295
- unrealon_sdk/src/dto/resource_pooling.py +0 -128
- unrealon_sdk/src/dto/structured_logging.py +0 -112
- unrealon_sdk/src/dto/task_scheduling.py +0 -121
- unrealon_sdk/src/dto/websocket.py +0 -55
- unrealon_sdk/src/enterprise/__init__.py +0 -59
- unrealon_sdk/src/enterprise/authentication.py +0 -401
- unrealon_sdk/src/enterprise/cache_manager.py +0 -578
- unrealon_sdk/src/enterprise/error_recovery.py +0 -494
- unrealon_sdk/src/enterprise/event_system.py +0 -549
- unrealon_sdk/src/enterprise/health_monitor.py +0 -747
- unrealon_sdk/src/enterprise/load_balancer.py +0 -964
- unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
- unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
- unrealon_sdk/src/enterprise/logging/development.py +0 -744
- unrealon_sdk/src/enterprise/logging/service.py +0 -410
- unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
- unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
- unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
- unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
- unrealon_sdk/src/enterprise/resource_pool.py +0 -763
- unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
- unrealon_sdk/src/internal/__init__.py +0 -10
- unrealon_sdk/src/internal/command_router.py +0 -497
- unrealon_sdk/src/internal/connection_manager.py +0 -397
- unrealon_sdk/src/internal/http_client.py +0 -446
- unrealon_sdk/src/internal/websocket_client.py +0 -420
- unrealon_sdk/src/provider.py +0 -471
- unrealon_sdk/src/utils.py +0 -234
- /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
- /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
- /unrealon_browser/{src/cli → cli}/main.py +0 -0
- /unrealon_browser/{src/core → core}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
- /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
- /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
- /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Universal HTML Processing Model
|
|
3
|
-
|
|
4
|
-
Single simplified Pydantic model for any HTML page extraction with markdown documentation.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from typing import Dict, List, Union
|
|
8
|
-
from pydantic import BaseModel, Field, ConfigDict, field_validator
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class UniversalExtractionSchema(BaseModel):
|
|
12
|
-
"""Universal HTML page extraction schema with markdown documentation"""
|
|
13
|
-
|
|
14
|
-
model_config = ConfigDict(
|
|
15
|
-
validate_assignment=True,
|
|
16
|
-
extra="allow", # Allow extra fields for flexibility
|
|
17
|
-
title="Universal Extraction Schema"
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
# LLM analysis results
|
|
21
|
-
detected_item_type: str = Field(
|
|
22
|
-
...,
|
|
23
|
-
description="Auto-detected type of page (product, listing, article, service, etc.)"
|
|
24
|
-
)
|
|
25
|
-
extraction_strategy: str = Field(
|
|
26
|
-
...,
|
|
27
|
-
description="Brief description of extraction strategy"
|
|
28
|
-
)
|
|
29
|
-
confidence: float = Field(
|
|
30
|
-
...,
|
|
31
|
-
ge=0,
|
|
32
|
-
le=1,
|
|
33
|
-
description="Overall extraction confidence"
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
# Simple CSS selectors organized by field
|
|
37
|
-
selectors: Dict[str, List[str]] = Field(
|
|
38
|
-
...,
|
|
39
|
-
description="CSS selectors organized by field name (title, price, description, items_container, etc.)"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
# Comprehensive markdown documentation
|
|
43
|
-
documentation: str = Field(
|
|
44
|
-
...,
|
|
45
|
-
description="Markdown documentation with examples, explanations, and extraction guidance"
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
@field_validator('selectors', mode='before')
|
|
49
|
-
@classmethod
|
|
50
|
-
def convert_strings_to_lists(cls, v):
|
|
51
|
-
"""Convert string selectors to lists automatically"""
|
|
52
|
-
if isinstance(v, dict):
|
|
53
|
-
for key, value in v.items():
|
|
54
|
-
if isinstance(value, str):
|
|
55
|
-
v[key] = [value]
|
|
56
|
-
return v
|
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
HTML Processor Factory
|
|
3
|
-
|
|
4
|
-
Factory class for creating HTML processors.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from unrealon_llm.src.core import SmartLLMClient
|
|
8
|
-
from unrealon_llm.src.dto import LLMConfig
|
|
9
|
-
|
|
10
|
-
from .listing_processor import ListingProcessor
|
|
11
|
-
from .details_processor import DetailsProcessor
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class UnrealOnLLM:
|
|
15
|
-
"""Factory class for creating UnrealOn LLM components"""
|
|
16
|
-
|
|
17
|
-
@staticmethod
|
|
18
|
-
def create_client(
|
|
19
|
-
openrouter_api_key: str,
|
|
20
|
-
default_model: str = "anthropic/claude-3.5-sonnet",
|
|
21
|
-
daily_cost_limit: float = 5.0,
|
|
22
|
-
enable_caching: bool = True,
|
|
23
|
-
cache_ttl_minutes: int = 30,
|
|
24
|
-
) -> SmartLLMClient:
|
|
25
|
-
"""
|
|
26
|
-
Create LLM client
|
|
27
|
-
|
|
28
|
-
Args:
|
|
29
|
-
openrouter_api_key: OpenRouter API key
|
|
30
|
-
default_model: Default model to use
|
|
31
|
-
daily_cost_limit: Daily cost limit in USD
|
|
32
|
-
enable_caching: Enable response caching
|
|
33
|
-
cache_ttl_minutes: Cache TTL in minutes
|
|
34
|
-
|
|
35
|
-
Returns:
|
|
36
|
-
Configured SmartLLMClient instance
|
|
37
|
-
"""
|
|
38
|
-
config = LLMConfig(
|
|
39
|
-
openrouter_api_key=openrouter_api_key,
|
|
40
|
-
default_model=default_model,
|
|
41
|
-
daily_cost_limit_usd=daily_cost_limit,
|
|
42
|
-
request_timeout_seconds=60,
|
|
43
|
-
max_retries=3,
|
|
44
|
-
enable_global_cache=enable_caching,
|
|
45
|
-
cache_ttl_hours=max(1, int(cache_ttl_minutes / 60)),
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
return SmartLLMClient(config)
|
|
49
|
-
|
|
50
|
-
@staticmethod
|
|
51
|
-
def create_listing_processor(
|
|
52
|
-
openrouter_api_key: str,
|
|
53
|
-
default_model: str = "anthropic/claude-3.5-sonnet",
|
|
54
|
-
daily_cost_limit: float = 1.0,
|
|
55
|
-
enable_caching: bool = False, # Disable cache for HTML processors
|
|
56
|
-
) -> ListingProcessor:
|
|
57
|
-
"""
|
|
58
|
-
Create listing processor
|
|
59
|
-
|
|
60
|
-
Args:
|
|
61
|
-
openrouter_api_key: OpenRouter API key
|
|
62
|
-
default_model: Default model to use
|
|
63
|
-
daily_cost_limit: Daily cost limit in USD
|
|
64
|
-
enable_caching: Enable response caching (disabled by default for HTML processing)
|
|
65
|
-
|
|
66
|
-
Returns:
|
|
67
|
-
Configured ListingProcessor instance
|
|
68
|
-
"""
|
|
69
|
-
llm_client = UnrealOnLLM.create_client(
|
|
70
|
-
openrouter_api_key=openrouter_api_key,
|
|
71
|
-
default_model=default_model,
|
|
72
|
-
daily_cost_limit=daily_cost_limit,
|
|
73
|
-
enable_caching=enable_caching,
|
|
74
|
-
)
|
|
75
|
-
return ListingProcessor(llm_client)
|
|
76
|
-
|
|
77
|
-
@staticmethod
|
|
78
|
-
def create_details_processor(
|
|
79
|
-
openrouter_api_key: str,
|
|
80
|
-
default_model: str = "anthropic/claude-3.5-sonnet",
|
|
81
|
-
daily_cost_limit: float = 1.0,
|
|
82
|
-
enable_caching: bool = False, # Disable cache for HTML processors
|
|
83
|
-
) -> DetailsProcessor:
|
|
84
|
-
"""
|
|
85
|
-
Create details processor
|
|
86
|
-
|
|
87
|
-
Args:
|
|
88
|
-
openrouter_api_key: OpenRouter API key
|
|
89
|
-
default_model: Default model to use
|
|
90
|
-
daily_cost_limit: Daily cost limit in USD
|
|
91
|
-
enable_caching: Enable response caching (disabled by default for HTML processing)
|
|
92
|
-
|
|
93
|
-
Returns:
|
|
94
|
-
Configured DetailsProcessor instance
|
|
95
|
-
"""
|
|
96
|
-
llm_client = UnrealOnLLM.create_client(
|
|
97
|
-
openrouter_api_key=openrouter_api_key,
|
|
98
|
-
default_model=default_model,
|
|
99
|
-
daily_cost_limit=daily_cost_limit,
|
|
100
|
-
enable_caching=enable_caching,
|
|
101
|
-
)
|
|
102
|
-
return DetailsProcessor(llm_client)
|
|
File without changes
|
|
File without changes
|
unrealon_llm/src/provider.py
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
UnrealOn LLM Provider
|
|
3
|
-
|
|
4
|
-
Simple provider module for UnrealOn LLM functionality.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
# Core LLM functionality
|
|
8
|
-
from unrealon_llm.src.core import SmartLLMClient
|
|
9
|
-
|
|
10
|
-
# HTML parsing and analysis
|
|
11
|
-
from unrealon_llm.src.utils.html_cleaner import SmartHTMLCleaner
|
|
12
|
-
from unrealon_llm.src.modules.html_processor import (
|
|
13
|
-
ListingProcessor,
|
|
14
|
-
DetailsProcessor,
|
|
15
|
-
UnrealOnLLM,
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
# Logging and configuration
|
|
19
|
-
from unrealon_llm.src.llm_config import setup_llm_logging, configure_llm_logging
|
|
20
|
-
from unrealon_llm.src.llm_logging import (
|
|
21
|
-
get_llm_logger,
|
|
22
|
-
LLMEventType,
|
|
23
|
-
LLMContext,
|
|
24
|
-
initialize_llm_logger,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# Configuration and DTOs
|
|
28
|
-
from unrealon_llm.src.dto import (
|
|
29
|
-
# Core configuration
|
|
30
|
-
LLMConfig,
|
|
31
|
-
AnalysisConfig,
|
|
32
|
-
TranslationConfig,
|
|
33
|
-
# Enums
|
|
34
|
-
LLMProvider,
|
|
35
|
-
OptimizationLevel,
|
|
36
|
-
CacheStrategy,
|
|
37
|
-
MessageRole,
|
|
38
|
-
PatternType,
|
|
39
|
-
SelectorType,
|
|
40
|
-
LanguageCode,
|
|
41
|
-
DataType,
|
|
42
|
-
SchemaFormat,
|
|
43
|
-
ProcessingStage,
|
|
44
|
-
# Core models
|
|
45
|
-
TokenUsage,
|
|
46
|
-
ChatMessage,
|
|
47
|
-
LLMResponse,
|
|
48
|
-
LanguageDetection,
|
|
49
|
-
CostBreakdown,
|
|
50
|
-
HealthStatus,
|
|
51
|
-
ProcessingMetrics,
|
|
52
|
-
# HTML Analysis models
|
|
53
|
-
DetectedPattern,
|
|
54
|
-
SelectorInfo,
|
|
55
|
-
HTMLAnalysisRequest,
|
|
56
|
-
HTMLAnalysisResult,
|
|
57
|
-
SelectorValidationResult,
|
|
58
|
-
CompleteAnalysisResult,
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
# Utilities
|
|
62
|
-
from unrealon_llm.src.utils.data_extractor import SmartDataExtractor
|
|
63
|
-
from unrealon_llm.src.utils.smart_counter import SmartTokenCounter
|
|
64
|
-
from unrealon_llm.src.utils.language_detector import LanguageDetector
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Direct exports for convenience
|
|
68
|
-
__all__ = [
|
|
69
|
-
# Factory class
|
|
70
|
-
"UnrealOnLLM",
|
|
71
|
-
# Core classes
|
|
72
|
-
"SmartLLMClient",
|
|
73
|
-
"SmartHTMLCleaner",
|
|
74
|
-
"SmartDataExtractor",
|
|
75
|
-
"SmartTokenCounter",
|
|
76
|
-
"LanguageDetector",
|
|
77
|
-
"ListingProcessor",
|
|
78
|
-
"DetailsProcessor",
|
|
79
|
-
# Configuration classes
|
|
80
|
-
"LLMConfig",
|
|
81
|
-
"AnalysisConfig",
|
|
82
|
-
"TranslationConfig",
|
|
83
|
-
# Enums
|
|
84
|
-
"LLMProvider",
|
|
85
|
-
"OptimizationLevel",
|
|
86
|
-
"CacheStrategy",
|
|
87
|
-
"MessageRole",
|
|
88
|
-
"PatternType",
|
|
89
|
-
"SelectorType",
|
|
90
|
-
"LanguageCode",
|
|
91
|
-
"DataType",
|
|
92
|
-
"SchemaFormat",
|
|
93
|
-
"ProcessingStage",
|
|
94
|
-
# Core models
|
|
95
|
-
"TokenUsage",
|
|
96
|
-
"ChatMessage",
|
|
97
|
-
"LLMResponse",
|
|
98
|
-
"LanguageDetection",
|
|
99
|
-
"CostBreakdown",
|
|
100
|
-
"HealthStatus",
|
|
101
|
-
"ProcessingMetrics",
|
|
102
|
-
# HTML Analysis models
|
|
103
|
-
"DetectedPattern",
|
|
104
|
-
"SelectorInfo",
|
|
105
|
-
"HTMLAnalysisRequest",
|
|
106
|
-
"HTMLAnalysisResult",
|
|
107
|
-
"SelectorValidationResult",
|
|
108
|
-
"CompleteAnalysisResult",
|
|
109
|
-
# Logging
|
|
110
|
-
"setup_llm_logging",
|
|
111
|
-
"configure_llm_logging",
|
|
112
|
-
"get_llm_logger",
|
|
113
|
-
"initialize_llm_logger",
|
|
114
|
-
"LLMEventType",
|
|
115
|
-
"LLMContext",
|
|
116
|
-
]
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
UnrealOn LLM Utilities
|
|
3
|
-
|
|
4
|
-
Utility functions and helpers for UnrealOn LLM platform including
|
|
5
|
-
language detection, token counting, and model caching.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
# Language detection utilities
|
|
9
|
-
from .language_detector import (
|
|
10
|
-
LanguageDetector,
|
|
11
|
-
detect_language,
|
|
12
|
-
detect_multiple_languages,
|
|
13
|
-
is_language,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
# Token counting utilities (legacy)
|
|
17
|
-
from .token_counter import (
|
|
18
|
-
TokenCounter,
|
|
19
|
-
count_tokens,
|
|
20
|
-
count_message_tokens,
|
|
21
|
-
optimize_for_tokens,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
# Smart counting utilities (new approach)
|
|
25
|
-
from .smart_counter import (
|
|
26
|
-
SmartTokenCounter,
|
|
27
|
-
smart_count_tokens,
|
|
28
|
-
smart_count_messages,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
# Models cache utilities
|
|
32
|
-
from .models_cache import (
|
|
33
|
-
ModelInfo,
|
|
34
|
-
ModelsCache,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
# HTML cleaning utilities
|
|
38
|
-
from .html_cleaner import (
|
|
39
|
-
SmartHTMLCleaner,
|
|
40
|
-
clean_html_for_llm,
|
|
41
|
-
extract_js_data_only,
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
# Common utilities
|
|
45
|
-
from .common import (
|
|
46
|
-
generate_correlation_id,
|
|
47
|
-
generate_request_id,
|
|
48
|
-
)
|
|
49
|
-
|
|
50
|
-
# Data extraction utilities
|
|
51
|
-
from .data_extractor import (
|
|
52
|
-
SmartDataExtractor,
|
|
53
|
-
safe_extract_json,
|
|
54
|
-
extract_llm_response_data,
|
|
55
|
-
create_data_extractor,
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
# Exports
|
|
59
|
-
__all__ = [
|
|
60
|
-
# Language Detection
|
|
61
|
-
"LanguageDetector",
|
|
62
|
-
"detect_language",
|
|
63
|
-
"detect_multiple_languages",
|
|
64
|
-
"is_language",
|
|
65
|
-
|
|
66
|
-
# Token Counting (Legacy)
|
|
67
|
-
"TokenCounter",
|
|
68
|
-
"count_tokens",
|
|
69
|
-
"count_message_tokens",
|
|
70
|
-
"optimize_for_tokens",
|
|
71
|
-
|
|
72
|
-
# Smart Counting (New)
|
|
73
|
-
"SmartTokenCounter",
|
|
74
|
-
"smart_count_tokens",
|
|
75
|
-
"smart_count_messages",
|
|
76
|
-
|
|
77
|
-
# Models Cache
|
|
78
|
-
"ModelInfo",
|
|
79
|
-
"ModelsCache",
|
|
80
|
-
|
|
81
|
-
# HTML Cleaning
|
|
82
|
-
"SmartHTMLCleaner",
|
|
83
|
-
"clean_html_for_llm",
|
|
84
|
-
"extract_js_data_only",
|
|
85
|
-
|
|
86
|
-
# Common Utilities
|
|
87
|
-
"generate_correlation_id",
|
|
88
|
-
"generate_request_id",
|
|
89
|
-
|
|
90
|
-
# Data Extraction
|
|
91
|
-
"SmartDataExtractor",
|
|
92
|
-
"safe_extract_json",
|
|
93
|
-
"extract_llm_response_data",
|
|
94
|
-
"create_data_extractor",
|
|
95
|
-
]
|
unrealon_llm/src/utils/common.py
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Common Utilities
|
|
3
|
-
|
|
4
|
-
General-purpose utility functions for UnrealOn LLM including
|
|
5
|
-
ID generation, validation, and other common helpers.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import uuid
|
|
9
|
-
import secrets
|
|
10
|
-
from typing import Optional
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def generate_correlation_id() -> str:
|
|
14
|
-
"""Generate a unique correlation ID for tracking operations."""
|
|
15
|
-
return f"llm_{uuid.uuid4().hex[:16]}"
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def generate_request_id() -> str:
|
|
19
|
-
"""Generate a unique request ID for API calls."""
|
|
20
|
-
return f"req_{secrets.token_hex(8)}"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def generate_session_id() -> str:
|
|
24
|
-
"""Generate a unique session ID."""
|
|
25
|
-
return f"sess_{uuid.uuid4().hex[:12]}"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def sanitize_model_name(model_name: str) -> str:
|
|
29
|
-
"""Sanitize model name for logging and metrics."""
|
|
30
|
-
return model_name.replace("/", "_").replace(":", "_").replace("-", "_")
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
|
|
34
|
-
"""Truncate text for logging purposes."""
|
|
35
|
-
if len(text) <= max_length:
|
|
36
|
-
return text
|
|
37
|
-
return text[:max_length - len(suffix)] + suffix
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def format_bytes(bytes_count: int) -> str:
|
|
41
|
-
"""Format bytes into human readable format."""
|
|
42
|
-
for unit in ['B', 'KB', 'MB', 'GB']:
|
|
43
|
-
if bytes_count < 1024.0:
|
|
44
|
-
return f"{bytes_count:.1f} {unit}"
|
|
45
|
-
bytes_count /= 1024.0
|
|
46
|
-
return f"{bytes_count:.1f} TB"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def format_duration_ms(duration_ms: float) -> str:
|
|
50
|
-
"""Format duration in milliseconds to human readable format."""
|
|
51
|
-
if duration_ms < 1000:
|
|
52
|
-
return f"{duration_ms:.1f}ms"
|
|
53
|
-
elif duration_ms < 60000:
|
|
54
|
-
return f"{duration_ms / 1000:.1f}s"
|
|
55
|
-
else:
|
|
56
|
-
minutes = int(duration_ms / 60000)
|
|
57
|
-
seconds = (duration_ms % 60000) / 1000
|
|
58
|
-
return f"{minutes}m{seconds:.1f}s"
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def safe_get_env(key: str, default: Optional[str] = None) -> Optional[str]:
|
|
62
|
-
"""Safely get environment variable with optional default."""
|
|
63
|
-
import os
|
|
64
|
-
return os.getenv(key, default)
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Data Extractor
|
|
3
|
-
|
|
4
|
-
Simple wrapper around json_extractor lib for extracting JSON from text.
|
|
5
|
-
KISS methodology - just extract JSON, nothing more.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import json
|
|
9
|
-
import logging
|
|
10
|
-
import re
|
|
11
|
-
from typing import Any, Dict, List, Optional, Type, TypeVar, Union
|
|
12
|
-
from pydantic import BaseModel, ValidationError
|
|
13
|
-
|
|
14
|
-
from unrealon_llm.src.exceptions import ResponseParsingError
|
|
15
|
-
|
|
16
|
-
logger = logging.getLogger(__name__)
|
|
17
|
-
|
|
18
|
-
T = TypeVar("T", bound=BaseModel)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _extract_json_smart(content: str) -> Optional[str]:
    """
    Smart JSON extraction from text - finds the first valid JSON object/array.

    Every ``{`` in the text is tried as the start of a JSON object, in order
    of appearance; if no object parses, every ``[`` is tried as the start of
    a JSON array. Parsing is delegated to the standard JSON decoder, so
    arbitrarily deep nesting and braces/brackets inside string literals are
    handled correctly, and the returned text is always parseable.

    Args:
        content: Text content that may contain JSON

    Returns:
        First valid JSON string found (objects preferred over arrays) or None
    """
    decoder = json.JSONDecoder()

    # Objects are searched across the whole text before arrays are considered.
    for opener in ("{", "["):
        start = content.find(opener)
        while start != -1:
            try:
                # raw_decode parses one JSON document starting at `start`
                # and reports where it ended, ignoring any trailing text.
                _, end = decoder.raw_decode(content, start)
            except ValueError:  # json.JSONDecodeError is a ValueError
                start = content.find(opener, start + 1)
                continue
            return content[start:end]

    return None
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def extract_json(
    content: str,
    expected_schema: Optional[Type[T]] = None,
    fallback_value: Optional[Any] = None,
    strict_mode: bool = True,
) -> Union[T, Dict[str, Any], None]:
    """
    Extract JSON from text content.

    Args:
        content: Text content containing JSON
        expected_schema: Pydantic model for validation
        fallback_value: Return value if extraction fails (non-strict mode)
        strict_mode: Raise exception on failure if True

    Returns:
        The parsed JSON data, or an instance of ``expected_schema`` when a
        schema is given. In non-strict mode ``fallback_value`` is returned
        on any failure.

    Raises:
        ResponseParsingError: If extraction fails in strict mode
        ValidationError: If schema validation fails in strict mode
    """
    if not content:
        if strict_mode:
            raise ResponseParsingError("Empty content", "json")
        return fallback_value

    try:
        # Smart JSON extraction
        json_content = _extract_json_smart(content)

        if json_content is None:
            if strict_mode:
                # NOTE: this is caught by the generic handler below and
                # re-raised with a snippet of the content; the original
                # error stays reachable through ``__cause__``.
                raise ResponseParsingError("No valid JSON found", "json")
            return fallback_value

        # Parse the extracted JSON
        extracted_data = json.loads(json_content)
        logger.info(f"Successfully extracted and parsed JSON: {type(extracted_data)}")

        # Validate with schema if provided
        if expected_schema and issubclass(expected_schema, BaseModel):
            return expected_schema.model_validate(extracted_data)

        return extracted_data

    except ValidationError as e:
        if strict_mode:
            # Bare raise keeps the original traceback intact.
            raise
        logger.warning(f"Schema validation failed: {e}")
        return fallback_value

    except Exception as e:
        if strict_mode:
            # Chain explicitly so the underlying failure is not lost.
            raise ResponseParsingError(content[:200], "json") from e
        logger.warning(f"JSON extraction failed: {e}")
        return fallback_value
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def safe_extract_json(
    content: str,
    expected_schema: Optional[Type[T]] = None,
    fallback_value: Optional[Any] = None,
) -> Union[T, Dict[str, Any], None]:
    """
    Non-strict variant of ``extract_json``.

    Args:
        content: Text that may contain JSON
        expected_schema: Optional Pydantic model passed through for validation
        fallback_value: Value passed through as the non-strict fallback

    Returns:
        Whatever ``extract_json`` returns when called with ``strict_mode=False``
    """
    return extract_json(
        content,
        expected_schema=expected_schema,
        fallback_value=fallback_value,
        strict_mode=False,
    )
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def extract_llm_response_data(
    response_content: str,
    expected_schema: Optional[Type[T]] = None,
    required_fields: Optional[List[str]] = None,
) -> Union[T, Dict[str, Any]]:
    """
    Extract data from an LLM response using strict-mode ``extract_json``.

    Args:
        response_content: LLM response text
        expected_schema: Optional Pydantic model passed to ``extract_json``
        required_fields: Keys that must be present; only checked when no
            schema is given and the extracted value is a dict

    Returns:
        The value returned by ``extract_json``

    Raises:
        ResponseParsingError: If any of ``required_fields`` is missing from
            the extracted dict (``extract_json`` may raise as well)
    """
    extracted = extract_json(response_content, expected_schema, strict_mode=True)

    # The field check applies only to schema-less dict results.
    needs_field_check = (
        not expected_schema and required_fields and isinstance(extracted, dict)
    )
    if not needs_field_check:
        return extracted

    absent = [name for name in required_fields if name not in extracted]
    if absent:
        raise ResponseParsingError(f"Missing required fields: {absent}", "json")

    return extracted
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
# Legacy compatibility
|
|
176
|
-
class SmartDataExtractor:
    """Object-style facade over the module-level ``extract_json`` function."""

    def __init__(self, strict_mode: bool = True):
        """Remember the strictness applied to every ``extract_json`` call."""
        self.strict_mode = strict_mode

    def extract_json(self, content: str, expected_schema=None, fallback_value=None):
        """Delegate to the module-level ``extract_json`` with this instance's strict_mode."""
        return extract_json(
            content,
            expected_schema,
            fallback_value,
            strict_mode=self.strict_mode,
        )
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
def create_data_extractor(strict_mode: bool = True) -> SmartDataExtractor:
    """Build and return a ``SmartDataExtractor`` with the given strictness."""
    extractor = SmartDataExtractor(strict_mode)
    return extractor
|