unrealon 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.1.dist-info/METADATA +722 -0
  3. unrealon-1.1.1.dist-info/RECORD +82 -0
  4. {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.1.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.9.dist-info → unrealon-1.1.1.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.9.dist-info/METADATA +0 -810
  71. unrealon-1.0.9.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -1,15 +0,0 @@
1
- """
2
- LLM Services for UnrealOn Driver v3.0
3
-
4
- Simple, clean LLM services following KISS principle.
5
- """
6
-
7
- from .llm import LLMService
8
- from .browser_llm_service import BrowserLLMService, BrowserLLMConfig, ExtractionResult
9
-
10
- __all__ = [
11
- "LLMService",
12
- "BrowserLLMService",
13
- "BrowserLLMConfig",
14
- "ExtractionResult"
15
- ]
@@ -1,363 +0,0 @@
1
- """
2
- Browser LLM Service - UnrealOn Driver v3.0
3
-
4
- Simple URL → Browser → HTML → LLM → Response workflow.
5
- Just like the old driver but with v3.0 improvements.
6
- """
7
-
8
- import asyncio
9
- import json
10
- import re
11
- import shutil
12
- from datetime import datetime
13
- from pathlib import Path
14
- from typing import Optional
15
- from pydantic import BaseModel, Field, ConfigDict
16
- from urllib.parse import urlparse
17
-
18
- from unrealon_driver.src.services.browser_service import BrowserService
19
- from unrealon_driver.src.services.llm.llm import LLMService
20
- from unrealon_driver.src.dto.services import DriverBrowserConfig, LLMConfig
21
- from unrealon_driver.src.config.auto_config import AutoConfig
22
- from unrealon_driver.src.logging.driver_logger import DriverLogger
23
- from unrealon_driver.src.services.metrics_service import MetricsService
24
-
25
-
26
class BrowserLLMConfig(BaseModel):
    """Settings bundle consumed by the Browser LLM Service.

    Groups the browser configuration, the LLM configuration and the
    options that control whether (and where) extraction results are
    written to disk. Unknown fields are rejected and assignments are
    re-validated.
    """

    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # --- Browser -----------------------------------------------------
    browser_config: DriverBrowserConfig = Field(..., description="Browser configuration")

    # --- LLM ---------------------------------------------------------
    llm_config: LLMConfig = Field(
        ...,
        description="LLM configuration",
    )

    # --- Processing --------------------------------------------------
    # (dom_wait_seconds was removed as unnecessary)

    # --- Output ------------------------------------------------------
    save_results: bool = Field(True, description="Save extraction results to files")
    results_dir: Optional[str] = Field(None, description="Directory for saving results")
48
-
49
-
50
class ExtractionResult(BaseModel):
    """Outcome of one browser + LLM extraction run.

    Carries the extracted payload, timing and size metrics, the raw and
    cleaned HTML, and the paths of any files the result was saved to.
    Unknown fields are rejected and assignments are re-validated.
    """

    model_config = ConfigDict(extra="forbid", validate_assignment=True)

    # --- Core data ---------------------------------------------------
    data: dict = Field(
        ...,
        description="Extracted structured data",
    )
    url: str = Field(
        ...,
        description="Source URL",
    )
    extraction_id: str = Field(
        ...,
        description="Unique extraction identifier",
    )
    # Naive UTC timestamp taken when the model instance is created.
    timestamp: datetime = Field(description="Extraction timestamp", default_factory=datetime.utcnow)

    # --- Performance metrics (seconds, never negative) ---------------
    total_duration_seconds: float = Field(..., description="Total operation duration", ge=0)
    browser_duration_seconds: float = Field(..., description="Browser operation duration", ge=0)
    llm_duration_seconds: float = Field(..., description="LLM processing duration", ge=0)

    # --- Content metrics ---------------------------------------------
    html_size_bytes: int = Field(
        ...,
        ge=0,
        description="HTML content size",
    )
    success: bool = Field(
        ...,
        description="Whether extraction was successful",
    )

    # --- HTML payloads kept for comprehensive saving -----------------
    original_html: str = Field("", description="Original HTML content")
    cleaned_html: str = Field("", description="Cleaned HTML content")

    # --- File paths (populated only when the result is saved) --------
    result_file_path: Optional[str] = Field(None, description="Path to saved result file")
    original_html_path: Optional[str] = Field(None, description="Path to saved original HTML")
    cleaned_html_path: Optional[str] = Field(None, description="Path to saved cleaned HTML")
    markdown_docs_path: Optional[str] = Field(
        None, description="Path to saved markdown documentation"
    )
95
-
96
-
97
class BrowserLLMService:
    """
    🌐 Browser + LLM Service - Simple Integration

    Simple URL → Browser → HTML → LLM → Data workflow:

    Main methods:
    - extract_listing(url) - for search results, catalogs
    - extract_details(url) - for product pages, articles

    Example:
        service = BrowserLLMService(config)
        result = await service.extract_listing("https://amazon.com/s?k=laptop")
        result = await service.extract_details("https://amazon.com/dp/B123456")
    """

    def __init__(
        self,
        config=None,
        auto_config: AutoConfig = None,
        logger: DriverLogger = None,
        metrics: MetricsService = None,
    ):
        """
        Initialize Browser + LLM service.

        Args:
            config: BrowserLLMConfig (legacy method); used only when
                ``auto_config`` is not given.
            auto_config: AutoConfig with ready browser/llm configs. Takes
                precedence over ``config``; results are saved under
                ``auto_config.system_dir / "results"``.
            logger: Optional logger; every log call is skipped when falsy.
            metrics: Optional metrics service forwarded to the browser service.

        Raises:
            ValueError: If neither ``config`` nor ``auto_config`` is provided.
        """
        self.logger = logger
        self.metrics = metrics

        if auto_config:
            # Preferred path: derive the whole service config from AutoConfig.
            self.config = BrowserLLMConfig(
                browser_config=auto_config.browser_config,
                llm_config=auto_config.llm_config,
                save_results=True,
                results_dir=str(auto_config.system_dir / "results"),
            )
        elif config:
            # Legacy path kept for backward compatibility.
            self.config = config
        else:
            raise ValueError("Either config or auto_config must be provided")

        # Component services share the same logger (and metrics for the browser).
        self.browser_service = BrowserService(
            config=self.config.browser_config,
            logger=logger,
            metrics=metrics,
        )
        self.llm_service = LLMService(config=self.config.llm_config, logger=logger)

        # Results directory exists only when saving is enabled AND a path is set.
        if self.config.save_results and self.config.results_dir:
            self.results_dir = Path(self.config.results_dir)
            self.results_dir.mkdir(parents=True, exist_ok=True)
        else:
            self.results_dir = None

        if self.logger:
            self.logger.info("🌐 BrowserLLMService initialized successfully")

    async def extract_listing(self, url: str) -> ExtractionResult:
        """Extract listing data from URL (e.g., search results, category pages)."""
        return await self._extract_from_url(url, "listing")

    async def extract_details(self, url: str) -> ExtractionResult:
        """Extract detail data from URL (e.g., product page, item details)."""
        return await self._extract_from_url(url, "details")

    async def _extract_from_url(self, url: str, page_type: str) -> ExtractionResult:
        """
        Extract structured data from URL using the Browser → LLM workflow.

        Args:
            url: Target URL to extract from.
            page_type: "listing" selects the listing processor; any other
                value is handled as "details".

        Returns:
            ExtractionResult with data and metadata. Any exception raised
            during the workflow is logged and converted into a result with
            ``success=False`` and empty data; it is not re-raised.
        """
        extraction_id = f"extract_{int(datetime.utcnow().timestamp())}"
        start_time = datetime.utcnow()

        if self.logger:
            self.logger.info(f"🌐 Extracting {page_type} from: {url}")

        try:
            # Step 1: Browser → HTML
            browser_start = datetime.utcnow()
            html_content = await self.browser_service.get_html(url)
            browser_duration = (datetime.utcnow() - browser_start).total_seconds()

            # Step 2: LLM processing
            llm_start = datetime.utcnow()
            if page_type == "listing":
                extracted_data = await self.llm_service.process_listing(html_content)
            else:
                extracted_data = await self.llm_service.process_details(html_content)
            llm_duration = (datetime.utcnow() - llm_start).total_seconds()

            # Step 3: Get cleaned HTML from the LLM service's processor.
            # Best effort: a cleaning failure must not fail the extraction.
            cleaned_html = ""
            try:
                if page_type == "listing" and self.llm_service.listing_processor:
                    processor = self.llm_service.listing_processor
                elif self.llm_service.details_processor:
                    processor = self.llm_service.details_processor
                else:
                    processor = None

                if processor and hasattr(processor, "cleaner"):
                    cleaned_html, _ = processor.cleaner.clean_html(
                        html_content, preserve_js_data=True, aggressive_cleaning=True
                    )
            except Exception as e:
                if self.logger:
                    self.logger.warning(f"⚠️ Could not get cleaned HTML: {e}")

                cleaned_html = html_content  # Fallback to original

            # Step 4: Create result
            total_duration = (datetime.utcnow() - start_time).total_seconds()

            result = ExtractionResult(
                data=extracted_data,
                url=url,
                extraction_id=extraction_id,
                total_duration_seconds=total_duration,
                browser_duration_seconds=browser_duration,
                llm_duration_seconds=llm_duration,
                html_size_bytes=len(html_content.encode()),
                success=True,
                original_html=html_content,
                cleaned_html=cleaned_html,
            )

            # Step 5: Save results if configured. The page type is passed
            # explicitly because it cannot be recovered from extraction_id.
            if self.config.save_results and self.results_dir:
                await self._save_extraction_result(result, page_type)

            if self.logger:
                self.logger.info(
                    f"✅ {page_type.title()} extraction completed in {total_duration:.2f}s"
                )

            return result

        except Exception as e:
            if self.logger:
                self.logger.error(f"❌ {page_type.title()} extraction failed: {e}")

            # Create failed result
            total_duration = (datetime.utcnow() - start_time).total_seconds()
            return ExtractionResult(
                data={},
                url=url,
                extraction_id=extraction_id,
                total_duration_seconds=total_duration,
                browser_duration_seconds=0,
                llm_duration_seconds=0,
                html_size_bytes=0,
                success=False,
                original_html="",
                cleaned_html="",
            )

    async def _save_extraction_result(
        self, result: ExtractionResult, page_type: Optional[str] = None
    ) -> None:
        """
        Save extraction results to files (JSON, HTML, MD).

        The JSON result and both HTML files go into a per-host folder that
        is wiped and recreated on every call; the markdown documentation is
        written directly into ``self.results_dir``. The paths of the written
        files are recorded on ``result``.

        Args:
            result: Extraction result to persist; its ``*_path`` fields are
                updated in place.
            page_type: "listing" or "details", used in the file names. When
                omitted, falls back to looking for "listing" in
                ``result.extraction_id``.
        """
        if not self.results_dir:
            return

        # Create listing-specific folder and clear old results
        listing_folder = self._create_listing_folder(result.url)

        if page_type is None:
            # Legacy heuristic for callers that do not pass the page type.
            page_type = "listing" if "listing" in result.extraction_id else "details"
        base_filename = f"{result.extraction_id}_{page_type}"

        # 1. Save main result as JSON. The HTML payloads are replaced with
        #    pointers to the separate HTML files (too large for the JSON).
        result_file = listing_folder / f"{base_filename}.json"
        clean_data = result.model_dump()
        clean_data["original_html"] = f"<saved to {base_filename}_original.html>"
        clean_data["cleaned_html"] = f"<saved to {base_filename}_cleaned.html>"
        with open(result_file, "w", encoding="utf-8") as f:
            json.dump(clean_data, f, ensure_ascii=False, indent=2, default=str)
        result.result_file_path = str(result_file)

        # 2. Save original HTML
        if result.original_html:
            original_html_file = listing_folder / f"{base_filename}_original.html"
            with open(original_html_file, "w", encoding="utf-8") as f:
                f.write(result.original_html)
            result.original_html_path = str(original_html_file)

        # 3. Save cleaned HTML
        if result.cleaned_html:
            cleaned_html_file = listing_folder / f"{base_filename}_cleaned.html"
            with open(cleaned_html_file, "w", encoding="utf-8") as f:
                f.write(result.cleaned_html)
            result.cleaned_html_path = str(cleaned_html_file)

        # 4. Generate and save markdown documentation, keeping the returned
        #    path so it can be recorded on the result and logged below.
        result_dict = result.data if isinstance(result.data, dict) else {}
        markdown_file = self._save_markdown_documentation(
            result_dict, f"{base_filename}_documentation"
        )
        result.markdown_docs_path = str(markdown_file)

        if self.logger:
            self.logger.info(f"💾 Comprehensive results saved to: {listing_folder}")
            self.logger.info(f"📊 JSON: {result_file.name}")
            self.logger.info(
                f"🌐 HTML: {base_filename}_original.html, {base_filename}_cleaned.html"
            )
            self.logger.info(f"📝 Docs: {markdown_file.name}")

    def _create_listing_folder(self, url: str) -> Path:
        """
        Create a fresh folder for the URL's host inside the results directory.

        The folder name is the URL's network location without a leading
        "www.", with every character other than word characters and "-"
        replaced by "_"; "listing" is used when that name is empty. An
        existing folder of the same name is deleted first.

        Args:
            url: URL whose host names the folder.

        Returns:
            Path of the newly created, empty folder.
        """
        host = urlparse(url).netloc
        # Strip only a leading "www."; other occurrences are part of the host.
        if host.startswith("www."):
            host = host[len("www."):]
        folder_name = re.sub(r"[^\w\-_]", "_", host) or "listing"

        listing_folder = self.results_dir / folder_name

        # Clear folder if exists (new LLM cycle)
        if listing_folder.exists():
            if self.logger:
                self.logger.info(f"🗑️ Clearing existing folder: {listing_folder}")
            shutil.rmtree(listing_folder)

        # Create fresh folder
        listing_folder.mkdir(parents=True, exist_ok=True)

        if self.logger:
            self.logger.info(f"📁 Created listing folder: {listing_folder}")

        return listing_folder

    def _save_markdown_documentation(self, result_dict: dict, filename: str) -> Path:
        """
        Write the extraction's markdown documentation to the results directory.

        The text is read from ``result_dict["extraction_result"]["documentation"]``;
        an empty file is written when that entry is missing.

        Args:
            result_dict: Extracted data as a plain dict.
            filename: File name without the ".md" suffix.

        Returns:
            Path of the written markdown file.
        """
        extraction_result = result_dict.get("extraction_result", {})
        documentation = extraction_result.get("documentation", "")

        filepath = self.results_dir / f"{filename}.md"
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(documentation)
        print(f"Markdown documentation saved to: {filepath}")
        return filepath

    async def cleanup(self):
        """Clean up service resources (browser service first, then LLM service)."""
        await self.browser_service.cleanup()
        await self.llm_service.cleanup()

        if self.logger:
            self.logger.info("🌐 BrowserLLMService cleanup completed")

    def __repr__(self) -> str:
        return f"<BrowserLLMService(parser_id={self.config.browser_config.parser_id})>"
@@ -1,195 +0,0 @@
1
- """
2
- 🤖 LLM Service - UnrealOn Driver v3.0
3
-
4
- Simple wrapper around UnrealOn LLM for HTML processing.
5
- Just pass HTML and get parsed results.
6
- """
7
-
8
- from typing import Optional
9
- from pydantic import BaseModel, Field, ConfigDict
10
-
11
- from unrealon_llm.src.provider import UnrealOnLLM
12
- from unrealon_sdk.src.enterprise.logging.development import get_development_logger
13
- from unrealon_sdk.src.dto.logging import SDKContext, SDKEventType
14
-
15
- from unrealon_driver.src.dto.services import LLMConfig
16
- from unrealon_driver.src.core.exceptions import create_llm_error
17
-
18
-
19
class LLMService:
    """
    Simple LLM service for HTML processing.

    Two main methods:
    - process_listing(html) - for catalog/listing pages
    - process_details(html) - for product/detail pages

    Example:
        llm = LLMService(config)
        result = await llm.process_listing(html_content)
        result = await llm.process_details(html_content)
    """

    # Daily spending cap, in dollars, handed to both processors.
    DEFAULT_DAILY_COST_LIMIT = 1.0

    def __init__(self, config: "LLMConfig", logger=None):
        """
        Initialize LLM service.

        Args:
            config: LLMConfig with API key and settings
            logger: Optional logger
        """
        self.config = config
        self.logger = logger
        self.dev_logger = get_development_logger()

        # Processors are created eagerly so configuration problems surface
        # at construction time rather than on the first request.
        if config.api_key:
            processor_options = {
                "openrouter_api_key": config.api_key,
                "default_model": config.model,
                "daily_cost_limit": self.DEFAULT_DAILY_COST_LIMIT,
                "enable_caching": config.enable_caching,
            }
            self.listing_processor = UnrealOnLLM.create_listing_processor(
                **processor_options
            )
            self.details_processor = UnrealOnLLM.create_details_processor(
                **processor_options
            )
        else:
            # Without an API key (e.g. in tests) no processors exist and
            # the process_* methods return fixed placeholder results.
            self.listing_processor = None
            self.details_processor = None

        if self.logger:
            self.logger.info(f"🤖 LLM service initialized with {config.provider}")

        if self.dev_logger:
            self.dev_logger.log_info(
                SDKEventType.COMPONENT_CREATED,
                "LLM service initialized",
                context=SDKContext(
                    component_name="LLM",
                    layer_name="UnrealOn_Driver",
                    metadata={
                        "provider": config.provider,
                        "model": config.model,
                        "cost_tracking": config.enable_cost_tracking,
                    },
                ),
            )

    async def process_listing(self, html: str) -> dict:
        """
        Process listing/catalog page HTML.

        Args:
            html: Raw HTML content

        Returns:
            Extracted data as dict. Without a listing processor a fixed
            placeholder dict is returned. When the failure message indicates
            that the LLM answer had the wrong shape, a fallback dict with
            ``error`` set to ``"LLM_VALIDATION_ERROR"`` is returned instead
            of raising.

        Raises:
            The exception built by ``create_llm_error`` for any other
            failure, chained to the original exception.
        """
        try:
            if self.logger:
                self.logger.info("🔍 Processing listing page")

            if not self.listing_processor:
                return {"test_data": "mock_listing_result"}

            result = await self.listing_processor.extract_patterns(html)

            if self.logger:
                self.logger.info("✅ Listing processing complete")

            return self._convert_result(result)

        except Exception as e:
            if self.logger:
                self.logger.error(f"❌ Listing processing failed: {e}")

            # Validation failures are reported through a well-formed result
            # so callers can continue with an empty selector set.
            message = str(e)
            if (
                "Input should be a valid dictionary" in message
                or "must be a mapping" in message
            ):
                return {
                    "extracted_data": "LLM validation failed - Claude returned list instead of dict",
                    "error": "LLM_VALIDATION_ERROR",
                    "raw_error": message,
                    "extraction_result": {
                        "selectors": {},
                        "documentation": "Extraction failed due to LLM format validation",
                        "detected_item_type": "validation_error",
                    },
                }

            raise create_llm_error(
                f"Listing processing failed: {e}",
                provider=self.config.provider,
                model=self.config.model,
                # Guard against html=None so the size lookup cannot mask
                # the original error.
                input_size=len(html or ""),
            ) from e

    async def process_details(self, html: str) -> dict:
        """
        Process detail/product page HTML.

        Args:
            html: Raw HTML content

        Returns:
            Extracted data as dict. Without a details processor a fixed
            placeholder dict is returned.

        Raises:
            The exception built by ``create_llm_error`` when processing
            fails, chained to the original exception.
        """
        try:
            if self.logger:
                self.logger.info("🔍 Processing details page")

            if not self.details_processor:
                return {"test_data": "mock_details_result"}

            result = await self.details_processor.extract_patterns(html)

            if self.logger:
                self.logger.info("✅ Details processing complete")

            return self._convert_result(result)

        except Exception as e:
            if self.logger:
                self.logger.error(f"❌ Details processing failed: {e}")
            raise create_llm_error(
                f"Details processing failed: {e}",
                provider=self.config.provider,
                model=self.config.model,
                # Guard against html=None so the size lookup cannot mask
                # the original error.
                input_size=len(html or ""),
            ) from e

    def _convert_result(self, result) -> dict:
        """Convert LLM result to simple dict.

        Returns ``result.model_dump()`` for a truthy result. A falsy result
        or a failing conversion yields a dict whose ``extracted_data`` entry
        describes the problem; this method never raises.
        """
        if not result:
            return {"extracted_data": "No extraction result found"}
        try:
            return result.model_dump()
        except Exception as e:
            return {"extracted_data": f"Error converting result: {e}"}

    async def cleanup(self):
        """Clean up LLM resources.

        Closes the ``llm_client`` of each processor that has one. The
        clients are closed independently, so a failure on one does not
        prevent the other from being closed. Errors are logged, never
        raised; the completion message is logged only when nothing failed.
        """
        failed = False
        for processor in (self.listing_processor, self.details_processor):
            client = getattr(processor, "llm_client", None)
            if not client:
                continue
            try:
                await client.close()
            except Exception as e:
                failed = True
                if self.logger:
                    self.logger.error(f"❌ LLM cleanup error: {e}")

        if not failed and self.logger:
            self.logger.info("🤖 LLM service cleanup completed")

    def __repr__(self) -> str:
        return f"<LLMService(provider={self.config.provider}, model={self.config.model})>"