unrealon 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.9.dist-info/METADATA +0 -810
  71. unrealon-1.0.9.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -1,56 +0,0 @@
1
- """
2
- Universal HTML Processing Model
3
-
4
- Single simplified Pydantic model for any HTML page extraction with markdown documentation.
5
- """
6
-
7
- from typing import Dict, List, Union
8
- from pydantic import BaseModel, Field, ConfigDict, field_validator
9
-
10
-
11
- class UniversalExtractionSchema(BaseModel):
12
- """Universal HTML page extraction schema with markdown documentation"""
13
-
14
- model_config = ConfigDict(
15
- validate_assignment=True,
16
- extra="allow", # Allow extra fields for flexibility
17
- title="Universal Extraction Schema"
18
- )
19
-
20
- # LLM analysis results
21
- detected_item_type: str = Field(
22
- ...,
23
- description="Auto-detected type of page (product, listing, article, service, etc.)"
24
- )
25
- extraction_strategy: str = Field(
26
- ...,
27
- description="Brief description of extraction strategy"
28
- )
29
- confidence: float = Field(
30
- ...,
31
- ge=0,
32
- le=1,
33
- description="Overall extraction confidence"
34
- )
35
-
36
- # Simple CSS selectors organized by field
37
- selectors: Dict[str, List[str]] = Field(
38
- ...,
39
- description="CSS selectors organized by field name (title, price, description, items_container, etc.)"
40
- )
41
-
42
- # Comprehensive markdown documentation
43
- documentation: str = Field(
44
- ...,
45
- description="Markdown documentation with examples, explanations, and extraction guidance"
46
- )
47
-
48
- @field_validator('selectors', mode='before')
49
- @classmethod
50
- def convert_strings_to_lists(cls, v):
51
- """Convert string selectors to lists automatically"""
52
- if isinstance(v, dict):
53
- for key, value in v.items():
54
- if isinstance(value, str):
55
- v[key] = [value]
56
- return v
@@ -1,102 +0,0 @@
1
- """
2
- HTML Processor Factory
3
-
4
- Factory class for creating HTML processors.
5
- """
6
-
7
- from unrealon_llm.src.core import SmartLLMClient
8
- from unrealon_llm.src.dto import LLMConfig
9
-
10
- from .listing_processor import ListingProcessor
11
- from .details_processor import DetailsProcessor
12
-
13
-
14
- class UnrealOnLLM:
15
- """Factory class for creating UnrealOn LLM components"""
16
-
17
- @staticmethod
18
- def create_client(
19
- openrouter_api_key: str,
20
- default_model: str = "anthropic/claude-3.5-sonnet",
21
- daily_cost_limit: float = 5.0,
22
- enable_caching: bool = True,
23
- cache_ttl_minutes: int = 30,
24
- ) -> SmartLLMClient:
25
- """
26
- Create LLM client
27
-
28
- Args:
29
- openrouter_api_key: OpenRouter API key
30
- default_model: Default model to use
31
- daily_cost_limit: Daily cost limit in USD
32
- enable_caching: Enable response caching
33
- cache_ttl_minutes: Cache TTL in minutes
34
-
35
- Returns:
36
- Configured SmartLLMClient instance
37
- """
38
- config = LLMConfig(
39
- openrouter_api_key=openrouter_api_key,
40
- default_model=default_model,
41
- daily_cost_limit_usd=daily_cost_limit,
42
- request_timeout_seconds=60,
43
- max_retries=3,
44
- enable_global_cache=enable_caching,
45
- cache_ttl_hours=max(1, int(cache_ttl_minutes / 60)),
46
- )
47
-
48
- return SmartLLMClient(config)
49
-
50
- @staticmethod
51
- def create_listing_processor(
52
- openrouter_api_key: str,
53
- default_model: str = "anthropic/claude-3.5-sonnet",
54
- daily_cost_limit: float = 1.0,
55
- enable_caching: bool = False, # Disable cache for HTML processors
56
- ) -> ListingProcessor:
57
- """
58
- Create listing processor
59
-
60
- Args:
61
- openrouter_api_key: OpenRouter API key
62
- default_model: Default model to use
63
- daily_cost_limit: Daily cost limit in USD
64
- enable_caching: Enable response caching (disabled by default for HTML processing)
65
-
66
- Returns:
67
- Configured ListingProcessor instance
68
- """
69
- llm_client = UnrealOnLLM.create_client(
70
- openrouter_api_key=openrouter_api_key,
71
- default_model=default_model,
72
- daily_cost_limit=daily_cost_limit,
73
- enable_caching=enable_caching,
74
- )
75
- return ListingProcessor(llm_client)
76
-
77
- @staticmethod
78
- def create_details_processor(
79
- openrouter_api_key: str,
80
- default_model: str = "anthropic/claude-3.5-sonnet",
81
- daily_cost_limit: float = 1.0,
82
- enable_caching: bool = False, # Disable cache for HTML processors
83
- ) -> DetailsProcessor:
84
- """
85
- Create details processor
86
-
87
- Args:
88
- openrouter_api_key: OpenRouter API key
89
- default_model: Default model to use
90
- daily_cost_limit: Daily cost limit in USD
91
- enable_caching: Enable response caching (disabled by default for HTML processing)
92
-
93
- Returns:
94
- Configured DetailsProcessor instance
95
- """
96
- llm_client = UnrealOnLLM.create_client(
97
- openrouter_api_key=openrouter_api_key,
98
- default_model=default_model,
99
- daily_cost_limit=daily_cost_limit,
100
- enable_caching=enable_caching,
101
- )
102
- return DetailsProcessor(llm_client)
File without changes
File without changes
@@ -1,116 +0,0 @@
1
- """
2
- UnrealOn LLM Provider
3
-
4
- Simple provider module for UnrealOn LLM functionality.
5
- """
6
-
7
- # Core LLM functionality
8
- from unrealon_llm.src.core import SmartLLMClient
9
-
10
- # HTML parsing and analysis
11
- from unrealon_llm.src.utils.html_cleaner import SmartHTMLCleaner
12
- from unrealon_llm.src.modules.html_processor import (
13
- ListingProcessor,
14
- DetailsProcessor,
15
- UnrealOnLLM,
16
- )
17
-
18
- # Logging and configuration
19
- from unrealon_llm.src.llm_config import setup_llm_logging, configure_llm_logging
20
- from unrealon_llm.src.llm_logging import (
21
- get_llm_logger,
22
- LLMEventType,
23
- LLMContext,
24
- initialize_llm_logger,
25
- )
26
-
27
- # Configuration and DTOs
28
- from unrealon_llm.src.dto import (
29
- # Core configuration
30
- LLMConfig,
31
- AnalysisConfig,
32
- TranslationConfig,
33
- # Enums
34
- LLMProvider,
35
- OptimizationLevel,
36
- CacheStrategy,
37
- MessageRole,
38
- PatternType,
39
- SelectorType,
40
- LanguageCode,
41
- DataType,
42
- SchemaFormat,
43
- ProcessingStage,
44
- # Core models
45
- TokenUsage,
46
- ChatMessage,
47
- LLMResponse,
48
- LanguageDetection,
49
- CostBreakdown,
50
- HealthStatus,
51
- ProcessingMetrics,
52
- # HTML Analysis models
53
- DetectedPattern,
54
- SelectorInfo,
55
- HTMLAnalysisRequest,
56
- HTMLAnalysisResult,
57
- SelectorValidationResult,
58
- CompleteAnalysisResult,
59
- )
60
-
61
- # Utilities
62
- from unrealon_llm.src.utils.data_extractor import SmartDataExtractor
63
- from unrealon_llm.src.utils.smart_counter import SmartTokenCounter
64
- from unrealon_llm.src.utils.language_detector import LanguageDetector
65
-
66
-
67
- # Direct exports for convenience
68
- __all__ = [
69
- # Factory class
70
- "UnrealOnLLM",
71
- # Core classes
72
- "SmartLLMClient",
73
- "SmartHTMLCleaner",
74
- "SmartDataExtractor",
75
- "SmartTokenCounter",
76
- "LanguageDetector",
77
- "ListingProcessor",
78
- "DetailsProcessor",
79
- # Configuration classes
80
- "LLMConfig",
81
- "AnalysisConfig",
82
- "TranslationConfig",
83
- # Enums
84
- "LLMProvider",
85
- "OptimizationLevel",
86
- "CacheStrategy",
87
- "MessageRole",
88
- "PatternType",
89
- "SelectorType",
90
- "LanguageCode",
91
- "DataType",
92
- "SchemaFormat",
93
- "ProcessingStage",
94
- # Core models
95
- "TokenUsage",
96
- "ChatMessage",
97
- "LLMResponse",
98
- "LanguageDetection",
99
- "CostBreakdown",
100
- "HealthStatus",
101
- "ProcessingMetrics",
102
- # HTML Analysis models
103
- "DetectedPattern",
104
- "SelectorInfo",
105
- "HTMLAnalysisRequest",
106
- "HTMLAnalysisResult",
107
- "SelectorValidationResult",
108
- "CompleteAnalysisResult",
109
- # Logging
110
- "setup_llm_logging",
111
- "configure_llm_logging",
112
- "get_llm_logger",
113
- "initialize_llm_logger",
114
- "LLMEventType",
115
- "LLMContext",
116
- ]
@@ -1,95 +0,0 @@
1
- """
2
- UnrealOn LLM Utilities
3
-
4
- Utility functions and helpers for UnrealOn LLM platform including
5
- language detection, token counting, and model caching.
6
- """
7
-
8
- # Language detection utilities
9
- from .language_detector import (
10
- LanguageDetector,
11
- detect_language,
12
- detect_multiple_languages,
13
- is_language,
14
- )
15
-
16
- # Token counting utilities (legacy)
17
- from .token_counter import (
18
- TokenCounter,
19
- count_tokens,
20
- count_message_tokens,
21
- optimize_for_tokens,
22
- )
23
-
24
- # Smart counting utilities (new approach)
25
- from .smart_counter import (
26
- SmartTokenCounter,
27
- smart_count_tokens,
28
- smart_count_messages,
29
- )
30
-
31
- # Models cache utilities
32
- from .models_cache import (
33
- ModelInfo,
34
- ModelsCache,
35
- )
36
-
37
- # HTML cleaning utilities
38
- from .html_cleaner import (
39
- SmartHTMLCleaner,
40
- clean_html_for_llm,
41
- extract_js_data_only,
42
- )
43
-
44
- # Common utilities
45
- from .common import (
46
- generate_correlation_id,
47
- generate_request_id,
48
- )
49
-
50
- # Data extraction utilities
51
- from .data_extractor import (
52
- SmartDataExtractor,
53
- safe_extract_json,
54
- extract_llm_response_data,
55
- create_data_extractor,
56
- )
57
-
58
- # Exports
59
- __all__ = [
60
- # Language Detection
61
- "LanguageDetector",
62
- "detect_language",
63
- "detect_multiple_languages",
64
- "is_language",
65
-
66
- # Token Counting (Legacy)
67
- "TokenCounter",
68
- "count_tokens",
69
- "count_message_tokens",
70
- "optimize_for_tokens",
71
-
72
- # Smart Counting (New)
73
- "SmartTokenCounter",
74
- "smart_count_tokens",
75
- "smart_count_messages",
76
-
77
- # Models Cache
78
- "ModelInfo",
79
- "ModelsCache",
80
-
81
- # HTML Cleaning
82
- "SmartHTMLCleaner",
83
- "clean_html_for_llm",
84
- "extract_js_data_only",
85
-
86
- # Common Utilities
87
- "generate_correlation_id",
88
- "generate_request_id",
89
-
90
- # Data Extraction
91
- "SmartDataExtractor",
92
- "safe_extract_json",
93
- "extract_llm_response_data",
94
- "create_data_extractor",
95
- ]
@@ -1,64 +0,0 @@
1
- """
2
- Common Utilities
3
-
4
- General-purpose utility functions for UnrealOn LLM including
5
- ID generation, validation, and other common helpers.
6
- """
7
-
8
- import uuid
9
- import secrets
10
- from typing import Optional
11
-
12
-
13
- def generate_correlation_id() -> str:
14
- """Generate a unique correlation ID for tracking operations."""
15
- return f"llm_{uuid.uuid4().hex[:16]}"
16
-
17
-
18
- def generate_request_id() -> str:
19
- """Generate a unique request ID for API calls."""
20
- return f"req_{secrets.token_hex(8)}"
21
-
22
-
23
- def generate_session_id() -> str:
24
- """Generate a unique session ID."""
25
- return f"sess_{uuid.uuid4().hex[:12]}"
26
-
27
-
28
- def sanitize_model_name(model_name: str) -> str:
29
- """Sanitize model name for logging and metrics."""
30
- return model_name.replace("/", "_").replace(":", "_").replace("-", "_")
31
-
32
-
33
- def truncate_text(text: str, max_length: int = 100, suffix: str = "...") -> str:
34
- """Truncate text for logging purposes."""
35
- if len(text) <= max_length:
36
- return text
37
- return text[:max_length - len(suffix)] + suffix
38
-
39
-
40
- def format_bytes(bytes_count: int) -> str:
41
- """Format bytes into human readable format."""
42
- for unit in ['B', 'KB', 'MB', 'GB']:
43
- if bytes_count < 1024.0:
44
- return f"{bytes_count:.1f} {unit}"
45
- bytes_count /= 1024.0
46
- return f"{bytes_count:.1f} TB"
47
-
48
-
49
- def format_duration_ms(duration_ms: float) -> str:
50
- """Format duration in milliseconds to human readable format."""
51
- if duration_ms < 1000:
52
- return f"{duration_ms:.1f}ms"
53
- elif duration_ms < 60000:
54
- return f"{duration_ms / 1000:.1f}s"
55
- else:
56
- minutes = int(duration_ms / 60000)
57
- seconds = (duration_ms % 60000) / 1000
58
- return f"{minutes}m{seconds:.1f}s"
59
-
60
-
61
- def safe_get_env(key: str, default: Optional[str] = None) -> Optional[str]:
62
- """Safely get environment variable with optional default."""
63
- import os
64
- return os.getenv(key, default)
@@ -1,188 +0,0 @@
1
- """
2
- Data Extractor
3
-
4
- Simple wrapper around json_extractor lib for extracting JSON from text.
5
- KISS methodology - just extract JSON, nothing more.
6
- """
7
-
8
- import json
9
- import logging
10
- import re
11
- from typing import Any, Dict, List, Optional, Type, TypeVar, Union
12
- from pydantic import BaseModel, ValidationError
13
-
14
- from unrealon_llm.src.exceptions import ResponseParsingError
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
- T = TypeVar("T", bound=BaseModel)
19
-
20
-
21
- def _extract_json_smart(content: str) -> Optional[str]:
22
- """
23
- Smart JSON extraction from text - finds valid JSON objects/arrays.
24
-
25
- Args:
26
- content: Text content that may contain JSON
27
-
28
- Returns:
29
- First valid JSON string found or None
30
- """
31
- # Try to find JSON objects {} or arrays []
32
- patterns = [
33
- r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", # Simple nested objects
34
- r"\[[^\[\]]*(?:\[[^\[\]]*\][^\[\]]*)*\]", # Simple nested arrays
35
- ]
36
-
37
- for pattern in patterns:
38
- matches = re.finditer(pattern, content, re.DOTALL)
39
- for match in matches:
40
- json_candidate = match.group()
41
- try:
42
- # Test if it's valid JSON
43
- json.loads(json_candidate)
44
- return json_candidate
45
- except json.JSONDecodeError:
46
- continue
47
-
48
- # Fallback: find between outermost braces
49
- first_brace = content.find("{")
50
- if first_brace == -1:
51
- return None
52
-
53
- # Find matching closing brace
54
- brace_count = 0
55
- for i, char in enumerate(content[first_brace:], first_brace):
56
- if char == "{":
57
- brace_count += 1
58
- elif char == "}":
59
- brace_count -= 1
60
- if brace_count == 0:
61
- return content[first_brace : i + 1]
62
-
63
- return None
64
-
65
-
66
- def extract_json(
67
- content: str,
68
- expected_schema: Optional[Type[T]] = None,
69
- fallback_value: Optional[Any] = None,
70
- strict_mode: bool = True,
71
- ) -> Union[T, Dict[str, Any], None]:
72
- """
73
- Extract JSON from text content.
74
-
75
- Args:
76
- content: Text content containing JSON
77
- expected_schema: Pydantic model for validation
78
- fallback_value: Return value if extraction fails (non-strict mode)
79
- strict_mode: Raise exception on failure if True
80
-
81
- Returns:
82
- Extracted JSON data
83
-
84
- Raises:
85
- ResponseParsingError: If extraction fails in strict mode
86
- """
87
- if not content:
88
- if strict_mode:
89
- raise ResponseParsingError("Empty content", "json")
90
- return fallback_value
91
-
92
- try:
93
- # Smart JSON extraction
94
- json_content = _extract_json_smart(content)
95
-
96
- if json_content is None:
97
- if strict_mode:
98
- raise ResponseParsingError("No valid JSON found", "json")
99
- return fallback_value
100
-
101
- # Parse the extracted JSON
102
- extracted_data = json.loads(json_content)
103
- logger.info(f"Successfully extracted and parsed JSON: {type(extracted_data)}")
104
-
105
- # Validate with schema if provided
106
- if expected_schema and issubclass(expected_schema, BaseModel):
107
- return expected_schema.model_validate(extracted_data)
108
-
109
- return extracted_data
110
-
111
- except ValidationError as e:
112
- if strict_mode:
113
- raise e
114
- logger.warning(f"Schema validation failed: {e}")
115
- return fallback_value
116
-
117
- except Exception as e:
118
- if strict_mode:
119
- raise ResponseParsingError(content[:200], "json")
120
- logger.warning(f"JSON extraction failed: {e}")
121
- return fallback_value
122
-
123
-
124
- def safe_extract_json(
125
- content: str,
126
- expected_schema: Optional[Type[T]] = None,
127
- fallback_value: Optional[Any] = None,
128
- ) -> Union[T, Dict[str, Any], None]:
129
- """
130
- Safe JSON extraction (non-strict mode).
131
-
132
- Args:
133
- content: Text content
134
- expected_schema: Optional schema
135
- fallback_value: Fallback value
136
-
137
- Returns:
138
- Extracted JSON or fallback
139
- """
140
- return extract_json(content, expected_schema, fallback_value, strict_mode=False)
141
-
142
-
143
- def extract_llm_response_data(
144
- response_content: str,
145
- expected_schema: Optional[Type[T]] = None,
146
- required_fields: Optional[List[str]] = None,
147
- ) -> Union[T, Dict[str, Any]]:
148
- """
149
- Extract data from LLM response (strict mode).
150
-
151
- Args:
152
- response_content: LLM response text
153
- expected_schema: Pydantic model
154
- required_fields: Required fields to check
155
-
156
- Returns:
157
- Extracted and validated data
158
-
159
- Raises:
160
- ResponseParsingError: If extraction fails
161
- """
162
- result = extract_json(response_content, expected_schema, strict_mode=True)
163
-
164
- # Check required fields if no schema
165
- if not expected_schema and required_fields and isinstance(result, dict):
166
- missing_fields = [field for field in required_fields if field not in result]
167
- if missing_fields:
168
- raise ResponseParsingError(
169
- f"Missing required fields: {missing_fields}", "json"
170
- )
171
-
172
- return result
173
-
174
-
175
- # Legacy compatibility
176
- class SmartDataExtractor:
177
- """Simple wrapper for compatibility."""
178
-
179
- def __init__(self, strict_mode: bool = True):
180
- self.strict_mode = strict_mode
181
-
182
- def extract_json(self, content: str, expected_schema=None, fallback_value=None):
183
- return extract_json(content, expected_schema, fallback_value, self.strict_mode)
184
-
185
-
186
- def create_data_extractor(strict_mode: bool = True) -> SmartDataExtractor:
187
- """Create data extractor instance."""
188
- return SmartDataExtractor(strict_mode)