unrealon 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.8.dist-info/METADATA +0 -803
  71. unrealon-1.0.8.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -1,726 +0,0 @@
1
- """
2
- Smart Browser Service for UnrealOn Driver v3.0
3
-
4
- Zero-configuration browser automation with intelligent features.
5
- Wraps unrealon_browser with enhanced capabilities and smart defaults.
6
-
7
- CRITICAL REQUIREMENTS COMPLIANCE:
8
- - ✅ Absolute imports only
9
- - ✅ Pydantic v2 models everywhere
10
- - ✅ No Dict[str, Any] usage
11
- - ✅ Complete type annotations
12
- - ✅ Auto-generated model usage
13
- """
14
-
15
- import asyncio
16
- from pathlib import Path
17
- from typing import Any, List, Optional, Union, Callable
18
- from datetime import datetime
19
-
20
- from unrealon_browser.src.core.browser_manager import BrowserManager
21
- from unrealon_browser.src.managers import ProfileManager, CookieManager, StealthManager
22
- from unrealon_browser.src.dto.models.statistics import BrowserStatistics
23
- from unrealon_browser.src.dto.models.core import PageResult
24
- from unrealon_browser.src.dto.models.config import (
25
- BrowserConfig as UnrealOnBrowserConfig,
26
- )
27
-
28
- # CRITICAL REQUIREMENTS COMPLIANCE - NO INLINE IMPORTS!
29
- from unrealon_browser.src.dto import (
30
- BrowserConfig,
31
- BrowserType,
32
- BrowserMode,
33
- )
34
- from unrealon_sdk.src.provider import Utils
35
- from unrealon_sdk.src.clients.python_http.models.SuccessResponse import SuccessResponse
36
- from unrealon_sdk.src.clients.python_http.models.ErrorResponse import ErrorResponse
37
- from unrealon_sdk.src.enterprise.logging.development import get_development_logger
38
- from unrealon_sdk.src.dto.logging import SDKContext, SDKEventType
39
-
40
- from unrealon_driver.src.core.exceptions import BrowserError, create_browser_error
41
- from unrealon_driver.src.dto.services import (
42
- DriverBrowserConfig,
43
- ServiceHealthStatus,
44
- ServiceOperationResult,
45
- )
46
- from unrealon_driver.src.dto.events import DriverEventType
47
-
48
-
49
- class BrowserService:
50
- """
51
- 🌐 Smart Browser Service
52
-
53
- Zero-configuration browser automation with intelligent features:
54
- - 🔥 STEALTH BY DEFAULT - all navigation uses stealth automatically
55
- - Smart waiting and content detection
56
- - Automatic anti-detection measures
57
- - Error recovery and retries
58
- - Resource management
59
- - Performance optimization
60
-
61
- 🔥 NAVIGATION METHODS:
62
- - navigate(url) - STEALTH navigation (recommended for all use)
63
- - navigate_unsafe(url) - without stealth (use only when stealth not needed)
64
- - get_html(url) - STEALTH + special Amazon handling
65
- """
66
-
67
- def __init__(
68
- self,
69
- config: DriverBrowserConfig,
70
- logger: Optional[Any] = None,
71
- metrics: Optional[Any] = None,
72
- ):
73
- """Initialize browser service with auto-configuration."""
74
- self.config = config
75
- self.logger = logger
76
- self.metrics = metrics
77
-
78
- # ✅ DEVELOPMENT LOGGER INTEGRATION (CRITICAL REQUIREMENT)
79
- self.dev_logger = get_development_logger()
80
-
81
- # Browser management
82
- self._browser_manager: Optional[BrowserManager] = None
83
- self._current_page = None
84
- self._is_initialized = False
85
-
86
- # Performance tracking
87
- self._operation_count = 0
88
- self._total_duration = 0.0
89
-
90
- # Log initialization with development logger
91
- if self.dev_logger:
92
- self.dev_logger.log_info(
93
- SDKEventType.COMPONENT_CREATED,
94
- "Browser service initialized",
95
- context=SDKContext(
96
- parser_id=self.config.parser_id,
97
- component_name="Browser",
98
- layer_name="UnrealOn_Driver",
99
- metadata={
100
- "headless": self.config.headless,
101
- "stealth": True, # Always enabled
102
- "timeout": self.config.timeout,
103
- "debug_mode": self.config.debug_mode,
104
- },
105
- ),
106
- )
107
-
108
- async def _log_driver_event(
109
- self, event_type: DriverEventType, message: str, **metadata
110
- ) -> None:
111
- """Log ONLY driver-specific events (not browser module events)."""
112
- if self.dev_logger and event_type in [
113
- DriverEventType.SERVICE_INITIALIZED,
114
- DriverEventType.SERVICE_ERROR,
115
- DriverEventType.BROWSER_CONTENT_EXTRACTED,
116
- DriverEventType.BROWSER_SCREENSHOT_TAKEN,
117
- ]:
118
- self.dev_logger.log_info(
119
- event_type.value,
120
- message,
121
- context=SDKContext(
122
- parser_id=self.config.parser_id,
123
- component_name="Browser",
124
- layer_name="UnrealOn_Driver",
125
- metadata=metadata,
126
- ),
127
- )
128
-
129
- async def _ensure_initialized(self):
130
- """Ensure browser is initialized."""
131
- if not self._is_initialized:
132
- await self._initialize_browser()
133
-
134
- async def _initialize_browser(self):
135
- """Initialize browser with unrealon_browser integration."""
136
- try:
137
-
138
- browser_config = Utils.create_browser_config(
139
- parser_name=self.config.parser_id,
140
- browser_type=BrowserType.CHROMIUM,
141
- # 🔥 STEALTH ALWAYS ON - NO CONFIG NEEDED!
142
- headless=self.config.headless
143
- )
144
-
145
- # Create browser manager (logger_bridge auto-integrates with SDK)
146
- self._browser_manager = BrowserManager(config=browser_config)
147
-
148
- # Setup system paths if provided
149
- system_dir = self.config.user_data_dir
150
- if system_dir:
151
- profiles_dir = Path(system_dir) / "browser_profiles"
152
- cookies_dir = Path(system_dir) / "cookies"
153
-
154
- # Ensure directories exist
155
- profiles_dir.mkdir(parents=True, exist_ok=True)
156
- cookies_dir.mkdir(parents=True, exist_ok=True)
157
-
158
- # Override managers with custom paths
159
- self._browser_manager.profile_manager = ProfileManager(
160
- profiles_dir=str(profiles_dir)
161
- )
162
- self._browser_manager.cookie_manager = CookieManager(
163
- cookies_dir=str(cookies_dir),
164
- parser_name=self.config.parser_id,
165
- )
166
-
167
- # Initialize browser async
168
- await self._browser_manager.initialize_async()
169
-
170
- self._is_initialized = True
171
-
172
- # Log browser initialized event
173
- if self.logger:
174
- self.logger.info(
175
- f"Browser service initialized - headless: {self.config.headless}"
176
- )
177
-
178
- if self.logger:
179
- self.logger.info("Browser service initialized successfully")
180
-
181
- except Exception as e:
182
- # Log browser launch failure
183
- if self.logger:
184
- self.logger.error(f"Browser initialization failed: {e}")
185
- raise BrowserError(f"Failed to initialize browser: {e}")
186
-
187
- def _convert_config_to_unrealon_browser(self) -> UnrealOnBrowserConfig:
188
- """Convert our config to unrealon_browser Pydantic model with type safety."""
189
- return UnrealOnBrowserConfig(
190
- parser_name=self.config.parser_id,
191
- page_load_timeout_seconds=float(self.config.timeout),
192
- navigation_timeout_seconds=float(self.config.timeout),
193
- disable_images=not self.config.enable_images,
194
- # Map our settings to unrealon_browser settings
195
- use_proxy_rotation=False, # Default behavior
196
- realistic_ports_only=False, # Default behavior
197
- enable_stealth_check=self.config.debug_mode,
198
- )
199
-
200
- # ==========================================
201
- # SMART EXTRACTION METHODS
202
- # ==========================================
203
-
204
- async def extract(
205
- self,
206
- url: str,
207
- selector: str,
208
- limit: Optional[int] = None,
209
- timeout: Optional[int] = None,
210
- attribute: Optional[str] = None,
211
- **kwargs,
212
- ) -> List[str]:
213
- """
214
- 🎯 Smart extraction with automatic waiting and error handling.
215
-
216
- Args:
217
- url: Target URL
218
- selector: CSS selector
219
- limit: Maximum number of items to extract
220
- timeout: Custom timeout (uses default if not specified)
221
- attribute: Extract attribute instead of text
222
- **kwargs: Additional options
223
-
224
- Returns:
225
- List of extracted text/attributes
226
-
227
- Example:
228
- headlines = await browser.extract(
229
- "https://news.com",
230
- ".headline",
231
- limit=10
232
- )
233
- """
234
- start_time = datetime.now()
235
-
236
- try:
237
- await self._ensure_initialized()
238
-
239
- # Navigate to URL with smart waiting
240
- page = await self._navigate_smart(url, timeout=timeout)
241
-
242
- # Wait for content to be ready
243
- await self._wait_for_content_ready(page, selector, timeout)
244
-
245
- # Extract elements
246
- if attribute:
247
- elements = await page.query_selector_all(selector)
248
- results = [
249
- await element.get_attribute(attribute)
250
- for element in elements[:limit]
251
- if element
252
- ]
253
- results = [r for r in results if r] # Filter None values
254
- else:
255
- elements = await page.query_selector_all(selector)
256
- results = [
257
- await element.text_content()
258
- for element in elements[:limit]
259
- if element
260
- ]
261
- results = [r.strip() for r in results if r and r.strip()] # Clean text
262
-
263
- # Apply limit if specified
264
- if limit:
265
- results = results[:limit]
266
-
267
- # Record metrics
268
- duration = (datetime.now() - start_time).total_seconds()
269
- self._record_operation("extract", duration, len(results))
270
-
271
- if self.logger:
272
- self.logger.info(
273
- f"Extracted {len(results)} items from {url} in {duration:.2f}s"
274
- )
275
-
276
- return results
277
-
278
- except Exception as e:
279
- duration = (datetime.now() - start_time).total_seconds()
280
- self._record_operation("extract", duration, 0, error=str(e))
281
-
282
- raise create_browser_error(
283
- f"Failed to extract from {url}: {e}", url=url, selector=selector
284
- )
285
-
286
- async def extract_all(
287
- self, url: str, selector: str, timeout: Optional[int] = None, **kwargs
288
- ) -> List[str]:
289
- """Extract all matching elements without limit."""
290
- return await self.extract(url, selector, limit=None, timeout=timeout, **kwargs)
291
-
292
- async def extract_attributes(
293
- self,
294
- url: str,
295
- selector: str,
296
- attribute: str,
297
- limit: Optional[int] = None,
298
- timeout: Optional[int] = None,
299
- **kwargs,
300
- ) -> List[str]:
301
- """Extract specific attributes from elements."""
302
- return await self.extract(
303
- url, selector, limit=limit, timeout=timeout, attribute=attribute, **kwargs
304
- )
305
-
306
- async def extract_structured(
307
- self, url: str, schema: dict, timeout: Optional[int] = None, **kwargs
308
- ) -> dict:
309
- """
310
- 🏗️ Extract structured data using schema definition.
311
-
312
- Args:
313
- url: Target URL
314
- schema: Schema defining what to extract
315
- timeout: Custom timeout
316
- **kwargs: Additional options
317
-
318
- Returns:
319
- Structured data matching schema
320
-
321
- Example:
322
- products = await browser.extract_structured(
323
- "https://shop.com",
324
- schema={
325
- "name": ".product-name",
326
- "price": ".price",
327
- "rating": ".rating"
328
- }
329
- )
330
- """
331
- start_time = datetime.now()
332
-
333
- try:
334
- await self._ensure_initialized()
335
- page = await self._navigate_smart(url, timeout=timeout)
336
-
337
- result = {}
338
-
339
- for field, selector in schema.items():
340
- if isinstance(selector, dict):
341
- # Nested schema
342
- if "selector" in selector and "fields" in selector:
343
- # Multiple items with fields
344
- items = []
345
- elements = await page.query_selector_all(selector["selector"])
346
-
347
- for element in elements:
348
- item = {}
349
- for sub_field, sub_selector in selector["fields"].items():
350
- sub_element = await element.query_selector(sub_selector)
351
- if sub_element:
352
- item[sub_field] = (
353
- await sub_element.text_content()
354
- ).strip()
355
- if item:
356
- items.append(item)
357
-
358
- result[field] = items
359
- else:
360
- # Single nested object
361
- nested_result = {}
362
- for sub_field, sub_selector in selector.items():
363
- element = await page.query_selector(sub_selector)
364
- if element:
365
- nested_result[sub_field] = (
366
- await element.text_content()
367
- ).strip()
368
- result[field] = nested_result
369
- else:
370
- # Simple selector
371
- element = await page.query_selector(selector)
372
- if element:
373
- result[field] = (await element.text_content()).strip()
374
-
375
- duration = (datetime.now() - start_time).total_seconds()
376
- self._record_operation("extract_structured", duration, len(result))
377
-
378
- # Log content extraction success
379
- await self._log_driver_event(
380
- DriverEventType.BROWSER_CONTENT_EXTRACTED,
381
- f"Content extracted successfully from {url}",
382
- url=url,
383
- extraction_time_ms=duration * 1000,
384
- fields_extracted=len(result),
385
- schema_fields=list(schema.keys()),
386
- )
387
-
388
- return result
389
-
390
- except Exception as e:
391
- duration = (datetime.now() - start_time).total_seconds()
392
- self._record_operation("extract_structured", duration, 0, error=str(e))
393
-
394
- raise create_browser_error(
395
- f"Failed to extract structured data from {url}: {e}", url=url
396
- )
397
-
398
- # ==========================================
399
- # NAVIGATION AND PAGE CONTROL
400
- # ==========================================
401
-
402
- async def navigate(self, url: str, timeout: Optional[int] = None):
403
- """🔥 NAVIGATE WITH STEALTH BY DEFAULT - safer and better detection avoidance."""
404
- return await self._navigate_stealth(url, timeout)
405
-
406
- async def navigate_unsafe(self, url: str, timeout: Optional[int] = None):
407
- """Navigate WITHOUT stealth - use only when stealth is not needed."""
408
- await self._ensure_initialized()
409
- return await self._navigate_smart(url, timeout)
410
-
411
async def _navigate_stealth(self, url: str, timeout: Optional[int] = None):
    """Open ``about:blank`` first, pause briefly, then navigate to ``url``.

    Args:
        url: Final address to open.
        timeout: Optional timeout forwarded unchanged to ``_navigate_smart``.

    Returns:
        Whatever ``_navigate_smart`` returns for the target URL.
    """
    await self._ensure_initialized()

    # Prefer the page already tracked by the service; otherwise use the
    # browser manager's page.
    page = self._current_page
    if not page:
        page = self._browser_manager.page

    log = self.logger
    if log:
        log.info(f"🕸️ Stealth navigation: blank → {url}")

    # Phase 1: land on a blank document and wait one second.
    await page.goto("about:blank", wait_until="domcontentloaded")
    await asyncio.sleep(1.0)

    # Phase 2: hand over to the regular navigation routine for the real target.
    target_result = await self._navigate_smart(url, timeout)
    return target_result
427
-
428
async def get_html(self, url: str, timeout: Optional[int] = None) -> str:
    """Return the full HTML of ``url`` after navigating to it.

    URLs containing ``"amazon.com"`` take a dedicated route: blank page, then
    the Amazon homepage, then the target, followed by fixed pauses and a
    best-effort wait for a search-result element. Every other URL goes
    through ``_navigate_stealth``.

    Args:
        url: Address whose HTML should be returned.
        timeout: Optional navigation timeout. It is forwarded to
            ``_navigate_stealth`` for regular sites. For the Amazon route it
            is converted with ``* 1000`` (assumes seconds, matching how
            ``_navigate_smart`` treats it) and falls back to 15000 ms when
            not given.

    Returns:
        The value of ``page.content()`` for the loaded page.
    """
    if "amazon.com" not in url:
        # Regular stealth navigation for non-Amazon sites.
        page = await self._navigate_stealth(url, timeout)
        return await page.content()

    await self._ensure_initialized()
    page = self._current_page or self._browser_manager.page

    if self.logger:
        self.logger.info(f"🛒 Amazon navigation: homepage → {url}")

    # Honour the caller's timeout instead of silently ignoring it; keep the
    # historical 15 s default when none is supplied.
    nav_timeout_ms = timeout * 1000 if timeout else 15000

    # Step 1: blank page, then the Amazon homepage.
    await page.goto("about:blank", wait_until="domcontentloaded")
    await asyncio.sleep(1.0)
    await page.goto(
        "https://www.amazon.com",
        wait_until="domcontentloaded",
        timeout=nav_timeout_ms,
    )
    await asyncio.sleep(2.0)  # Let the homepage settle.

    # Step 2: the actual target.
    await page.goto(url, wait_until="domcontentloaded", timeout=nav_timeout_ms)

    # Step 3: fixed pause for dynamically loaded content.
    await asyncio.sleep(3.0)

    # Step 4: best-effort wait for search results. Only ``Exception`` is
    # caught so that task cancellation and KeyboardInterrupt still propagate.
    try:
        await page.wait_for_selector(
            "[data-component-type='s-search-result']", timeout=5000
        )
    except Exception:
        # Selector did not show up in time; wait a little longer and proceed.
        await asyncio.sleep(2.0)

    return await page.content()
463
-
464
async def screenshot(
    self,
    url: Optional[str] = None,
    path: Optional[str] = None,
    full_page: bool = True,
) -> str:
    """Capture a screenshot and return the file path it was written to.

    Args:
        url: When given, the service navigates there first (via ``navigate``)
            and captures the resulting page. Otherwise the current page is
            used.
        path: Destination file. When omitted, a name of the form
            ``screenshot_YYYYmmdd_HHMMSS.png`` is generated from the current
            local time.
        full_page: Passed through to the page's ``screenshot`` call.

    Returns:
        The path handed to the page's ``screenshot`` call.

    Raises:
        BrowserError: If there is no active page or any step fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        if url:
            page = await self.navigate(url)
        else:
            page = self._current_page
            if not page:
                raise BrowserError("No active page for screenshot")

        if not path:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            path = f"screenshot_{timestamp}.png"

        await page.screenshot(path=path, full_page=full_page)

        if self.logger:
            self.logger.info(f"Screenshot saved: {path}")

        return path

    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise BrowserError(f"Failed to take screenshot: {e}") from e
492
-
493
- # ==========================================
494
- # SMART FEATURES
495
- # ==========================================
496
-
497
async def extract_with_retry(
    self,
    url: str,
    selector: str,
    max_retries: int = 3,
    backoff_factor: float = 2.0,
    **kwargs,
) -> List[str]:
    """Call ``extract`` and retry with exponential backoff on failure.

    Args:
        url: Address passed to ``extract``.
        selector: Selector passed to ``extract``.
        max_retries: Number of retries after the first attempt. Negative
            values are treated as ``0`` so at least one attempt is made.
        backoff_factor: Base of the delay; the wait before retry ``k``
            (0-based) is ``backoff_factor ** k`` seconds.
        **kwargs: Forwarded unchanged to ``extract``.

    Returns:
        The result of the first successful ``extract`` call.

    Raises:
        Exception: The error raised by the final failed attempt.
    """
    # Clamp so a negative value cannot skip the loop and end in ``raise None``.
    total_attempts = max(0, max_retries) + 1

    for attempt in range(total_attempts):
        try:
            return await self.extract(url, selector, **kwargs)
        except Exception as e:
            if attempt + 1 >= total_attempts:
                if self.logger:
                    self.logger.error(
                        f"All {total_attempts} extraction attempts failed"
                    )
                # Re-raise the last failure with its original traceback.
                raise

            delay = backoff_factor**attempt
            if self.logger:
                self.logger.warning(
                    f"Extraction attempt {attempt + 1} failed, retrying in {delay}s: {e}"
                )
            await asyncio.sleep(delay)
527
-
528
async def extract_with_scroll(
    self,
    url: str,
    selector: str,
    max_scrolls: int = 10,
    scroll_delay: float = 1.0,
    auto_detect_end: bool = True,
    **kwargs,
) -> List[str]:
    """Collect element texts from a page that loads more content on scroll.

    After navigating to ``url``, the method repeatedly reads the stripped
    text of every element matching ``selector``, scrolls to the bottom of
    the document and waits, for up to ``max_scrolls`` rounds.

    Args:
        url: Address to open via ``navigate``.
        selector: Selector passed to the page's ``query_selector_all``.
        max_scrolls: Upper bound on scroll rounds; ``0`` or less yields ``[]``.
        scroll_delay: Seconds to sleep after each scroll.
        auto_detect_end: Stop as soon as a round finds the same number of
            non-empty texts as the previous one.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The non-empty, stripped texts found in the last round that was kept.

    Raises:
        Whatever ``create_browser_error`` builds, chained to the original
        failure, when any step raises.
    """
    try:
        page = await self.navigate(url)
        all_results: List[str] = []
        last_count = 0
        scrolls_done = 0  # Defined up front so the final log is always valid.

        for scroll in range(max_scrolls):
            elements = await page.query_selector_all(selector)

            current_results = []
            for elem in elements:
                if not elem:
                    continue
                # text_content() may yield None; treat that as empty text
                # instead of crashing on ``None.strip()``.
                text = ((await elem.text_content()) or "").strip()
                if text:
                    current_results.append(text)

            # An unchanged count means the page has stopped growing.
            if auto_detect_end and len(current_results) == last_count:
                if self.logger:
                    self.logger.info(
                        f"No new items found, stopping scroll at {scroll}"
                    )
                break

            all_results = current_results
            last_count = len(current_results)

            # Scroll to the bottom, then give new content time to arrive.
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            await asyncio.sleep(scroll_delay)
            await page.wait_for_timeout(1000)
            scrolls_done = scroll + 1

        if self.logger:
            self.logger.info(
                f"Extracted {len(all_results)} items with {scrolls_done} scrolls"
            )

        return all_results

    except Exception as e:
        raise create_browser_error(
            f"Failed to extract with scroll from {url}: {e}",
            url=url,
            selector=selector,
        ) from e
582
-
583
- # ==========================================
584
- # PRIVATE METHODS
585
- # ==========================================
586
-
587
async def _navigate_smart(self, url: str, timeout: Optional[int] = None):
    """Load ``url`` in the service's page and return that page.

    Args:
        url: Address to open.
        timeout: Optional timeout; falls back to ``self.config.timeout``
            when falsy. It is multiplied by 1000 before being passed to
            ``page.goto`` (assumes seconds in, milliseconds out).

    Returns:
        The page object that performed the navigation.

    Raises:
        BrowserError: If any step fails; the original exception is attached
            as ``__cause__``.
    """
    timeout = timeout or self.config.timeout

    try:
        # Adopt the browser manager's page the first time one is needed.
        if not self._current_page:
            self._current_page = self._browser_manager.page

        page = self._current_page

        # Wait only for DOMContentLoaded, then pause one second so basic
        # content has a chance to render.
        await page.goto(url, wait_until="domcontentloaded", timeout=timeout * 1000)
        await asyncio.sleep(1.0)

        return page

    except Exception as e:
        raise BrowserError(f"Failed to navigate to {url}: {e}") from e
614
-
615
async def _wait_for_content_ready(
    self, page, selector: str, timeout: Optional[int] = None
):
    """Best-effort wait until ``selector`` is present on ``page``.

    Args:
        page: Object exposing an awaitable ``wait_for_selector``.
        selector: Selector to wait for.
        timeout: Optional timeout; falls back to ``self.config.timeout``
            when falsy. It is multiplied by 1000 before use.

    Returns:
        None. Any ``Exception`` raised while waiting is swallowed on purpose
        so that the later extraction step can decide how to handle a
        missing selector.
    """
    wait_for = timeout if timeout else self.config.timeout

    try:
        await page.wait_for_selector(selector, timeout=wait_for * 1000)
        # Short extra pause once the selector is present.
        await asyncio.sleep(0.5)
    except Exception:
        # Selector never appeared; treated as non-fatal here.
        return
631
-
632
async def _wait_for_dynamic_content(self, page, max_wait: float = 3.0):
    """Wait up to ``max_wait`` for ``page`` to reach the ``networkidle`` state.

    Args:
        page: Object exposing an awaitable ``wait_for_load_state``.
        max_wait: Upper bound for the wait; multiplied by 1000 before use.

    Returns:
        None. A failure (typically a timeout) is logged at debug level when
        a logger is configured and is otherwise ignored.
    """
    budget_ms = max_wait * 1000
    try:
        await page.wait_for_load_state("networkidle", timeout=budget_ms)
    except Exception as e:
        # Not reaching network idle is acceptable; the page may be usable.
        log = self.logger
        if log:
            log.debug(f"Network idle wait timeout (acceptable): {e}")
642
-
643
def _record_operation(
    self,
    operation: str,
    duration: float,
    result_count: int,
    error: Optional[str] = None,
):
    """Update the service's counters and forward the sample to ``metrics``.

    Args:
        operation: Name of the operation being recorded.
        duration: Duration added to the running total.
        result_count: Number of results the operation produced.
        error: Optional error text for a failed operation.
    """
    self._operation_count = self._operation_count + 1
    self._total_duration = self._total_duration + duration

    recorder = self.metrics
    if not recorder:
        # No metrics sink configured; the local counters are all we keep.
        return

    recorder.record_operation(
        service="browser",
        operation=operation,
        duration=duration,
        result_count=result_count,
        error=error,
    )
662
-
663
- # ==========================================
664
- # SERVICE MANAGEMENT
665
- # ==========================================
666
-
667
async def health_check(self) -> dict:
    """Report the browser service's health as a plain dictionary.

    Returns:
        A dict that always contains ``status``, ``service_name`` and
        ``last_check``. When the service is not initialized, or when the
        probe raises, ``status`` is ``"degraded"`` and ``last_error`` /
        ``error_count`` are included. Otherwise ``status`` is ``"healthy"``
        and timing fields are included; ``uptime_seconds`` carries the
        operation counter, not a real uptime.
    """
    try:
        checked_at = datetime.now().isoformat()

        if not self._is_initialized:
            report = {
                "status": "degraded",
                "service_name": "browser",
                "last_check": checked_at,
            }
            report["last_error"] = "Service not initialized"
            report["error_count"] = 1
            return report

        # Probe: obtain a page from the manager and close it again, timing
        # the round trip.
        probe_started = datetime.now()
        probe_page = await self._browser_manager.get_page()
        await probe_page.close()
        elapsed = datetime.now() - probe_started
        elapsed_ms = elapsed.total_seconds() * 1000

        report = {
            "status": "healthy",
            "service_name": "browser",
            "last_check": checked_at,
        }
        report["response_time_ms"] = elapsed_ms
        report["error_rate"] = 0.0
        report["uptime_seconds"] = self._operation_count  # counter used as a stand-in
        report["error_count"] = 0
        return report
    except Exception as e:
        failure = {
            "status": "degraded",
            "service_name": "browser",
            "last_check": datetime.now().isoformat(),
        }
        failure["last_error"] = str(e)
        failure["error_count"] = 1
        return failure
704
-
705
async def cleanup(self):
    """Release the current page and the browser manager, then reset state.

    Each resource is closed independently, so a failure while closing the
    page can no longer prevent the browser manager from being closed. The
    references are cleared and ``_is_initialized`` is set to ``False``
    regardless of close errors. Errors are logged (when a logger is
    configured) and never raised.
    """
    failures = []

    # Detach the reference first so it is cleared even if close() raises.
    page, self._current_page = self._current_page, None
    if page:
        try:
            await page.close()
        except Exception as e:
            failures.append(e)

    manager, self._browser_manager = self._browser_manager, None
    if manager:
        try:
            await manager.close_async()
        except Exception as e:
            failures.append(e)

    self._is_initialized = False

    if self.logger:
        if failures:
            for failure in failures:
                self.logger.error(f"Error during browser cleanup: {failure}")
        else:
            self.logger.info("Browser service cleaned up")
724
-
725
def __repr__(self) -> str:
    """Return a short debug string with the init flag and operation count."""
    ready = self._is_initialized
    ops = self._operation_count
    return f"<BrowserService(initialized={ready}, operations={ops})>"