unrealon 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.8.dist-info/METADATA +0 -803
  71. unrealon-1.0.8.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -0,0 +1,416 @@
1
+ """
2
+ Modern Browser Manager built on Playwright
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import uuid
8
+ from datetime import datetime, timezone
9
+ from typing import Optional, Dict, Any, List
10
+ from pathlib import Path
11
+
12
+ try:
13
+ from playwright.async_api import async_playwright, Browser, BrowserContext, Page
14
+ except ImportError:
15
+ async_playwright = None
16
+ Browser = None
17
+ BrowserContext = None
18
+ Page = None
19
+
20
+ from unrealon_rpc.logging import get_logger
21
+
22
+ from .config import BrowserConfig
23
+ from ..exceptions import BrowserError
24
+
25
+
26
+ class BrowserManager:
27
+ """
28
+ 🌐 Modern Browser Manager v4.0
29
+
30
+ Simplified browser automation built on Playwright with stealth capabilities.
31
+ Designed for the new architecture where complex automation is simplified.
32
+
33
+ Features:
34
+ - 🎭 Stealth Mode: Anti-detection by default
35
+ - 🍪 Cookie Persistence: Automatic cookie management
36
+ - 📸 Screenshots: Debug-friendly screenshot capture
37
+ - ⚡ Performance: Optimized for speed and reliability
38
+ - 🔧 Zero Config: Works out of the box
39
+ """
40
+
41
+ def __init__(self, config: BrowserConfig):
42
+ """
43
+ Initialize browser manager
44
+
45
+ Args:
46
+ config: Browser configuration
47
+ """
48
+ if async_playwright is None:
49
+ raise BrowserError(
50
+ "Playwright is not installed. Install it with: pip install playwright && playwright install"
51
+ )
52
+
53
+ self.config = config
54
+ self.logger = get_logger()
55
+
56
+ # Browser components
57
+ self._playwright = None
58
+ self._browser: Optional[Browser] = None
59
+ self._context: Optional[BrowserContext] = None
60
+ self._page: Optional[Page] = None
61
+
62
+ # State
63
+ self._is_initialized = False
64
+ self._session_id = str(uuid.uuid4())
65
+
66
+ # ==========================================
67
+ # LIFECYCLE MANAGEMENT
68
+ # ==========================================
69
+
70
+ async def initialize(self) -> None:
71
+ """Initialize browser components"""
72
+ if self._is_initialized:
73
+ return
74
+
75
+ try:
76
+ self.logger.info("Initializing browser manager...")
77
+
78
+ # Start Playwright
79
+ self._playwright = await async_playwright().start()
80
+
81
+ # Launch browser
82
+ browser_args = self._get_browser_args()
83
+
84
+ if self.config.browser_type == "chromium":
85
+ self._browser = await self._playwright.chromium.launch(**browser_args)
86
+ elif self.config.browser_type == "firefox":
87
+ self._browser = await self._playwright.firefox.launch(**browser_args)
88
+ elif self.config.browser_type == "webkit":
89
+ self._browser = await self._playwright.webkit.launch(**browser_args)
90
+ else:
91
+ raise BrowserError(f"Unsupported browser type: {self.config.browser_type}")
92
+
93
+ # Create context
94
+ context_args = self._get_context_args()
95
+ self._context = await self._browser.new_context(**context_args)
96
+
97
+ # Load cookies if available
98
+ await self._load_cookies()
99
+
100
+ # Create page
101
+ self._page = await self._context.new_page()
102
+
103
+ # Setup stealth mode
104
+ if self.config.stealth_mode:
105
+ await self._setup_stealth()
106
+
107
+ # Set timeouts
108
+ self._page.set_default_timeout(self.config.page_timeout)
109
+ self._page.set_default_navigation_timeout(self.config.navigation_timeout)
110
+
111
+ self._is_initialized = True
112
+ self.logger.info(f"Browser initialized: {self.config.browser_type}")
113
+
114
+ except Exception as e:
115
+ await self.cleanup()
116
+ raise BrowserError(f"Failed to initialize browser: {e}")
117
+
118
+ async def cleanup(self) -> None:
119
+ """Clean up browser resources"""
120
+ self.logger.info("Cleaning up browser resources...")
121
+
122
+ try:
123
+ # Save cookies
124
+ if self._context and self.config.persist_cookies:
125
+ await self._save_cookies()
126
+
127
+ # Close page
128
+ if self._page:
129
+ await self._page.close()
130
+ self._page = None
131
+
132
+ # Close context
133
+ if self._context:
134
+ await self._context.close()
135
+ self._context = None
136
+
137
+ # Close browser
138
+ if self._browser:
139
+ await self._browser.close()
140
+ self._browser = None
141
+
142
+ # Stop Playwright
143
+ if self._playwright:
144
+ await self._playwright.stop()
145
+ self._playwright = None
146
+
147
+ self._is_initialized = False
148
+ self.logger.info("Browser cleanup completed")
149
+
150
+ except Exception as e:
151
+ self.logger.error(f"Error during browser cleanup: {e}")
152
+
153
+ # ==========================================
154
+ # HIGH-LEVEL METHODS
155
+ # ==========================================
156
+
157
+ async def get_html(self, url: str, wait_for: Optional[str] = None) -> str:
158
+ """
159
+ Get HTML content from URL
160
+
161
+ Args:
162
+ url: Target URL
163
+ wait_for: Optional CSS selector to wait for
164
+
165
+ Returns:
166
+ HTML content as string
167
+ """
168
+ await self._ensure_initialized()
169
+
170
+ try:
171
+ self.logger.info(f"Navigating to: {url}")
172
+
173
+ # Navigate to URL
174
+ await self._page.goto(url, wait_until="domcontentloaded")
175
+
176
+ # Wait for specific element if requested
177
+ if wait_for:
178
+ await self._page.wait_for_selector(wait_for, timeout=self.config.element_timeout)
179
+
180
+ # Get HTML content
181
+ html = await self._page.content()
182
+
183
+ # Save screenshot if debugging
184
+ if self.config.save_screenshots:
185
+ await self._save_screenshot(f"get_html_{url.replace('/', '_')}")
186
+
187
+ self.logger.info(f"Retrieved HTML content: {len(html)} characters")
188
+ return html
189
+
190
+ except Exception as e:
191
+ if self.config.save_screenshots:
192
+ await self._save_screenshot(f"error_{url.replace('/', '_')}")
193
+ raise BrowserError(f"Failed to get HTML from {url}: {e}")
194
+
195
+ async def extract_elements(
196
+ self,
197
+ url: str,
198
+ selector: str,
199
+ attribute: Optional[str] = None
200
+ ) -> List[str]:
201
+ """
202
+ Extract elements from URL using CSS selector
203
+
204
+ Args:
205
+ url: Target URL
206
+ selector: CSS selector
207
+ attribute: Optional attribute to extract (default: text content)
208
+
209
+ Returns:
210
+ List of extracted values
211
+ """
212
+ await self._ensure_initialized()
213
+
214
+ try:
215
+ self.logger.info(f"Extracting elements from: {url}")
216
+
217
+ # Navigate to URL
218
+ await self._page.goto(url, wait_until="domcontentloaded")
219
+
220
+ # Wait for elements
221
+ await self._page.wait_for_selector(selector, timeout=self.config.element_timeout)
222
+
223
+ # Extract elements
224
+ if attribute:
225
+ elements = await self._page.eval_on_selector_all(
226
+ selector,
227
+ f"elements => elements.map(el => el.getAttribute('{attribute}'))"
228
+ )
229
+ else:
230
+ elements = await self._page.eval_on_selector_all(
231
+ selector,
232
+ "elements => elements.map(el => el.textContent.trim())"
233
+ )
234
+
235
+ # Filter out empty values
236
+ elements = [el for el in elements if el and el.strip()]
237
+
238
+ self.logger.info(f"Extracted {len(elements)} elements")
239
+ return elements
240
+
241
+ except Exception as e:
242
+ raise BrowserError(f"Failed to extract elements from {url}: {e}")
243
+
244
+ async def screenshot(self, filename: Optional[str] = None) -> Path:
245
+ """
246
+ Take screenshot of current page
247
+
248
+ Args:
249
+ filename: Optional filename (auto-generated if not provided)
250
+
251
+ Returns:
252
+ Path to screenshot file
253
+ """
254
+ await self._ensure_initialized()
255
+
256
+ if not filename:
257
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
258
+ filename = f"screenshot_{timestamp}.png"
259
+
260
+ if not filename.endswith('.png'):
261
+ filename += '.png'
262
+
263
+ screenshot_path = self.config.screenshots_dir / filename
264
+
265
+ try:
266
+ await self._page.screenshot(path=str(screenshot_path), full_page=True)
267
+ self.logger.info(f"Screenshot saved: {screenshot_path}")
268
+ return screenshot_path
269
+
270
+ except Exception as e:
271
+ raise BrowserError(f"Failed to take screenshot: {e}")
272
+
273
+ # ==========================================
274
+ # UTILITY METHODS
275
+ # ==========================================
276
+
277
+ async def health_check(self) -> Dict[str, Any]:
278
+ """Browser health check"""
279
+ return {
280
+ "status": "healthy" if self._is_initialized else "not_initialized",
281
+ "browser_type": self.config.browser_type,
282
+ "session_id": self._session_id,
283
+ "stealth_mode": self.config.stealth_mode,
284
+ "headless": self.config.headless,
285
+ "initialized": self._is_initialized
286
+ }
287
+
288
+ # ==========================================
289
+ # PRIVATE METHODS
290
+ # ==========================================
291
+
292
+ async def _ensure_initialized(self) -> None:
293
+ """Ensure browser is initialized"""
294
+ if not self._is_initialized:
295
+ await self.initialize()
296
+
297
+ def _get_browser_args(self) -> Dict[str, Any]:
298
+ """Get browser launch arguments"""
299
+ args = {
300
+ "headless": self.config.headless,
301
+ "args": self.config.extra_args.copy()
302
+ }
303
+
304
+ # Add stealth arguments
305
+ if self.config.stealth_mode:
306
+ args["args"].extend([
307
+ "--no-first-run",
308
+ "--no-default-browser-check",
309
+ "--disable-blink-features=AutomationControlled",
310
+ "--disable-web-security",
311
+ "--disable-features=VizDisplayCompositor"
312
+ ])
313
+
314
+ # Add performance arguments
315
+ if self.config.disable_images:
316
+ args["args"].append("--disable-images")
317
+
318
+ return args
319
+
320
+ def _get_context_args(self) -> Dict[str, Any]:
321
+ """Get browser context arguments"""
322
+ args = {
323
+ "viewport": {
324
+ "width": self.config.viewport_width,
325
+ "height": self.config.viewport_height
326
+ }
327
+ }
328
+
329
+ # User agent
330
+ if self.config.user_agent:
331
+ args["user_agent"] = self.config.user_agent
332
+
333
+ # Proxy
334
+ if self.config.proxy_url:
335
+ proxy_config = {"server": self.config.proxy_url}
336
+ if self.config.proxy_username:
337
+ proxy_config["username"] = self.config.proxy_username
338
+ if self.config.proxy_password:
339
+ proxy_config["password"] = self.config.proxy_password
340
+ args["proxy"] = proxy_config
341
+
342
+ # Disable resources
343
+ if self.config.disable_javascript:
344
+ args["java_script_enabled"] = False
345
+
346
+ return args
347
+
348
+ async def _setup_stealth(self) -> None:
349
+ """Setup stealth mode"""
350
+ # Add stealth scripts
351
+ await self._page.add_init_script("""
352
+ // Remove webdriver property
353
+ Object.defineProperty(navigator, 'webdriver', {
354
+ get: () => undefined,
355
+ });
356
+
357
+ // Mock plugins
358
+ Object.defineProperty(navigator, 'plugins', {
359
+ get: () => [1, 2, 3, 4, 5],
360
+ });
361
+
362
+ // Mock languages
363
+ Object.defineProperty(navigator, 'languages', {
364
+ get: () => ['en-US', 'en'],
365
+ });
366
+ """)
367
+
368
+ async def _load_cookies(self) -> None:
369
+ """Load cookies from file"""
370
+ if not self.config.persist_cookies or not self.config.cookies_file:
371
+ return
372
+
373
+ try:
374
+ if self.config.cookies_file.exists():
375
+ with open(self.config.cookies_file, 'r') as f:
376
+ cookies = json.load(f)
377
+ await self._context.add_cookies(cookies)
378
+ self.logger.info(f"Loaded {len(cookies)} cookies")
379
+ except Exception as e:
380
+ self.logger.warning(f"Failed to load cookies: {e}")
381
+
382
+ async def _save_cookies(self) -> None:
383
+ """Save cookies to file"""
384
+ if not self.config.persist_cookies or not self.config.cookies_file:
385
+ return
386
+
387
+ try:
388
+ cookies = await self._context.cookies()
389
+ with open(self.config.cookies_file, 'w') as f:
390
+ json.dump(cookies, f, indent=2)
391
+ self.logger.info(f"Saved {len(cookies)} cookies")
392
+ except Exception as e:
393
+ self.logger.warning(f"Failed to save cookies: {e}")
394
+
395
+ async def _save_screenshot(self, name: str) -> None:
396
+ """Save debug screenshot"""
397
+ try:
398
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
399
+ filename = f"{name}_{timestamp}.png"
400
+ await self.screenshot(filename)
401
+ except Exception as e:
402
+ self.logger.warning(f"Failed to save debug screenshot: {e}")
403
+
404
+ # ==========================================
405
+ # CONTEXT MANAGER SUPPORT
406
+ # ==========================================
407
+
408
+ async def __aenter__(self):
409
+ """Async context manager entry"""
410
+ await self.initialize()
411
+ return self
412
+
413
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
414
+ """Async context manager exit"""
415
+ await self.cleanup()
416
+ return False
@@ -0,0 +1,28 @@
1
+ """
2
+ UnrealOn Driver exceptions
3
+ """
4
+
5
+
6
class ParserError(Exception):
    """Root of the UnrealOn Driver exception hierarchy."""


class BrowserError(ParserError):
    """Raised for failures in browser automation."""


class HTMLCleaningError(ParserError):
    """Raised when HTML cleaning fails."""


class ConfigurationError(ParserError):
    """Raised for invalid or missing configuration."""


# NOTE(review): this name shadows the builtin ConnectionError in any module
# that imports it unqualified; kept as-is because it is part of the public API.
class ConnectionError(ParserError):
    """Raised for connection failures."""
@@ -0,0 +1,55 @@
1
+ """
2
+ Parser management system with specialized managers
3
+
4
+ Strict Pydantic v2 compliance and type safety
5
+ """
6
+
7
+ from .parser_manager import ParserManager, ParserManagerConfig, ParserStats, get_parser_manager, quick_parse
8
+ from .daemon_manager import DaemonManager, DaemonStatus
9
+ from .cli_manager import CLIManager
10
+ from .managers import (
11
+ ConfigManager, ParserConfig,
12
+ ResultManager, ParseResult, ParseMetrics, OperationStatus,
13
+ ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity,
14
+ LoggingManager, LoggingConfig, LogLevel, LogContext,
15
+ HTMLManager, HTMLCleaningConfig, HTMLCleaningStats,
16
+ BrowserManager, BrowserConfig, BrowserStats
17
+ )
18
+
19
+ __all__ = [
20
+ # Main Parser Manager
21
+ "ParserManager",
22
+ "ParserManagerConfig",
23
+ "ParserStats",
24
+ "get_parser_manager",
25
+ "quick_parse",
26
+
27
+ # Daemon Manager
28
+ "DaemonManager",
29
+ "DaemonStatus",
30
+
31
+ # CLI Manager
32
+ "CLIManager",
33
+
34
+ # Individual Managers
35
+ "ConfigManager",
36
+ "ParserConfig",
37
+ "ResultManager",
38
+ "ParseResult",
39
+ "ParseMetrics",
40
+ "OperationStatus",
41
+ "ErrorManager",
42
+ "RetryConfig",
43
+ "ErrorInfo",
44
+ "ErrorSeverity",
45
+ "LoggingManager",
46
+ "LoggingConfig",
47
+ "LogLevel",
48
+ "LogContext",
49
+ "HTMLManager",
50
+ "HTMLCleaningConfig",
51
+ "HTMLCleaningStats",
52
+ "BrowserManager",
53
+ "BrowserConfig",
54
+ "BrowserStats"
55
+ ]
@@ -0,0 +1,141 @@
1
+ """
2
+ CLI Manager - Base class for parser CLI interfaces
3
+
4
+ Strict Pydantic v2 compliance and type safety
5
+ """
6
+
7
+ import asyncio
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import List, Optional, Any, Dict
11
+ import click
12
+
13
+ from .parser_manager import ParserManager, ParserManagerConfig
14
+ from .managers import ParserConfig, LoggingConfig, HTMLCleaningConfig, BrowserConfig
15
+
16
+
17
class CLIManager(ParserManager):
    """Shared command-line plumbing for parser CLIs.

    Builds a ``ParserManagerConfig`` from a handful of constructor arguments
    and provides coroutine entry points (parse / test / quick) that report
    progress through ``click.echo`` and return ``True`` or ``False`` so the
    caller can turn the outcome into a process exit code.
    """

    def __init__(self, parser_name: str, parser_type: str, system_dir: str,
                 bridge_enabled: bool = False, websocket_url: str = "ws://localhost:8000/ws"):
        """Assemble the manager configuration and initialise the base class.

        Args:
            parser_name: Passed to both ``ParserConfig`` and ``LoggingConfig``.
            parser_type: Passed to ``ParserConfig``.
            system_dir: Converted to ``Path`` and passed to ``ParserConfig``.
            bridge_enabled: Passed to ``ParserManagerConfig``.
            websocket_url: Accepted but not used in this constructor.
        """
        # NOTE(review): websocket_url is never forwarded to the manager
        # config, yet show_status() prints self.config.websocket_url --
        # confirm whether it should be passed along.
        super().__init__(
            ParserManagerConfig(
                parser_config=ParserConfig(
                    parser_name=parser_name,
                    parser_type=parser_type,
                    system_dir=Path(system_dir),
                ),
                logging_config=LoggingConfig(parser_name=parser_name),
                # HTML cleaning and browser settings use their defaults.
                html_config=HTMLCleaningConfig(),
                browser_config=BrowserConfig(),
                bridge_enabled=bridge_enabled,
            )
        )

    async def run_parse_command(self, urls: Optional[List[str]] = None) -> bool:
        """Parse each URL in order and report how many succeeded.

        A result counts as successful when ``result.get("success")`` equals
        the string ``"true"``. Returns ``True`` if at least one URL
        succeeded; returns ``False`` when no URLs were given or an exception
        occurred. ``cleanup()`` always runs afterwards.
        """
        try:
            # Initialisation happens before the empty-input check.
            await self.initialize()

            if not urls:
                click.echo("❌ No URLs provided", err=True)
                return False

            click.echo(f"🚀 Parsing {len(urls)} URLs...")
            outcomes = [await self.parse_url(target) for target in urls]
            succeeded = len([item for item in outcomes if item.get("success") == "true"])
            click.echo(f"✅ Parse completed: {succeeded}/{len(outcomes)} URLs successful")
            return succeeded > 0

        except Exception as exc:
            click.echo(f"❌ Parse error: {exc}", err=True)
            return False
        finally:
            await self.cleanup()

    async def run_test_command(self) -> bool:
        """Smoke-test the parser: initialise it and clean a fixed HTML snippet.

        Returns ``True`` when both steps complete and ``False`` if either
        raises. ``cleanup()`` always runs afterwards.
        """
        try:
            click.echo("🧪 Running test...")

            await self.initialize()
            click.echo("✅ Parser initialization: OK")

            # Exercise the HTML cleaner on a tiny fixed document.
            sample = "<html><body><h1>Test</h1></body></html>"
            tidy = await self.clean_html(sample)
            click.echo(f"✅ HTML cleaning: OK ({len(sample)} → {len(tidy)} chars)")

            click.echo("✅ All tests passed!")
            return True

        except Exception as exc:
            click.echo(f"❌ Test failed: {exc}", err=True)
            return False
        finally:
            await self.cleanup()

    async def run_quick_command(self, urls: List[str]) -> bool:
        """Parse the given URLs in order and report how many succeeded.

        Uses the same success rule as ``run_parse_command`` (``"success"``
        equal to the string ``"true"``). Returns ``True`` if at least one URL
        succeeded and ``False`` otherwise or on any exception. ``cleanup()``
        always runs afterwards.
        """
        try:
            click.echo(f"⚡ Quick parse of {len(urls)} URLs...")

            await self.initialize()
            outcomes = [await self.parse_url(target) for target in urls]
            succeeded = len([item for item in outcomes if item.get("success") == "true"])
            click.echo(f"✅ Quick parse completed: {succeeded}/{len(outcomes)} URLs successful")

            return succeeded > 0

        except Exception as exc:
            click.echo(f"❌ Quick parse error: {exc}", err=True)
            return False
        finally:
            await self.cleanup()

    def show_status(self, config_data: Dict[str, Any]) -> None:
        """Print a short status summary read from ``self.config``.

        ``config_data`` is accepted but not used by this method.
        """
        cfg = self.config
        click.echo("📊 Parser Status")
        click.echo("=" * 40)
        click.echo(f"Parser Name: {cfg.parser_name}")
        click.echo(f"Parser Type: {cfg.parser_type}")
        click.echo(f"System Dir: {cfg.system_dir}")
        bridge_on = cfg.bridge_enabled
        click.echo(f"Bridge: {'Enabled' if bridge_on else 'Disabled'}")
        if bridge_on:
            # The bridge URL is shown only while the bridge is enabled.
            click.echo(f" URL: {cfg.websocket_url}")

    @staticmethod
    def create_config_file(config_path: Path, create_func) -> None:
        """Call ``create_func(config_path)`` and report the outcome.

        Any exception is caught and printed to stderr rather than re-raised.
        """
        try:
            create_func(config_path)
            click.echo(f"✅ Configuration file created: {config_path}")
            click.echo(" Edit the file to customize your parser settings")
        except Exception as exc:
            click.echo(f"❌ Failed to create configuration: {exc}", err=True)

    @staticmethod
    def run_async_command(coro):
        """Run ``coro`` to completion, then exit the process.

        The exit status is 0 when the coroutine's result is truthy, else 1.
        """
        exit_code = 0 if asyncio.run(coro) else 1
        sys.exit(exit_code)