unrealon 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302)
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.9.dist-info/METADATA +0 -810
  71. unrealon-1.0.9.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -1,638 +0,0 @@
1
- """
2
- 🚀 Revolutionary Parser Class - UnrealOn Driver v3.0
3
-
4
- Zero-configuration web automation with AI-first design and multiple execution modes.
5
- Built from scratch for modern web automation without legacy complexity.
6
- """
7
-
8
- import asyncio
9
- import os
10
- import sys
11
- from datetime import datetime, timezone
12
- from typing import Any, Dict, List, Optional, Union, Callable
13
- from pathlib import Path
14
-
15
- # Core exceptions
16
- from .exceptions import ParserError, ConfigurationError
17
-
18
- # Service integrations
19
- from unrealon_driver.src.services.browser_service import BrowserService
20
- from unrealon_driver.src.services.llm import LLMService
21
- from unrealon_driver.src.services.llm.browser_llm_service import BrowserLLMService
22
- from unrealon_driver.src.services.websocket_service import WebSocketService
23
- from unrealon_driver.src.logging import DriverLogger, ensure_driver_logger
24
- from unrealon_driver.src.services.metrics_service import MetricsService
25
-
26
- # Configuration system
27
- from unrealon_driver.src.config.auto_config import AutoConfig
28
-
29
- # Execution modes
30
- from unrealon_driver.src.execution.test_mode import TestMode
31
- from unrealon_driver.src.execution.daemon_mode import DaemonMode
32
- from unrealon_driver.src.execution.scheduled_mode import ScheduledMode
33
- from unrealon_driver.src.execution.interactive_mode import InteractiveMode
34
-
35
- # Type-safe execution configuration
36
- from unrealon_driver.src.dto.execution import (
37
- ParserTestConfig,
38
- DaemonModeConfig,
39
- ScheduledModeConfig,
40
- InteractiveModeConfig,
41
- )
42
-
43
-
44
- class Parser:
45
- """
46
- 🚀 Revolutionary Parser Class
47
-
48
- Zero-configuration web automation with AI-first design.
49
-
50
- Features:
51
- - 🎯 Zero Configuration: Everything works out of the box
52
- - 🤖 AI-First Design: LLM integration as core feature
53
- - 🔌 Multiple Execution Modes: test, daemon, scheduled, interactive
54
- - 🌐 Smart Browser: Intelligent automation with stealth
55
- - ⏰ Human-Readable Scheduling: "30m", "1h", "daily"
56
- - 📊 Built-in Monitoring: Enterprise observability
57
-
58
- Quick Start:
59
- class MyParser(Parser):
60
- async def parse(self):
61
- # Simple browser extraction
62
- return await self.browser.extract("https://example.com", ".item")
63
-
64
- # AI-powered extraction (browser + LLM combined)
65
- return await self.browser_llm.extract("https://example.com", schema={
66
- "products": [{"name": "string", "price": "number"}]
67
- })
68
-
69
- # Development testing
70
- result = await MyParser().test()
71
-
72
- # Production daemon (WebSocket service)
73
- await MyParser().daemon()
74
-
75
- # Automated scheduling
76
- await MyParser().schedule(every="30m")
77
- """
78
-
79
- def __init__(
80
- self,
81
- parser_id: Optional[str] = None,
82
- parser_name: Optional[str] = None,
83
- config: Optional[AutoConfig] = None,
84
- **kwargs,
85
- ):
86
- """
87
- Initialize parser with zero configuration.
88
-
89
- Args:
90
- parser_id: Unique identifier (auto-generated if not provided)
91
- parser_name: Human-readable name (auto-generated if not provided)
92
- config: Optional configuration override
93
- **kwargs: Additional configuration options
94
- """
95
- # Auto-generate identifiers
96
- self.parser_id = parser_id or self._generate_parser_id()
97
- self.parser_name = parser_name or self._generate_parser_name()
98
-
99
- # Initialize auto-configuration
100
- self._config: AutoConfig = AutoConfig.create_development(
101
- self.parser_id, config=config
102
- )
103
-
104
- # Service initialization (lazy-loaded)
105
- self._browser: BrowserService = None
106
- self._llm: LLMService = None
107
- self._browser_llm: BrowserLLMService = None
108
- self._websocket: WebSocketService = None
109
- self._logger: DriverLogger = None
110
- self._metrics: MetricsService = None
111
-
112
- # Execution mode handlers
113
- self._test_mode: TestMode = None
114
- self._daemon_mode: DaemonMode = None
115
- self._scheduled_mode: ScheduledMode = None
116
- self._interactive_mode: InteractiveMode = None
117
-
118
- # Runtime state
119
- self._is_initialized = False
120
- self._shutdown_event = asyncio.Event()
121
-
122
- # ==========================================
123
- # ZERO-CONFIG SERVICE PROPERTIES
124
- # ==========================================
125
-
126
- @property
127
- def browser(self) -> BrowserService:
128
- """Smart browser service with zero configuration."""
129
- if self._browser is None:
130
- self._browser = BrowserService(
131
- config=self._config.browser_config,
132
- logger=self.logger,
133
- metrics=self.metrics,
134
- )
135
- return self._browser
136
-
137
- @property
138
- def llm(self) -> LLMService:
139
- """AI-powered extraction service."""
140
- if self._llm is None:
141
- self._llm = LLMService(
142
- config=self._config.llm_config,
143
- logger=self.logger,
144
- )
145
- return self._llm
146
-
147
- @property
148
- def browser_llm(self) -> BrowserLLMService:
149
- """🔥 AI-powered browser service - auto-configured and ready to use."""
150
- if self._browser_llm is None:
151
- self._browser_llm = BrowserLLMService(
152
- auto_config=self._config,
153
- logger=self.logger,
154
- metrics=self.metrics,
155
- )
156
- return self._browser_llm
157
-
158
- @property
159
- def websocket(self) -> WebSocketService:
160
- """WebSocket service for daemon mode."""
161
- if self._websocket is None:
162
- self._websocket = WebSocketService(
163
- config=self._config.websocket_config,
164
- logger=self.logger,
165
- metrics=self.metrics,
166
- parser_id=self.parser_id,
167
- )
168
- return self._websocket
169
-
170
- @property
171
- def logger(self) -> DriverLogger:
172
- """Enterprise logging service with SDK integration."""
173
- if self._logger is None:
174
- self._logger = ensure_driver_logger(
175
- parser_id=self.parser_id,
176
- parser_name=self.parser_name,
177
- system_dir=str(self._config.system_dir) if self._config.system_dir else None,
178
- )
179
- return self._logger
180
-
181
- @property
182
- def metrics(self) -> MetricsService:
183
- """Built-in metrics and monitoring."""
184
- if self._metrics is None:
185
- self._metrics = MetricsService(
186
- config=self._config.metrics_config, parser_id=self.parser_id
187
- )
188
- return self._metrics
189
-
190
- # ==========================================
191
- # CORE PARSING METHOD
192
- # ==========================================
193
-
194
- async def parse(self) -> dict:
195
- """
196
- 🎯 Main parsing method - OVERRIDE THIS
197
-
198
- This is where you implement your parsing logic.
199
-
200
- Returns:
201
- Dictionary containing parsed data
202
-
203
- Example:
204
- async def parse(self):
205
- # Simple extraction
206
- headlines = await self.browser.extract(
207
- "https://news.com",
208
- ".headline"
209
- )
210
-
211
- # AI-powered extraction
212
- products = await self.llm.extract(html, schema={
213
- "products": [{"name": "string", "price": "number"}]
214
- })
215
-
216
- return {"headlines": headlines, "products": products}
217
- """
218
- raise NotImplementedError(
219
- f"Parser '{self.parser_name}' must implement the parse() method. "
220
- f"This is where you define your parsing logic."
221
- )
222
-
223
- # ==========================================
224
- # EXECUTION MODES
225
- # ==========================================
226
-
227
- async def test(self, **kwargs) -> dict:
228
- """
229
- 🧪 Test Mode - Development and debugging
230
-
231
- Single execution for development and testing.
232
-
233
- Features:
234
- - Detailed logging and debugging
235
- - Error reporting with suggestions
236
- - Performance metrics
237
- - Results visualization
238
-
239
- Args:
240
- **kwargs: Test configuration options
241
-
242
- Returns:
243
- Parsed data with metadata
244
-
245
- Example:
246
- result = await parser.test()
247
- print(result)
248
- """
249
- if self._test_mode is None:
250
- # Create type-safe test configuration
251
- test_config = ParserTestConfig(
252
- verbose=kwargs.get("verbose", False),
253
- show_browser=kwargs.get("show_browser", False),
254
- save_screenshots=kwargs.get("save_screenshots", False),
255
- timeout_seconds=kwargs.get("timeout", 60),
256
- )
257
-
258
- self._test_mode = TestMode(parser=self, config=test_config)
259
-
260
- return await self._test_mode.execute(**kwargs)
261
-
262
- async def daemon(
263
- self, server: Optional[str] = None, api_key: Optional[str] = None, **kwargs
264
- ) -> None:
265
- """
266
- 🔌 Daemon Mode - Production WebSocket service
267
-
268
- Connects to UnrealOn server as persistent WebSocket service.
269
-
270
- Features:
271
- - Auto-connection with reconnection
272
- - Command handling and response
273
- - Health monitoring and reporting
274
- - Graceful shutdown handling
275
- - Load balancing support
276
-
277
- Args:
278
- server: WebSocket server URL (auto-detected if not provided)
279
- api_key: Authentication key (auto-detected if not provided)
280
- **kwargs: Daemon configuration options
281
-
282
- Example:
283
- # Auto-configured daemon
284
- await parser.daemon()
285
-
286
- # Custom server
287
- await parser.daemon(
288
- server="wss://my-server.com",
289
- api_key="my_key"
290
- )
291
- """
292
- if self._daemon_mode is None:
293
- self._daemon_mode = DaemonMode(
294
- parser=self, config=self._config.daemon_config
295
- )
296
-
297
- await self._daemon_mode.start(server=server, api_key=api_key, **kwargs)
298
-
299
- async def schedule(self, every: str, at: Optional[str] = None, **kwargs) -> None:
300
- """
301
- ⏰ Scheduled Mode - Automated recurring execution
302
-
303
- Human-readable scheduling with enterprise monitoring.
304
-
305
- Features:
306
- - Natural language intervals ("30m", "1h", "daily")
307
- - Smart load balancing with jitter
308
- - Error recovery and retries
309
- - Health monitoring and alerting
310
- - Production-ready reliability
311
-
312
- Args:
313
- every: Human-readable interval ("30m", "1h", "daily", etc.)
314
- at: Specific time for daily/weekly schedules ("09:00")
315
- **kwargs: Scheduling configuration options
316
-
317
- Examples:
318
- # Every 30 minutes
319
- await parser.schedule(every="30m")
320
-
321
- # Daily at 9 AM
322
- await parser.schedule(every="daily", at="09:00")
323
-
324
- # Every hour with monitoring
325
- await parser.schedule(
326
- every="1h",
327
- monitoring=True,
328
- error_handling=True
329
- )
330
- """
331
- if self._scheduled_mode is None:
332
- self._scheduled_mode = ScheduledMode(
333
- parser=self, config=self._config.scheduled_config
334
- )
335
-
336
- await self._scheduled_mode.start(every=every, at=at, **kwargs)
337
-
338
- async def interactive(self, **kwargs) -> None:
339
- """
340
- 🎮 Interactive Mode - Live development and debugging
341
-
342
- Interactive shell for live development and testing.
343
-
344
- Features:
345
- - Live parser execution
346
- - Real-time result inspection
347
- - Dynamic configuration changes
348
- - Browser debugging tools
349
- - Performance profiling
350
-
351
- Args:
352
- **kwargs: Interactive mode options
353
-
354
- Example:
355
- await parser.interactive()
356
- """
357
- if self._interactive_mode is None:
358
- self._interactive_mode = InteractiveMode(
359
- parser=self, config=self._config.interactive_config
360
- )
361
-
362
- await self._interactive_mode.start(**kwargs)
363
-
364
- # ==========================================
365
- # UTILITY METHODS
366
- # ==========================================
367
-
368
- def now(self) -> str:
369
- """Get current timestamp in ISO format."""
370
- return datetime.now(timezone.utc).isoformat()
371
-
372
- def get_system_info(self) -> dict:
373
- """Get system information for debugging."""
374
- import psutil
375
- import os
376
-
377
- process = psutil.Process(os.getpid())
378
- memory_mb = process.memory_info().rss / 1024 / 1024
379
-
380
- return {
381
- "parser_id": self.parser_id,
382
- "parser_name": self.parser_name,
383
- "python_version": sys.version,
384
- "platform": sys.platform,
385
- "working_directory": str(Path.cwd()),
386
- "memory_usage_mb": round(memory_mb, 2),
387
- "environment": dict(os.environ),
388
- "config": self._config.model_dump(),
389
- }
390
-
391
- async def health_check(self) -> dict:
392
- """Comprehensive health check."""
393
- health = {
394
- "status": "healthy",
395
- "timestamp": self.now(),
396
- "parser_id": self.parser_id,
397
- "services": {},
398
- }
399
-
400
- # Check each service individually
401
- service_errors = []
402
-
403
- # Check browser service
404
- if self._browser:
405
- try:
406
- health["services"]["browser"] = await self._browser.health_check()
407
- except Exception as e:
408
- health["services"]["browser"] = {"status": "error", "error": str(e)}
409
- service_errors.append(f"browser: {e}")
410
-
411
- # Check LLM service
412
- if self._llm:
413
- try:
414
- health["services"]["llm"] = await self._llm.health_check()
415
- except Exception as e:
416
- health["services"]["llm"] = {"status": "error", "error": str(e)}
417
- service_errors.append(f"llm: {e}")
418
-
419
- # Check Browser LLM service
420
- if self._browser_llm:
421
- try:
422
- health["services"]["browser_llm"] = await self._browser_llm.health_check()
423
- except Exception as e:
424
- health["services"]["browser_llm"] = {"status": "error", "error": str(e)}
425
- service_errors.append(f"browser_llm: {e}")
426
-
427
- # Check WebSocket service
428
- if self._websocket:
429
- try:
430
- health["services"]["websocket"] = await self._websocket.health_check()
431
- except Exception as e:
432
- health["services"]["websocket"] = {"status": "error", "error": str(e)}
433
- service_errors.append(f"websocket: {e}")
434
-
435
- # Check logger service
436
- if self._logger:
437
- try:
438
- health["services"]["logger"] = self._logger.health_check()
439
- except Exception as e:
440
- health["services"]["logger"] = {"status": "error", "error": str(e)}
441
- service_errors.append(f"logger: {e}")
442
-
443
- # Check metrics service
444
- if self._metrics:
445
- try:
446
- health["services"]["metrics"] = self._metrics.health_check()
447
- except Exception as e:
448
- health["services"]["metrics"] = {"status": "error", "error": str(e)}
449
- service_errors.append(f"metrics: {e}")
450
-
451
- # Determine overall status
452
- if service_errors:
453
- health["status"] = "degraded" # Instead of "unhealthy"
454
- health["service_errors"] = service_errors
455
-
456
- # Add system info as expected by tests
457
- health["system_info"] = {
458
- "parser_version": "3.0",
459
- "environment": getattr(self._config, "environment", "development"),
460
- "active_services": len(health["services"]),
461
- }
462
-
463
- return health
464
-
465
- async def cleanup(self):
466
- """Clean up resources gracefully."""
467
- self.logger.info("Starting parser cleanup...")
468
-
469
- # Cleanup services (gracefully handle errors)
470
- cleanup_errors = []
471
-
472
- if self._browser:
473
- try:
474
- await self._browser.cleanup()
475
- except Exception as e:
476
- cleanup_errors.append(f"browser: {e}")
477
-
478
- if self._llm:
479
- try:
480
- await self._llm.cleanup()
481
- except Exception as e:
482
- cleanup_errors.append(f"llm: {e}")
483
-
484
- if self._browser_llm:
485
- try:
486
- await self._browser_llm.cleanup()
487
- except Exception as e:
488
- cleanup_errors.append(f"browser_llm: {e}")
489
-
490
- if self._websocket:
491
- try:
492
- await self._websocket.cleanup()
493
- except Exception as e:
494
- cleanup_errors.append(f"websocket: {e}")
495
-
496
- if self._logger:
497
- try:
498
- await self._logger.cleanup()
499
- except Exception as e:
500
- cleanup_errors.append(f"logger: {e}")
501
-
502
- if self._metrics:
503
- try:
504
- await self._metrics.cleanup()
505
- except Exception as e:
506
- cleanup_errors.append(f"metrics: {e}")
507
-
508
- # Log cleanup errors but don't raise
509
- if cleanup_errors:
510
- self.logger.warning(f"Cleanup errors: {'; '.join(cleanup_errors)}")
511
-
512
- self.logger.info("Parser cleanup completed")
513
-
514
- # ==========================================
515
- # PRIVATE METHODS
516
- # ==========================================
517
-
518
- def _generate_parser_id(self) -> str:
519
- """Generate unique parser ID."""
520
- class_name = self.__class__.__name__.lower()
521
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
522
- return f"{class_name}_{timestamp}"
523
-
524
- def _generate_parser_name(self) -> str:
525
- """Generate human-readable parser name."""
526
- class_name = self.__class__.__name__
527
- if class_name.endswith("Parser"):
528
- class_name = class_name[:-6] # Remove "Parser" suffix
529
-
530
- # Convert CamelCase to Title Case
531
- import re
532
-
533
- name = re.sub(r"([A-Z])", r" \1", class_name).strip()
534
- return name if name else f"UnrealOn Parser {self.parser_id[-8:]}"
535
-
536
- def __repr__(self) -> str:
537
- return f"<{self.__class__.__name__}(id='{self.parser_id}', name='{self.parser_name}')>"
538
-
539
- def __str__(self) -> str:
540
- return f"{self.parser_name} ({self.parser_id})"
541
-
542
- # ==========================================
543
- # CONTEXT MANAGER SUPPORT
544
- # ==========================================
545
-
546
- async def __aenter__(self):
547
- """Async context manager entry."""
548
- await self._initialize()
549
- return self
550
-
551
- async def __aexit__(self, exc_type, exc_val, exc_tb):
552
- """Async context manager exit."""
553
- await self.cleanup()
554
- # Return None/False to let exceptions propagate
555
- return False
556
-
557
- async def _initialize(self):
558
- """Initialize parser for context manager usage."""
559
- if not self._is_initialized:
560
- self.logger.info(f"Initializing parser: {self.parser_name}")
561
- self._is_initialized = True
562
-
563
- def _generate_parser_id(self) -> str:
564
- """Generate unique parser ID."""
565
- import time
566
- import uuid
567
-
568
- timestamp = int(time.time() * 1000000) # Microseconds for uniqueness
569
- short_uuid = str(uuid.uuid4())[:8]
570
- return f"parser_{timestamp}_{short_uuid}"
571
-
572
- def _generate_parser_name(self) -> str:
573
- """Generate parser name."""
574
- return f"UnrealOn Parser {self.parser_id[-8:]}"
575
-
576
-
577
- # ==========================================
578
- # CONVENIENCE FUNCTIONS
579
- # ==========================================
580
-
581
-
582
- async def quick_extract(url: str, selector: str, **kwargs) -> List[str]:
583
- """
584
- 🚀 Quick extraction without creating parser class
585
-
586
- Convenience function for simple one-off extractions.
587
-
588
- Args:
589
- url: Target URL
590
- selector: CSS selector
591
- **kwargs: Additional options
592
-
593
- Returns:
594
- List of extracted text
595
-
596
- Example:
597
- headlines = await quick_extract(
598
- "https://news.com",
599
- ".headline"
600
- )
601
- """
602
-
603
- class QuickParser(Parser):
604
- async def parse(self):
605
- return await self.browser.extract(url, selector, **kwargs)
606
-
607
- result = await QuickParser().test()
608
- return result.get("data", [])
609
-
610
-
611
- async def quick_extract_with_ai(url: str, schema: dict, **kwargs) -> dict:
612
- """
613
- 🤖 Quick AI extraction without creating parser class
614
-
615
- Convenience function for AI-powered extractions.
616
-
617
- Args:
618
- url: Target URL
619
- schema: Data schema for AI extraction
620
- **kwargs: Additional options
621
-
622
- Returns:
623
- Structured data extracted by AI
624
-
625
- Example:
626
- products = await quick_extract_with_ai(
627
- "https://shop.com",
628
- schema={"products": [{"name": "string", "price": "number"}]}
629
- )
630
- """
631
-
632
- class QuickAIParser(Parser):
633
- async def parse(self):
634
- html = await self.browser.get_html(url)
635
- return await self.llm.extract(html, schema, **kwargs)
636
-
637
- result = await QuickAIParser().test()
638
- return result.get("data", {})
@@ -1,66 +0,0 @@
1
- """
2
- Data Transfer Objects for UnrealOn Driver v3.0
3
-
4
- Type-safe configuration and data models using Pydantic v2.
5
- COMPLIANCE: 100% Pydantic v2 compliant.
6
- """
7
-
8
- from .cli import ParserInstanceConfig, create_parser_config
9
- from .config import LogLevel
10
- from .execution import (
11
- ParserTestConfig,
12
- DaemonModeConfig,
13
- ScheduledModeConfig,
14
- InteractiveModeConfig,
15
- ExecutionResult,
16
- ErrorInfo,
17
- PerformanceMetrics,
18
- ExecutionEnvironment,
19
- ScheduledModeStatus,
20
- DaemonCommandResult,
21
- DaemonStatusResult,
22
- DaemonHealthResult,
23
- )
24
- from .events import (
25
- DriverEventType,
26
- DriverEventContext,
27
- DriverEventMetrics,
28
- BROWSER_EVENTS,
29
- PARSER_EVENTS,
30
- LLM_EVENTS,
31
- SCHEDULER_EVENTS,
32
- WEBSOCKET_EVENTS,
33
- METRICS_EVENTS,
34
- ERROR_EVENTS,
35
- )
36
-
37
- __all__ = [
38
- "ParserInstanceConfig",
39
- "create_parser_config",
40
- "LogLevel",
41
- # Execution models
42
- "ParserTestConfig",
43
- "DaemonModeConfig",
44
- "ScheduledModeConfig",
45
- "InteractiveModeConfig",
46
- "ExecutionResult",
47
- "ErrorInfo",
48
- "PerformanceMetrics",
49
- "ExecutionEnvironment",
50
- # Daemon models
51
- "ScheduledModeStatus",
52
- "DaemonCommandResult",
53
- "DaemonStatusResult",
54
- "DaemonHealthResult",
55
- # Event models
56
- "DriverEventType",
57
- "DriverEventContext",
58
- "DriverEventMetrics",
59
- "BROWSER_EVENTS",
60
- "PARSER_EVENTS",
61
- "LLM_EVENTS",
62
- "SCHEDULER_EVENTS",
63
- "WEBSOCKET_EVENTS",
64
- "METRICS_EVENTS",
65
- "ERROR_EVENTS",
66
- ]