unrealon 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.8.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.8.dist-info/METADATA +0 -803
  71. unrealon-1.0.8.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -0,0 +1,227 @@
1
+ """
2
+ Daemon Manager - Base class for parser daemons
3
+
4
+ Strict Pydantic v2 compliance and type safety
5
+ """
6
+
7
+ import asyncio
8
+ import signal
9
+ import time
10
+ from datetime import datetime, timedelta
11
+ from pathlib import Path
12
+ from typing import Optional, Dict, Any
13
+ from pydantic import BaseModel, Field
14
+
15
+ from .parser_manager import ParserManager, ParserManagerConfig
16
+ from .managers import ParserConfig, LoggingConfig, HTMLCleaningConfig, BrowserConfig
17
+
18
+ # RPC removed - all commands go through WebSocket bridge
19
+
20
+
21
class DaemonStatus(BaseModel):
    """Daemon status information."""

    # --- Required: identity and lifecycle ---------------------------------
    running: bool = Field(
        ...,
        description="Whether daemon is running",
    )
    parser_id: str = Field(
        ...,
        description="Parser identifier",
    )
    started_at: datetime = Field(
        ...,
        description="Daemon start time",
    )
    uptime_seconds: float = Field(
        ...,
        description="Uptime in seconds",
    )

    # --- Optional: scheduling ----------------------------------------------
    schedule_enabled: bool = Field(
        default=False,
        description="Whether scheduling is active",
    )
    next_run_at: Optional[datetime] = Field(
        default=None,
        description="Next scheduled run",
    )

    # --- Optional: run counters --------------------------------------------
    total_runs: int = Field(
        default=0,
        description="Total completed runs",
    )
    successful_runs: int = Field(
        default=0,
        description="Successful runs",
    )
    failed_runs: int = Field(
        default=0,
        description="Failed runs",
    )
32
+
33
+
34
class DaemonManager(ParserManager):
    """Base daemon manager with scheduling and status display.

    Wraps a ``ParserManager`` in a long-running loop that optionally triggers
    a parsing run every ``interval_minutes`` and prints a live status block
    to stdout. Subclasses are expected to override ``_execute_run``.
    """

    def __init__(self, parser_name: str, parser_type: str, system_dir: str,
                 bridge_enabled: bool = False, websocket_url: str = "ws://localhost:8000/ws"):
        """Build the parser configuration and install shutdown signal handlers.

        Args:
            parser_name: Human-readable parser name; also used for the logging config.
            parser_type: Parser type used for classification.
            system_dir: Directory path, converted to ``Path`` for ``ParserConfig``.
            bridge_enabled: Forwarded to ``ParserManagerConfig``.
            websocket_url: Accepted for API compatibility.
                NOTE(review): this value is currently not forwarded to any
                config object - confirm whether it should be passed on.
        """
        # Create parser config
        parser_config = ParserConfig(
            parser_name=parser_name,
            parser_type=parser_type,
            system_dir=Path(system_dir)
        )

        # Create logging config
        logging_config = LoggingConfig(parser_name=parser_name)

        # Create other configs
        html_config = HTMLCleaningConfig()
        browser_config = BrowserConfig()

        # Create manager config
        manager_config = ParserManagerConfig(
            parser_config=parser_config,
            logging_config=logging_config,
            html_config=html_config,
            browser_config=browser_config,
            bridge_enabled=bridge_enabled
        )

        super().__init__(manager_config)

        # Daemon state
        self.running = False
        self.started_at: Optional[datetime] = None
        self.next_run_at: Optional[datetime] = None

        # Statistics
        self.total_runs = 0
        self.successful_runs = 0
        self.failed_runs = 0

        # Setup signal handlers so SIGINT/SIGTERM stop the loop gracefully.
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signum: int, frame) -> None:
        """Handle shutdown signals by asking the daemon loop to stop."""
        self.logger.info(f"🛑 Received signal {signum}, shutting down...")
        self.running = False

    async def start_daemon(self, schedule_enabled: bool = False, interval_minutes: Optional[int] = None) -> bool:
        """Start the daemon and block until the loop exits.

        Args:
            schedule_enabled: When true (and ``interval_minutes`` is set), runs
                are triggered periodically.
            interval_minutes: Minutes between scheduled runs.

        Returns:
            ``True`` when the loop ended normally, ``False`` when an exception
            escaped startup or the loop. ``cleanup()`` is awaited in both cases.
        """
        try:
            self.logger.info("🚀 Starting daemon...")
            self.running = True
            self.started_at = datetime.now()

            # Initialize parser
            await self.initialize()

            # Calculate next run if scheduling enabled
            if schedule_enabled and interval_minutes:
                self._calculate_next_run(interval_minutes)

            # Start main loop
            await self._daemon_loop(schedule_enabled, interval_minutes)

            return True

        except Exception as e:
            self.logger.error(f"❌ Daemon startup failed: {e}")
            return False
        finally:
            # Never keep reporting "running" once this coroutine has returned,
            # including when initialize() failed before the loop started.
            self.running = False
            await self.cleanup()

    def _calculate_next_run(self, interval_minutes: int) -> None:
        """Set ``next_run_at`` to ``interval_minutes`` from the current local time."""
        now = datetime.now()
        self.next_run_at = now + timedelta(minutes=interval_minutes)

    async def _daemon_loop(self, schedule_enabled: bool, interval_minutes: Optional[int]) -> None:
        """Main daemon loop: refresh the status display and trigger due runs.

        Polls every 0.1 s while ``self.running`` is true. Exceptions raised
        inside one iteration are logged and followed by a 1 s back-off rather
        than terminating the loop.
        """
        self.logger.info("🔄 Daemon loop started")

        if schedule_enabled and self.next_run_at:
            self.logger.info(f"⏰ Next run: {self.next_run_at.strftime('%Y-%m-%d %H:%M:%S')}")
        else:
            self.logger.info("📋 Manual mode")

        last_status_update = time.time()

        while self.running:
            try:
                current_time = time.time()

                # Update status every second
                if current_time - last_status_update >= 1.0:
                    self._display_status(schedule_enabled)
                    last_status_update = current_time

                # Check for scheduled run
                if self._should_run_now():
                    await self._execute_run()
                    if interval_minutes:
                        self._calculate_next_run(interval_minutes)

                await asyncio.sleep(0.1)

            except Exception as e:
                self.logger.error(f"❌ Daemon loop error: {e}")
                await asyncio.sleep(1)

    def _display_status(self, schedule_enabled: bool) -> None:
        """Print a three-line live status block to stdout (no-op when stopped)."""
        if not self.running:
            return

        # Clear previous lines: erase the current line and move the cursor up,
        # three times, so the new block overwrites the previous one.
        print("\033[2K\033[1A" * 3, end="")

        now = datetime.now()
        uptime = (now - self.started_at).total_seconds() if self.started_at else 0

        print(f"🕐 {now.strftime('%H:%M:%S')} | ⏱️ Uptime: {int(uptime//3600):02d}:{int((uptime%3600)//60):02d}:{int(uptime%60):02d}")

        # Schedule status
        if self.next_run_at and schedule_enabled:
            seconds_until = (self.next_run_at - now).total_seconds()
            if seconds_until > 0:
                hours = int(seconds_until // 3600)
                minutes = int((seconds_until % 3600) // 60)
                seconds = int(seconds_until % 60)
                print(f"⏰ Next run in: {hours:02d}:{minutes:02d}:{seconds:02d} | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
            else:
                print(f"🚀 Running now... | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")
        else:
            print(f"📋 Manual mode | 📊 Runs: {self.successful_runs}✅ {self.failed_runs}❌")

        status = "🟢 RUNNING" if self.running else "🔴 STOPPED"
        # NOTE(review): get_status() reads self.config.parser_config.*, while
        # this reads system_dir directly from self.config - confirm that the
        # manager config exposes system_dir at this level.
        print(f"{status} | 💾 System: {self.config.system_dir}")

    def _should_run_now(self) -> bool:
        """Return True when a next run is scheduled and its time has arrived."""
        if not self.next_run_at:
            return False
        return datetime.now() >= self.next_run_at

    async def _execute_run(self) -> None:
        """Execute a parsing run - override in subclass.

        Every attempt is counted exactly once in ``total_runs`` and in either
        ``successful_runs`` or ``failed_runs``, including attempts that raise,
        so ``total_runs == successful_runs + failed_runs`` always holds.
        """
        self.logger.info("🚀 Starting parsing run...")

        try:
            # Default implementation - override in subclass
            result = await self.parse_url("https://example.com")
            # Accept both a boolean flag and the string form "true".
            # NOTE(review): the exact return shape of parse_url is not visible
            # here - confirm which representation it uses.
            succeeded = result.get("success") in (True, "true")
        except Exception as e:
            # A run that raised is still a finished (failed) run.
            self.total_runs += 1
            self.failed_runs += 1
            self.logger.error(f"❌ Run exception: {e}")
            return

        self.total_runs += 1
        if succeeded:
            self.successful_runs += 1
            self.logger.info("✅ Run completed successfully")
        else:
            self.failed_runs += 1
            self.logger.error("❌ Run failed")

    def get_status(self) -> DaemonStatus:
        """Return a snapshot of the daemon state and run counters.

        When the daemon has not been started yet, ``started_at`` falls back to
        the current time and ``uptime_seconds`` is ``0.0``.
        """
        now = datetime.now()
        uptime = (now - self.started_at).total_seconds() if self.started_at else 0.0

        return DaemonStatus(
            running=self.running,
            parser_id=self.config.parser_config.parser_name,
            started_at=self.started_at or now,
            uptime_seconds=uptime,
            schedule_enabled=bool(self.next_run_at),
            next_run_at=self.next_run_at,
            total_runs=self.total_runs,
            successful_runs=self.successful_runs,
            failed_runs=self.failed_runs
        )

    async def cleanup(self) -> None:
        """Cleanup daemon resources by delegating to the parent cleanup."""
        await super().cleanup()
@@ -0,0 +1,46 @@
1
+ """
2
+ Parser Managers - Specialized management components
3
+
4
+ All managers follow strict Pydantic v2 compliance and CRITICAL_REQUIREMENTS.md
5
+ """
6
+
7
+ from .config import ConfigManager, ParserConfig
8
+ from .result import ResultManager, ParseResult, ParseMetrics, OperationStatus
9
+ from .error import ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity
10
+ from .logging import LoggingManager, LoggingConfig, LogLevel, LogContext
11
+ from .html import HTMLManager, HTMLCleaningConfig, HTMLCleaningStats
12
+ from .browser import BrowserManager, BrowserConfig, BrowserStats
13
+
14
+ __all__ = [
15
+ # Config Manager
16
+ "ConfigManager",
17
+ "ParserConfig",
18
+
19
+ # Result Manager
20
+ "ResultManager",
21
+ "ParseResult",
22
+ "ParseMetrics",
23
+ "OperationStatus",
24
+
25
+ # Error Manager
26
+ "ErrorManager",
27
+ "RetryConfig",
28
+ "ErrorInfo",
29
+ "ErrorSeverity",
30
+
31
+ # Logging Manager
32
+ "LoggingManager",
33
+ "LoggingConfig",
34
+ "LogLevel",
35
+ "LogContext",
36
+
37
+ # HTML Manager
38
+ "HTMLManager",
39
+ "HTMLCleaningConfig",
40
+ "HTMLCleaningStats",
41
+
42
+ # Browser Manager
43
+ "BrowserManager",
44
+ "BrowserConfig",
45
+ "BrowserStats"
46
+ ]
@@ -0,0 +1,51 @@
1
+ """
2
+ Browser Manager - Wrapper over unrealon_driver.browser
3
+
4
+ Simple wrapper that inherits from the main BrowserManager
5
+ """
6
+
7
+ from typing import Optional, Dict, Any
8
+ from pydantic import BaseModel, Field, ConfigDict
9
+
10
+ from unrealon_driver.browser import BrowserManager as BaseBrowserManager, BrowserConfig as BaseBrowserConfig
11
+
12
+
13
class BrowserConfig(BaseBrowserConfig):
    """Extended browser configuration for parser manager"""

    # Re-validate on attribute assignment and reject unknown fields.
    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True,
    )
17
+
18
+
19
class BrowserStats(BaseModel):
    """Browser usage statistics"""

    # Re-validate on attribute assignment and reject unknown fields.
    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True,
    )

    # Every counter starts at zero and is constrained to be non-negative.
    pages_visited: int = Field(ge=0, default=0)
    total_load_time: float = Field(ge=0.0, default=0.0)
    average_load_time: float = Field(ge=0.0, default=0.0)
    screenshots_taken: int = Field(ge=0, default=0)
    cookies_saved: int = Field(ge=0, default=0)
    errors_count: int = Field(ge=0, default=0)
    session_duration: float = Field(ge=0.0, default=0.0)
31
+
32
+
33
class BrowserManager(BaseBrowserManager):
    """Parser-side wrapper around the base browser manager.

    Adds a ``BrowserStats`` holder and tags the base health report with a
    ``"parser_manager"`` marker.
    """

    def __init__(self, config: BrowserConfig):
        """Initialise the base manager, then attach a fresh stats object."""
        super().__init__(config)
        self._stats = BrowserStats()

    def get_stats(self) -> BrowserStats:
        """Return the statistics object held by this manager."""
        return self._stats

    async def health_check(self) -> Dict[str, Any]:
        """Return the base health report extended with ``parser_manager=True``."""
        report = dict(await super().health_check())
        report["parser_manager"] = True
        return report
@@ -0,0 +1,281 @@
1
+ """
2
+ Config Manager - Type-safe configuration management with Pydantic v2
3
+
4
+ Strict compliance with CRITICAL_REQUIREMENTS.md:
5
+ - No Dict[str, Any] usage
6
+ - Complete type annotations
7
+ - Pydantic v2 models everywhere
8
+ - No mutable defaults
9
+ """
10
+
11
+ from typing import Optional, List
12
+ from pathlib import Path
13
+ from pydantic import BaseModel, Field, ConfigDict, field_validator
14
+ import uuid
15
+
16
+
17
class ParserConfig(BaseModel):
    """
    Parser configuration with smart defaults and strict typing

    Zero configuration approach - everything has sensible defaults.

    Unknown fields are rejected (``extra="forbid"``), string values are
    stripped of surrounding whitespace, and assignments are re-validated.

    Note: creating an instance has a filesystem side effect - see
    ``model_post_init``, which creates the system and screenshots
    directories.
    """
    model_config = ConfigDict(
        validate_assignment=True,
        extra="forbid",
        str_strip_whitespace=True
    )

    # Parser identity
    parser_id: str = Field(
        default_factory=lambda: f"parser_{uuid.uuid4().hex[:8]}",
        description="Unique parser identifier"
    )
    parser_name: str = Field(
        default="UnrealOn Parser",
        description="Human-readable parser name"
    )
    parser_type: str = Field(
        default="generic",
        description="Parser type for classification"
    )

    # Connection settings
    websocket_url: str = Field(
        default="ws://localhost:8002/ws",
        description="WebSocket bridge URL"
    )
    api_key: Optional[str] = Field(
        default=None,
        description="API key for authentication"
    )

    # Browser settings
    headless: bool = Field(
        default=True,
        description="Run browser in headless mode"
    )
    stealth_mode: bool = Field(
        default=True,
        description="Enable stealth mode"
    )
    user_agent: Optional[str] = Field(
        default=None,
        description="Custom user agent"
    )

    # HTML cleaning settings
    aggressive_cleaning: bool = Field(
        default=True,
        description="Enable aggressive HTML cleaning"
    )
    preserve_js_data: bool = Field(
        default=True,
        description="Preserve JavaScript data during cleaning"
    )

    # Timeouts (in milliseconds), each limited to 1 second .. 5 minutes
    page_timeout: int = Field(
        default=30000,
        ge=1000,
        le=300000,
        description="Page load timeout in milliseconds"
    )
    navigation_timeout: int = Field(
        default=30000,
        ge=1000,
        le=300000,
        description="Navigation timeout in milliseconds"
    )

    # Directories (filled in by model_post_init when left as None)
    system_dir: Optional[Path] = Field(
        default=None,
        description="System directory for logs and data"
    )
    screenshots_dir: Optional[Path] = Field(
        default=None,
        description="Screenshots directory"
    )

    # Development settings
    debug: bool = Field(
        default=False,
        description="Enable debug mode"
    )
    save_html: bool = Field(
        default=False,
        description="Save HTML files for debugging"
    )
    save_screenshots: bool = Field(
        default=False,
        description="Save screenshots for debugging"
    )

    @field_validator('parser_name')
    @classmethod
    def validate_parser_name(cls, v: str) -> str:
        """Return the stripped parser name; raise ValueError if it is blank."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("Parser name cannot be empty")
        return stripped

    @field_validator('parser_type')
    @classmethod
    def validate_parser_type(cls, v: str) -> str:
        """Return ``v`` if it is an allowed parser type; raise ValueError otherwise."""
        allowed_types = {
            "generic", "ecommerce", "news", "jobs",
            "real_estate", "social_media", "reviews",
            "events", "directory"
        }
        if v not in allowed_types:
            # Sort so the message is stable; set iteration order for strings
            # changes between interpreter runs.
            raise ValueError(f"Parser type must be one of: {', '.join(sorted(allowed_types))}")
        return v

    @field_validator('websocket_url')
    @classmethod
    def validate_websocket_url(cls, v: str) -> str:
        """Return ``v`` if it starts with ws:// or wss://; raise ValueError otherwise."""
        if not v.startswith(('ws://', 'wss://')):
            raise ValueError("WebSocket URL must start with ws:// or wss://")
        return v

    def model_post_init(self, __context: object) -> None:
        """
        Fill in default directories and create them on disk.

        ``system_dir`` defaults to ``<current working directory>/system`` and
        ``screenshots_dir`` defaults to ``<system_dir>/screenshots``. Both
        directories are then created (including parents) if missing.
        """
        # Setup system directory if not provided
        if self.system_dir is None:
            self.system_dir = Path.cwd() / "system"

        # Setup screenshots directory if not provided
        if self.screenshots_dir is None:
            self.screenshots_dir = self.system_dir / "screenshots"

        # Create directories
        self.system_dir.mkdir(parents=True, exist_ok=True)
        self.screenshots_dir.mkdir(parents=True, exist_ok=True)
157
+
158
+
159
class ConfigManager:
    """
    🔧 Config Manager - Type-safe configuration management

    Holds one active ParserConfig plus a set of named profiles kept in
    memory. Every change goes back through ParserConfig validation.

    Features:
    - Pydantic v2 validation
    - Environment variable integration
    - Configuration profiles
    - Type safety enforcement
    """

    # Fields whose environment-variable text needs conversion before validation.
    _BOOL_FIELDS = frozenset({
        'headless', 'stealth_mode', 'aggressive_cleaning',
        'preserve_js_data', 'debug', 'save_html', 'save_screenshots',
    })
    _INT_FIELDS = frozenset({'page_timeout', 'navigation_timeout'})
    _PATH_FIELDS = frozenset({'system_dir', 'screenshots_dir'})
    # Case-insensitive spellings treated as True for boolean fields.
    _TRUE_STRINGS = ('true', '1', 'yes', 'on')

    def __init__(self, config: Optional[ParserConfig] = None):
        """Start from ``config``, or from a default ParserConfig when omitted."""
        self._config: ParserConfig = config if config is not None else ParserConfig()
        self._profiles: dict[str, ParserConfig] = {}
        self._current_profile: Optional[str] = None

    @property
    def config(self) -> ParserConfig:
        """Get current configuration"""
        return self._config

    def update_config(self, **kwargs) -> None:
        """
        Update configuration with new values.

        The current values are dumped, overlaid with ``kwargs`` and validated
        as a whole, so an invalid update raises and leaves the current
        configuration untouched.
        """
        current_data = self._config.model_dump()
        current_data.update(kwargs)
        self._config = ParserConfig.model_validate(current_data)

    def load_from_dict(self, config_dict: dict[str, object]) -> None:
        """Replace the configuration with one validated from ``config_dict``."""
        self._config = ParserConfig.model_validate(config_dict)

    def load_from_env(self, prefix: str = "PARSER_") -> None:
        """
        Overlay configuration values found in environment variables.

        A variable ``<prefix><NAME>`` maps to the field ``name`` (lowercased).
        Prefixed variables that do not match a ParserConfig field are
        ignored; otherwise any unrelated variable sharing the prefix would be
        rejected by the model's ``extra="forbid"`` setting.

        Raises:
            ValueError: if a timeout variable is not an integer, or the
                resulting configuration fails validation.
        """
        import os

        known_fields = ParserConfig.model_fields
        env_config: dict[str, object] = {}
        for key, value in os.environ.items():
            if not key.startswith(prefix):
                continue
            config_key = key[len(prefix):].lower()
            if config_key not in known_fields:
                continue

            # Convert string values to appropriate types
            if config_key in self._BOOL_FIELDS:
                env_config[config_key] = value.lower() in self._TRUE_STRINGS
            elif config_key in self._INT_FIELDS:
                env_config[config_key] = int(value)
            elif config_key in self._PATH_FIELDS:
                env_config[config_key] = Path(value)
            else:
                env_config[config_key] = value

        if env_config:
            current_data = self._config.model_dump()
            current_data.update(env_config)
            self._config = ParserConfig.model_validate(current_data)

    def save_profile(self, name: str) -> None:
        """
        Save a copy of the current configuration as profile ``name``.

        Raises:
            ValueError: if ``name`` is empty or only whitespace.
        """
        if not name.strip():
            raise ValueError("Profile name cannot be empty")
        # Re-validate from a dump so the stored profile is an independent object.
        self._profiles[name] = ParserConfig.model_validate(self._config.model_dump())

    def load_profile(self, name: str) -> None:
        """
        Make a copy of the saved profile ``name`` the current configuration.

        Raises:
            ValueError: if no profile with that name exists.
        """
        if name not in self._profiles:
            raise ValueError(f"Profile '{name}' not found")
        self._config = ParserConfig.model_validate(self._profiles[name].model_dump())
        self._current_profile = name

    def get_profiles(self) -> List[str]:
        """Get list of available profiles"""
        return list(self._profiles)

    def delete_profile(self, name: str) -> None:
        """
        Delete a saved profile.

        Raises:
            ValueError: if no profile with that name exists.
        """
        if name not in self._profiles:
            raise ValueError(f"Profile '{name}' not found")
        del self._profiles[name]
        if self._current_profile == name:
            self._current_profile = None

    def get_current_profile(self) -> Optional[str]:
        """Get current profile name"""
        return self._current_profile

    def validate_config(self) -> List[str]:
        """
        Check the current configuration and return human-readable issues.

        Checks that a file can be created in the system directory and that
        both timeouts are at least 5 seconds. Returns an empty list when
        nothing is wrong; problems are reported, never raised.
        """
        import tempfile

        issues: List[str] = []

        # Check directory permissions
        system_dir = self._config.system_dir
        if system_dir is None:
            # Without this guard tempfile would silently probe the default temp dir.
            issues.append("System directory issue: system directory is not configured")
        else:
            try:
                # A uniquely named probe file cannot clobber an existing file
                # and is removed automatically when the context exits.
                with tempfile.NamedTemporaryFile(dir=system_dir):
                    pass
            except PermissionError:
                issues.append(f"No write permission for system directory: {system_dir}")
            except Exception as e:
                issues.append(f"System directory issue: {e}")

        # Check timeouts are reasonable
        if self._config.page_timeout < 5000:
            issues.append("Page timeout is very low (< 5 seconds)")
        if self._config.navigation_timeout < 5000:
            issues.append("Navigation timeout is very low (< 5 seconds)")

        return issues

    def to_dict(self) -> dict[str, object]:
        """Export configuration as a dictionary of JSON-compatible values."""
        return self._config.model_dump(mode='json')

    def to_env_format(self, prefix: str = "PARSER_") -> List[str]:
        """
        Export configuration as ``KEY=value`` environment variable lines.

        Fields whose value is None are omitted: writing ``KEY=None`` would be
        read back by ``load_from_env`` as the literal string "None".
        """
        return [
            f"{prefix}{key.upper()}={value}"
            for key, value in self.to_dict().items()
            if value is not None
        ]