unrealon 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. unrealon/__init__.py +23 -21
  2. unrealon-1.1.0.dist-info/METADATA +164 -0
  3. unrealon-1.1.0.dist-info/RECORD +82 -0
  4. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info}/WHEEL +1 -1
  5. unrealon-1.1.0.dist-info/entry_points.txt +9 -0
  6. {unrealon-1.0.9.dist-info → unrealon-1.1.0.dist-info/licenses}/LICENSE +1 -1
  7. unrealon_bridge/__init__.py +114 -0
  8. unrealon_bridge/cli.py +316 -0
  9. unrealon_bridge/client/__init__.py +93 -0
  10. unrealon_bridge/client/base.py +78 -0
  11. unrealon_bridge/client/commands.py +89 -0
  12. unrealon_bridge/client/connection.py +90 -0
  13. unrealon_bridge/client/events.py +65 -0
  14. unrealon_bridge/client/health.py +38 -0
  15. unrealon_bridge/client/html_parser.py +146 -0
  16. unrealon_bridge/client/logging.py +139 -0
  17. unrealon_bridge/client/proxy.py +70 -0
  18. unrealon_bridge/client/scheduler.py +450 -0
  19. unrealon_bridge/client/session.py +70 -0
  20. unrealon_bridge/configs/__init__.py +14 -0
  21. unrealon_bridge/configs/bridge_config.py +212 -0
  22. unrealon_bridge/configs/bridge_config.yaml +39 -0
  23. unrealon_bridge/models/__init__.py +138 -0
  24. unrealon_bridge/models/base.py +28 -0
  25. unrealon_bridge/models/command.py +41 -0
  26. unrealon_bridge/models/events.py +40 -0
  27. unrealon_bridge/models/html_parser.py +79 -0
  28. unrealon_bridge/models/logging.py +55 -0
  29. unrealon_bridge/models/parser.py +63 -0
  30. unrealon_bridge/models/proxy.py +41 -0
  31. unrealon_bridge/models/requests.py +95 -0
  32. unrealon_bridge/models/responses.py +88 -0
  33. unrealon_bridge/models/scheduler.py +592 -0
  34. unrealon_bridge/models/session.py +28 -0
  35. unrealon_bridge/server/__init__.py +91 -0
  36. unrealon_bridge/server/base.py +171 -0
  37. unrealon_bridge/server/handlers/__init__.py +23 -0
  38. unrealon_bridge/server/handlers/command.py +110 -0
  39. unrealon_bridge/server/handlers/html_parser.py +139 -0
  40. unrealon_bridge/server/handlers/logging.py +95 -0
  41. unrealon_bridge/server/handlers/parser.py +95 -0
  42. unrealon_bridge/server/handlers/proxy.py +75 -0
  43. unrealon_bridge/server/handlers/scheduler.py +545 -0
  44. unrealon_bridge/server/handlers/session.py +66 -0
  45. unrealon_browser/__init__.py +61 -18
  46. unrealon_browser/{src/cli → cli}/browser_cli.py +6 -13
  47. unrealon_browser/{src/cli → cli}/cookies_cli.py +5 -1
  48. unrealon_browser/{src/core → core}/browser_manager.py +2 -2
  49. unrealon_browser/{src/managers → managers}/captcha.py +1 -1
  50. unrealon_browser/{src/managers → managers}/cookies.py +1 -1
  51. unrealon_browser/managers/logger_bridge.py +231 -0
  52. unrealon_browser/{src/managers → managers}/profile.py +1 -1
  53. unrealon_driver/__init__.py +73 -19
  54. unrealon_driver/browser/__init__.py +8 -0
  55. unrealon_driver/browser/config.py +74 -0
  56. unrealon_driver/browser/manager.py +416 -0
  57. unrealon_driver/exceptions.py +28 -0
  58. unrealon_driver/parser/__init__.py +55 -0
  59. unrealon_driver/parser/cli_manager.py +141 -0
  60. unrealon_driver/parser/daemon_manager.py +227 -0
  61. unrealon_driver/parser/managers/__init__.py +46 -0
  62. unrealon_driver/parser/managers/browser.py +51 -0
  63. unrealon_driver/parser/managers/config.py +281 -0
  64. unrealon_driver/parser/managers/error.py +412 -0
  65. unrealon_driver/parser/managers/html.py +732 -0
  66. unrealon_driver/parser/managers/logging.py +609 -0
  67. unrealon_driver/parser/managers/result.py +321 -0
  68. unrealon_driver/parser/parser_manager.py +628 -0
  69. unrealon/sdk_config.py +0 -88
  70. unrealon-1.0.9.dist-info/METADATA +0 -810
  71. unrealon-1.0.9.dist-info/RECORD +0 -246
  72. unrealon_browser/pyproject.toml +0 -182
  73. unrealon_browser/src/__init__.py +0 -62
  74. unrealon_browser/src/managers/logger_bridge.py +0 -395
  75. unrealon_driver/README.md +0 -204
  76. unrealon_driver/pyproject.toml +0 -187
  77. unrealon_driver/src/__init__.py +0 -90
  78. unrealon_driver/src/cli/__init__.py +0 -10
  79. unrealon_driver/src/cli/main.py +0 -66
  80. unrealon_driver/src/cli/simple.py +0 -510
  81. unrealon_driver/src/config/__init__.py +0 -11
  82. unrealon_driver/src/config/auto_config.py +0 -478
  83. unrealon_driver/src/core/__init__.py +0 -18
  84. unrealon_driver/src/core/exceptions.py +0 -289
  85. unrealon_driver/src/core/parser.py +0 -638
  86. unrealon_driver/src/dto/__init__.py +0 -66
  87. unrealon_driver/src/dto/cli.py +0 -119
  88. unrealon_driver/src/dto/config.py +0 -18
  89. unrealon_driver/src/dto/events.py +0 -237
  90. unrealon_driver/src/dto/execution.py +0 -313
  91. unrealon_driver/src/dto/services.py +0 -311
  92. unrealon_driver/src/execution/__init__.py +0 -23
  93. unrealon_driver/src/execution/daemon_mode.py +0 -317
  94. unrealon_driver/src/execution/interactive_mode.py +0 -88
  95. unrealon_driver/src/execution/modes.py +0 -45
  96. unrealon_driver/src/execution/scheduled_mode.py +0 -209
  97. unrealon_driver/src/execution/test_mode.py +0 -250
  98. unrealon_driver/src/logging/__init__.py +0 -24
  99. unrealon_driver/src/logging/driver_logger.py +0 -512
  100. unrealon_driver/src/services/__init__.py +0 -24
  101. unrealon_driver/src/services/browser_service.py +0 -726
  102. unrealon_driver/src/services/llm/__init__.py +0 -15
  103. unrealon_driver/src/services/llm/browser_llm_service.py +0 -363
  104. unrealon_driver/src/services/llm/llm.py +0 -195
  105. unrealon_driver/src/services/logger_service.py +0 -232
  106. unrealon_driver/src/services/metrics_service.py +0 -185
  107. unrealon_driver/src/services/scheduler_service.py +0 -489
  108. unrealon_driver/src/services/websocket_service.py +0 -362
  109. unrealon_driver/src/utils/__init__.py +0 -16
  110. unrealon_driver/src/utils/service_factory.py +0 -317
  111. unrealon_driver/src/utils/time_formatter.py +0 -338
  112. unrealon_llm/README.md +0 -44
  113. unrealon_llm/__init__.py +0 -26
  114. unrealon_llm/pyproject.toml +0 -154
  115. unrealon_llm/src/__init__.py +0 -228
  116. unrealon_llm/src/cli/__init__.py +0 -0
  117. unrealon_llm/src/core/__init__.py +0 -11
  118. unrealon_llm/src/core/smart_client.py +0 -438
  119. unrealon_llm/src/dto/__init__.py +0 -155
  120. unrealon_llm/src/dto/models/__init__.py +0 -0
  121. unrealon_llm/src/dto/models/config.py +0 -343
  122. unrealon_llm/src/dto/models/core.py +0 -328
  123. unrealon_llm/src/dto/models/enums.py +0 -123
  124. unrealon_llm/src/dto/models/html_analysis.py +0 -345
  125. unrealon_llm/src/dto/models/statistics.py +0 -473
  126. unrealon_llm/src/dto/models/translation.py +0 -383
  127. unrealon_llm/src/dto/models/type_conversion.py +0 -462
  128. unrealon_llm/src/dto/schemas/__init__.py +0 -0
  129. unrealon_llm/src/exceptions.py +0 -392
  130. unrealon_llm/src/llm_config/__init__.py +0 -20
  131. unrealon_llm/src/llm_config/logging_config.py +0 -178
  132. unrealon_llm/src/llm_logging/__init__.py +0 -42
  133. unrealon_llm/src/llm_logging/llm_events.py +0 -107
  134. unrealon_llm/src/llm_logging/llm_logger.py +0 -466
  135. unrealon_llm/src/managers/__init__.py +0 -15
  136. unrealon_llm/src/managers/cache_manager.py +0 -67
  137. unrealon_llm/src/managers/cost_manager.py +0 -107
  138. unrealon_llm/src/managers/request_manager.py +0 -298
  139. unrealon_llm/src/modules/__init__.py +0 -0
  140. unrealon_llm/src/modules/html_processor/__init__.py +0 -25
  141. unrealon_llm/src/modules/html_processor/base_processor.py +0 -415
  142. unrealon_llm/src/modules/html_processor/details_processor.py +0 -85
  143. unrealon_llm/src/modules/html_processor/listing_processor.py +0 -91
  144. unrealon_llm/src/modules/html_processor/models/__init__.py +0 -20
  145. unrealon_llm/src/modules/html_processor/models/processing_models.py +0 -40
  146. unrealon_llm/src/modules/html_processor/models/universal_model.py +0 -56
  147. unrealon_llm/src/modules/html_processor/processor.py +0 -102
  148. unrealon_llm/src/modules/llm/__init__.py +0 -0
  149. unrealon_llm/src/modules/translator/__init__.py +0 -0
  150. unrealon_llm/src/provider.py +0 -116
  151. unrealon_llm/src/utils/__init__.py +0 -95
  152. unrealon_llm/src/utils/common.py +0 -64
  153. unrealon_llm/src/utils/data_extractor.py +0 -188
  154. unrealon_llm/src/utils/html_cleaner.py +0 -767
  155. unrealon_llm/src/utils/language_detector.py +0 -308
  156. unrealon_llm/src/utils/models_cache.py +0 -592
  157. unrealon_llm/src/utils/smart_counter.py +0 -229
  158. unrealon_llm/src/utils/token_counter.py +0 -189
  159. unrealon_sdk/README.md +0 -25
  160. unrealon_sdk/__init__.py +0 -30
  161. unrealon_sdk/pyproject.toml +0 -231
  162. unrealon_sdk/src/__init__.py +0 -150
  163. unrealon_sdk/src/cli/__init__.py +0 -12
  164. unrealon_sdk/src/cli/commands/__init__.py +0 -22
  165. unrealon_sdk/src/cli/commands/benchmark.py +0 -42
  166. unrealon_sdk/src/cli/commands/diagnostics.py +0 -573
  167. unrealon_sdk/src/cli/commands/health.py +0 -46
  168. unrealon_sdk/src/cli/commands/integration.py +0 -498
  169. unrealon_sdk/src/cli/commands/reports.py +0 -43
  170. unrealon_sdk/src/cli/commands/security.py +0 -36
  171. unrealon_sdk/src/cli/commands/server.py +0 -483
  172. unrealon_sdk/src/cli/commands/servers.py +0 -56
  173. unrealon_sdk/src/cli/commands/tests.py +0 -55
  174. unrealon_sdk/src/cli/main.py +0 -126
  175. unrealon_sdk/src/cli/utils/reporter.py +0 -519
  176. unrealon_sdk/src/clients/openapi.yaml +0 -3347
  177. unrealon_sdk/src/clients/python_http/__init__.py +0 -3
  178. unrealon_sdk/src/clients/python_http/api_config.py +0 -228
  179. unrealon_sdk/src/clients/python_http/models/BaseModel.py +0 -12
  180. unrealon_sdk/src/clients/python_http/models/BroadcastDeliveryStats.py +0 -33
  181. unrealon_sdk/src/clients/python_http/models/BroadcastMessage.py +0 -17
  182. unrealon_sdk/src/clients/python_http/models/BroadcastMessageRequest.py +0 -35
  183. unrealon_sdk/src/clients/python_http/models/BroadcastPriority.py +0 -10
  184. unrealon_sdk/src/clients/python_http/models/BroadcastResponse.py +0 -21
  185. unrealon_sdk/src/clients/python_http/models/BroadcastResultResponse.py +0 -33
  186. unrealon_sdk/src/clients/python_http/models/BroadcastTarget.py +0 -11
  187. unrealon_sdk/src/clients/python_http/models/ConnectionStats.py +0 -27
  188. unrealon_sdk/src/clients/python_http/models/ConnectionsResponse.py +0 -21
  189. unrealon_sdk/src/clients/python_http/models/DeveloperMessageResponse.py +0 -23
  190. unrealon_sdk/src/clients/python_http/models/ErrorResponse.py +0 -25
  191. unrealon_sdk/src/clients/python_http/models/HTTPValidationError.py +0 -16
  192. unrealon_sdk/src/clients/python_http/models/HealthResponse.py +0 -23
  193. unrealon_sdk/src/clients/python_http/models/HealthStatus.py +0 -33
  194. unrealon_sdk/src/clients/python_http/models/LogLevel.py +0 -10
  195. unrealon_sdk/src/clients/python_http/models/LoggingRequest.py +0 -27
  196. unrealon_sdk/src/clients/python_http/models/LoggingResponse.py +0 -23
  197. unrealon_sdk/src/clients/python_http/models/MaintenanceMode.py +0 -9
  198. unrealon_sdk/src/clients/python_http/models/MaintenanceModeRequest.py +0 -33
  199. unrealon_sdk/src/clients/python_http/models/MaintenanceStatusResponse.py +0 -39
  200. unrealon_sdk/src/clients/python_http/models/ParserCommandRequest.py +0 -25
  201. unrealon_sdk/src/clients/python_http/models/ParserMessageResponse.py +0 -21
  202. unrealon_sdk/src/clients/python_http/models/ParserRegistrationRequest.py +0 -28
  203. unrealon_sdk/src/clients/python_http/models/ParserRegistrationResponse.py +0 -25
  204. unrealon_sdk/src/clients/python_http/models/ParserType.py +0 -10
  205. unrealon_sdk/src/clients/python_http/models/ProxyBlockRequest.py +0 -19
  206. unrealon_sdk/src/clients/python_http/models/ProxyEndpointResponse.py +0 -20
  207. unrealon_sdk/src/clients/python_http/models/ProxyListResponse.py +0 -19
  208. unrealon_sdk/src/clients/python_http/models/ProxyProvider.py +0 -10
  209. unrealon_sdk/src/clients/python_http/models/ProxyPurchaseRequest.py +0 -25
  210. unrealon_sdk/src/clients/python_http/models/ProxyResponse.py +0 -47
  211. unrealon_sdk/src/clients/python_http/models/ProxyRotationRequest.py +0 -23
  212. unrealon_sdk/src/clients/python_http/models/ProxyStatus.py +0 -10
  213. unrealon_sdk/src/clients/python_http/models/ProxyUsageRequest.py +0 -19
  214. unrealon_sdk/src/clients/python_http/models/ProxyUsageStatsResponse.py +0 -26
  215. unrealon_sdk/src/clients/python_http/models/ServiceRegistrationDto.py +0 -23
  216. unrealon_sdk/src/clients/python_http/models/ServiceStatsResponse.py +0 -31
  217. unrealon_sdk/src/clients/python_http/models/SessionStartRequest.py +0 -23
  218. unrealon_sdk/src/clients/python_http/models/SuccessResponse.py +0 -25
  219. unrealon_sdk/src/clients/python_http/models/SystemNotificationResponse.py +0 -23
  220. unrealon_sdk/src/clients/python_http/models/ValidationError.py +0 -18
  221. unrealon_sdk/src/clients/python_http/models/ValidationErrorResponse.py +0 -21
  222. unrealon_sdk/src/clients/python_http/models/WebSocketMetrics.py +0 -21
  223. unrealon_sdk/src/clients/python_http/models/__init__.py +0 -44
  224. unrealon_sdk/src/clients/python_http/services/None_service.py +0 -35
  225. unrealon_sdk/src/clients/python_http/services/ParserManagement_service.py +0 -190
  226. unrealon_sdk/src/clients/python_http/services/ProxyManagement_service.py +0 -289
  227. unrealon_sdk/src/clients/python_http/services/SocketLogging_service.py +0 -187
  228. unrealon_sdk/src/clients/python_http/services/SystemHealth_service.py +0 -119
  229. unrealon_sdk/src/clients/python_http/services/WebSocketAPI_service.py +0 -198
  230. unrealon_sdk/src/clients/python_http/services/__init__.py +0 -0
  231. unrealon_sdk/src/clients/python_http/services/admin_service.py +0 -125
  232. unrealon_sdk/src/clients/python_http/services/async_None_service.py +0 -35
  233. unrealon_sdk/src/clients/python_http/services/async_ParserManagement_service.py +0 -190
  234. unrealon_sdk/src/clients/python_http/services/async_ProxyManagement_service.py +0 -289
  235. unrealon_sdk/src/clients/python_http/services/async_SocketLogging_service.py +0 -189
  236. unrealon_sdk/src/clients/python_http/services/async_SystemHealth_service.py +0 -123
  237. unrealon_sdk/src/clients/python_http/services/async_WebSocketAPI_service.py +0 -200
  238. unrealon_sdk/src/clients/python_http/services/async_admin_service.py +0 -125
  239. unrealon_sdk/src/clients/python_websocket/__init__.py +0 -28
  240. unrealon_sdk/src/clients/python_websocket/client.py +0 -490
  241. unrealon_sdk/src/clients/python_websocket/events.py +0 -732
  242. unrealon_sdk/src/clients/python_websocket/example.py +0 -136
  243. unrealon_sdk/src/clients/python_websocket/types.py +0 -871
  244. unrealon_sdk/src/core/__init__.py +0 -64
  245. unrealon_sdk/src/core/client.py +0 -556
  246. unrealon_sdk/src/core/config.py +0 -465
  247. unrealon_sdk/src/core/exceptions.py +0 -239
  248. unrealon_sdk/src/core/metadata.py +0 -191
  249. unrealon_sdk/src/core/models.py +0 -142
  250. unrealon_sdk/src/core/types.py +0 -68
  251. unrealon_sdk/src/dto/__init__.py +0 -268
  252. unrealon_sdk/src/dto/authentication.py +0 -108
  253. unrealon_sdk/src/dto/cache.py +0 -208
  254. unrealon_sdk/src/dto/common.py +0 -19
  255. unrealon_sdk/src/dto/concurrency.py +0 -393
  256. unrealon_sdk/src/dto/events.py +0 -108
  257. unrealon_sdk/src/dto/health.py +0 -339
  258. unrealon_sdk/src/dto/load_balancing.py +0 -336
  259. unrealon_sdk/src/dto/logging.py +0 -230
  260. unrealon_sdk/src/dto/performance.py +0 -165
  261. unrealon_sdk/src/dto/rate_limiting.py +0 -295
  262. unrealon_sdk/src/dto/resource_pooling.py +0 -128
  263. unrealon_sdk/src/dto/structured_logging.py +0 -112
  264. unrealon_sdk/src/dto/task_scheduling.py +0 -121
  265. unrealon_sdk/src/dto/websocket.py +0 -55
  266. unrealon_sdk/src/enterprise/__init__.py +0 -59
  267. unrealon_sdk/src/enterprise/authentication.py +0 -401
  268. unrealon_sdk/src/enterprise/cache_manager.py +0 -578
  269. unrealon_sdk/src/enterprise/error_recovery.py +0 -494
  270. unrealon_sdk/src/enterprise/event_system.py +0 -549
  271. unrealon_sdk/src/enterprise/health_monitor.py +0 -747
  272. unrealon_sdk/src/enterprise/load_balancer.py +0 -964
  273. unrealon_sdk/src/enterprise/logging/__init__.py +0 -68
  274. unrealon_sdk/src/enterprise/logging/cleanup.py +0 -156
  275. unrealon_sdk/src/enterprise/logging/development.py +0 -744
  276. unrealon_sdk/src/enterprise/logging/service.py +0 -410
  277. unrealon_sdk/src/enterprise/multithreading_manager.py +0 -853
  278. unrealon_sdk/src/enterprise/performance_monitor.py +0 -539
  279. unrealon_sdk/src/enterprise/proxy_manager.py +0 -696
  280. unrealon_sdk/src/enterprise/rate_limiter.py +0 -652
  281. unrealon_sdk/src/enterprise/resource_pool.py +0 -763
  282. unrealon_sdk/src/enterprise/task_scheduler.py +0 -709
  283. unrealon_sdk/src/internal/__init__.py +0 -10
  284. unrealon_sdk/src/internal/command_router.py +0 -497
  285. unrealon_sdk/src/internal/connection_manager.py +0 -397
  286. unrealon_sdk/src/internal/http_client.py +0 -446
  287. unrealon_sdk/src/internal/websocket_client.py +0 -420
  288. unrealon_sdk/src/provider.py +0 -471
  289. unrealon_sdk/src/utils.py +0 -234
  290. /unrealon_browser/{src/cli → cli}/__init__.py +0 -0
  291. /unrealon_browser/{src/cli → cli}/interactive_mode.py +0 -0
  292. /unrealon_browser/{src/cli → cli}/main.py +0 -0
  293. /unrealon_browser/{src/core → core}/__init__.py +0 -0
  294. /unrealon_browser/{src/dto → dto}/__init__.py +0 -0
  295. /unrealon_browser/{src/dto → dto}/models/config.py +0 -0
  296. /unrealon_browser/{src/dto → dto}/models/core.py +0 -0
  297. /unrealon_browser/{src/dto → dto}/models/dataclasses.py +0 -0
  298. /unrealon_browser/{src/dto → dto}/models/detection.py +0 -0
  299. /unrealon_browser/{src/dto → dto}/models/enums.py +0 -0
  300. /unrealon_browser/{src/dto → dto}/models/statistics.py +0 -0
  301. /unrealon_browser/{src/managers → managers}/__init__.py +0 -0
  302. /unrealon_browser/{src/managers → managers}/stealth.py +0 -0
@@ -0,0 +1,628 @@
1
+ """
2
+ Parser Manager - Unified parser management system with Pydantic v2
3
+
4
+ Strict compliance with CRITICAL_REQUIREMENTS.md:
5
+ - No Dict[str, Any] usage
6
+ - Complete type annotations
7
+ - Pydantic v2 models everywhere
8
+ - Custom exception hierarchy
9
+ - No try blocks in imports
10
+ """
11
+
12
+ import asyncio
13
+ from datetime import datetime, timezone
14
+ from typing import Optional, List, Union, Any
15
+ from pathlib import Path
16
+ from pydantic import BaseModel, Field, ConfigDict, field_validator
17
+
18
+ from unrealon_bridge import ParserBridgeClient
19
+ from unrealon_rpc.logging import get_logger
20
+
21
+ from .managers import (
22
+ ConfigManager, ParserConfig,
23
+ ResultManager, ParseResult, ParseMetrics,
24
+ ErrorManager, RetryConfig, ErrorInfo,
25
+ LoggingManager, LoggingConfig, LogLevel,
26
+ HTMLManager, HTMLCleaningConfig,
27
+ BrowserManager, BrowserConfig
28
+ )
29
+
30
+
31
class ParserManagerConfig(BaseModel):
    """
    Aggregate configuration consumed by ParserManager.

    Groups the core parser settings, one settings object per sub-manager
    (logging, HTML cleaning, browser, retry) and the two bridge switches.
    Unknown fields are rejected (extra="forbid") and attribute assignments
    are re-validated (validate_assignment=True).
    """

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # --- core parser settings ---
    parser_config: ParserConfig = Field(default_factory=ParserConfig, description="Core parser configuration")

    # --- per-manager settings ---
    logging_config: LoggingConfig = Field(default_factory=LoggingConfig, description="Logging configuration")
    html_config: HTMLCleaningConfig = Field(default_factory=HTMLCleaningConfig, description="HTML cleaning configuration")
    browser_config: BrowserConfig = Field(default_factory=BrowserConfig, description="Browser configuration")
    retry_config: RetryConfig = Field(default_factory=RetryConfig, description="Retry configuration")

    # --- bridge switches ---
    bridge_enabled: bool = Field(default=True, description="Enable bridge connection")
    auto_register: bool = Field(default=True, description="Auto-register parser with bridge")

    def model_post_init(self, __context) -> None:
        """
        Propagate shared values from parser_config into the sub-configs.

        The parser name is copied onto logging_config when that object
        exposes a ``parser_name`` attribute. When parser_config.system_dir
        is truthy, the log directory, screenshots directory and cookies
        file are re-pointed underneath it.
        """
        name = self.parser_config.parser_name
        log_cfg = self.logging_config
        if hasattr(log_cfg, 'parser_name'):
            log_cfg.parser_name = name

        base_dir = self.parser_config.system_dir
        if not base_dir:
            # No system directory configured: sub-configs keep their own paths.
            return

        log_cfg.log_dir = base_dir / "logs"
        browser_cfg = self.browser_config
        browser_cfg.screenshots_dir = base_dir / "screenshots"
        browser_cfg.cookies_file = base_dir / "cookies.json"
85
+
86
+
87
class ParserStats(BaseModel):
    """
    Counters and identifiers describing one parser's activity.

    All counters default to zero and are constrained to be non-negative;
    success_rate is additionally capped at 100. Unknown fields are rejected
    and assignments are re-validated.
    """

    model_config = ConfigDict(validate_assignment=True, extra="forbid")

    # Identity (both required); session_id stays None until one is assigned.
    parser_id: str
    parser_name: str
    session_id: Optional[str] = Field(None)

    # Timing: start defaults to the timezone-aware UTC "now" at creation.
    session_start: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    session_duration: float = Field(0.0, ge=0.0)

    # Operation outcomes.
    operations_completed: int = Field(0, ge=0)
    operations_failed: int = Field(0, ge=0)
    success_rate: float = Field(0.0, ge=0.0, le=100.0)

    # Content processing.
    pages_processed: int = Field(0, ge=0)
    html_cleaned_count: int = Field(0, ge=0)
    total_html_reduction: float = Field(0.0, ge=0.0)

    # Error accounting.
    total_errors: int = Field(0, ge=0)
    retries_attempted: int = Field(0, ge=0)

    # Bridge activity.
    bridge_connected: bool = Field(False)
    bridge_messages_sent: int = Field(0, ge=0)
119
+
120
+
121
class ParserManagerError(Exception):
    """
    Root of the parser manager exception hierarchy.

    Attributes:
        message: Human-readable description (also passed to Exception).
        operation: Name of the operation that failed.
        details: Extra string key/value context; an empty dict when the
            caller passes nothing (or a falsy mapping).
    """

    def __init__(self, message: str, operation: str, details: Optional[dict[str, str]] = None):
        super().__init__(message)
        self.message, self.operation = message, operation
        self.details = details if details else {}
128
+
129
+
130
class InitializationError(ParserManagerError):
    """Signals that initializing the parser manager did not succeed."""
133
+
134
+
135
class OperationError(ParserManagerError):
    """Signals that a parser operation did not succeed."""
138
+
139
+
140
+ class ParserManager:
141
+ """
142
+ 🚀 Parser Manager - Unified parser management system
143
+
144
+ Features:
145
+ - Unified Configuration: Single config for all managers
146
+ - Automatic Lifecycle: Handles initialization, execution, cleanup
147
+ - Error Recovery: Smart retry logic with exponential backoff
148
+ - Performance Monitoring: Comprehensive statistics and metrics
149
+ - Bridge Integration: Seamless communication with Django
150
+ - Type Safety: Full Pydantic v2 compliance
151
+
152
+ Usage:
153
+ config = ParserManagerConfig(
154
+ parser_config=ParserConfig(parser_name="MyParser"),
155
+ bridge_enabled=True
156
+ )
157
+
158
+ async with ParserManager(config) as parser:
159
+ # Navigate and extract
160
+ html = await parser.get_html("https://example.com")
161
+ cleaned_html = await parser.clean_html(html)
162
+ result = await parser.analyze_html(cleaned_html)
163
+
164
+ # Results are automatically tracked
165
+ stats = parser.get_stats()
166
+ """
167
+
168
def __init__(self, config: ParserManagerConfig):
    """
    Store *config* and construct every sub-manager from it.

    The bridge client starts as None, the manager is marked as not
    initialized, no session is active, and a fresh ParserStats is created
    for this parser's id and name. Retry configurations are registered
    last via _setup_retry_configs().
    """
    self.config = config
    self.internal_logger = get_logger()

    parser_cfg = config.parser_config

    # Sub-managers, each fed its own slice of the configuration.
    self.config_manager = ConfigManager(parser_cfg)
    self.result_manager = ResultManager(parser_cfg.parser_id)
    self.error_manager = ErrorManager(self.internal_logger)
    self.logging_manager = LoggingManager(config.logging_config)
    self.html_manager = HTMLManager(config.html_config)
    self.browser_manager = BrowserManager(config.browser_config)

    # No bridge client yet.
    self.bridge_client: Optional[ParserBridgeClient] = None

    # Runtime state.
    self._is_initialized = False
    self._session_id: Optional[str] = None
    self._stats = ParserStats(
        parser_id=parser_cfg.parser_id,
        parser_name=parser_cfg.parser_name,
    )

    self._setup_retry_configs()
193
+
194
+ # ==========================================
195
+ # LIFECYCLE MANAGEMENT
196
+ # ==========================================
197
+
198
async def initialize(self) -> None:
    """
    Bring the manager into a usable state; a no-op when already initialized.

    Order of work: optional bridge initialization (when bridge_enabled),
    browser initialization, then - only if a bridge client exists - handing
    it to the logging manager and, when auto_register is set, registering
    the parser.

    Raises:
        InitializationError: wraps any exception raised along the way; the
            original exception is recorded with the error manager first
            and chained as the cause.
    """
    if self._is_initialized:
        return

    try:
        log = self.logging_manager
        log.info("🚀 Initializing parser manager...")

        if self.config.bridge_enabled:
            await self._initialize_bridge()

        await self.browser_manager.initialize()

        # Everything below needs a bridge client to exist.
        bridge = self.bridge_client
        if bridge:
            log.update_bridge_client(bridge)
            if self.config.auto_register:
                await self._register_parser()

        self._is_initialized = True
        log.info("✅ Parser manager initialized successfully")

    except Exception as exc:
        self.error_manager.record_error(exc, "initialization")
        raise InitializationError(
            message=f"Failed to initialize parser manager: {exc}",
            operation="initialization",
        ) from exc
230
+
231
async def cleanup(self) -> None:
    """
    Release all resources on a best-effort basis; never raises for
    failures of the individual teardown steps.

    Steps, each isolated so one failure cannot block the next:
      1. end the active bridge session (only if a session id and a bridge
         client are both present),
      2. clean up the browser,
      3. disconnect the bridge client (if any).

    Afterwards the final session stats are refreshed and the manager's
    lifecycle state is reset: ``_is_initialized`` becomes False so that
    ``initialize()`` (and the lazy initialization in ``get_html()``) can
    run again instead of short-circuiting against torn-down resources,
    and ``_session_id`` is cleared once the session was ended successfully
    so that a repeated ``cleanup()`` does not try to end it twice.
    ``_stats.session_id`` is intentionally left untouched here.

    Step failures are collected and reported in a single warning.
    """
    self.logging_manager.info("🧹 Cleaning up parser manager...")

    cleanup_errors: list[str] = []
    session_ended = False

    # End session if active
    if self._session_id and self.bridge_client:
        try:
            await self.bridge_client.end_session()
        except Exception as e:
            cleanup_errors.append(f"end_session: {e}")
        else:
            session_ended = True

    # Cleanup browser
    try:
        await self.browser_manager.cleanup()
    except Exception as e:
        cleanup_errors.append(f"browser_cleanup: {e}")

    # Disconnect bridge
    if self.bridge_client:
        try:
            await self.bridge_client.disconnect()
        except Exception as e:
            cleanup_errors.append(f"bridge_disconnect: {e}")

    # Update final stats (done before the state reset so it still sees
    # the state the session ran with).
    self._update_session_stats()

    # Reset lifecycle state: resources are gone, so the manager must not
    # keep claiming to be initialized or to own an ended session.
    if session_ended:
        self._session_id = None
    self._is_initialized = False

    # Log cleanup errors but don't raise
    if cleanup_errors:
        self.logging_manager.warning(f"Cleanup errors: {'; '.join(cleanup_errors)}")

    self.logging_manager.info("✅ Parser manager cleanup completed")
265
+
266
+ # ==========================================
267
+ # CORE PARSING METHODS
268
+ # ==========================================
269
+
270
async def get_html(self, url: str) -> str:
    """
    Fetch the HTML of *url* through the browser manager.

    Initializes the manager first when that has not happened yet. The
    fetch itself is wrapped by the error manager's retry decorator using
    the configured retry settings; each successful fetch increments
    pages_processed.

    Raises:
        OperationError: when the (retried) fetch ultimately fails; the
            total_errors counter is incremented and the original exception
            is chained as the cause.
    """
    if not self._is_initialized:
        await self.initialize()

    async def _get_html_with_retry():
        self.logging_manager.url_access(url, "fetching")
        page_source = await self.browser_manager.get_html(url)
        self._stats.pages_processed += 1
        return page_source

    # Same wrapping as decorator syntax, spelled out as an explicit call.
    retry_wrapper = self.error_manager.with_retry("get_html", self.config.retry_config)
    guarded_fetch = retry_wrapper(_get_html_with_retry)

    try:
        return await guarded_fetch()
    except Exception as exc:
        self._stats.total_errors += 1
        raise OperationError(
            message=f"Failed to get HTML from {url}: {exc}",
            operation="get_html",
            details={"url": url},
        ) from exc
291
+
292
async def clean_html(self, html: str, **kwargs) -> str:
    """
    Run *html* through the HTML manager's cleaner and return the result.

    Extra keyword arguments are forwarded untouched to the HTML manager.
    On success the cleaned-document counter is incremented and the
    reported size reduction percentage is added to the running total.

    Raises:
        OperationError: wraps any failure; total_errors is incremented and
            the original exception is chained as the cause.
    """
    try:
        log = self.logging_manager
        log.info(f"🧹 Cleaning HTML: {len(html)} characters")

        result = await self.html_manager.clean_html(html, **kwargs)

        # Book-keeping for the statistics snapshot.
        self._stats.html_cleaned_count += 1
        cleaning = self.html_manager.get_cleaning_stats(html, result)
        self._stats.total_html_reduction += cleaning.size_reduction_percent

        summary = (
            f"✅ HTML cleaned: {len(html)} → {len(result)} chars "
            f"({cleaning.size_reduction_percent:.1f}% reduction)"
        )
        log.info(summary)

        return result

    except Exception as exc:
        self._stats.total_errors += 1
        raise OperationError(
            message=f"Failed to clean HTML: {exc}",
            operation="clean_html",
        ) from exc
317
+
318
async def analyze_html(
    self,
    html: str,
    instructions: Optional[str] = None,
    **kwargs
) -> dict[str, str]:
    """
    Send *html* to the bridge for parsing and return a string-only summary.

    Recognised keyword arguments: ``timeout`` (default 60) and
    ``metadata`` (default empty dict); both are passed to the bridge call.

    Returns:
        A dict with the keys "success" and "parsed_data" (both produced
        with str()), plus "markdown" and "error_message" (empty string
        when the bridge result carries a falsy value).

    Raises:
        OperationError: when no bridge client is available, or when the
            bridge call fails (total_errors is incremented in that case
            and the original exception is chained).
    """
    bridge = self.bridge_client
    if not bridge:
        raise OperationError(
            message="Bridge client not available for HTML analysis",
            operation="analyze_html",
        )

    try:
        self.logging_manager.info("🤖 Analyzing HTML with LLM...")

        request = dict(
            html_content=html,
            instructions=instructions,
            parse_type="general",
            timeout=kwargs.get("timeout", 60),
            metadata=kwargs.get("metadata", {}),
        )
        outcome = await bridge.parse_html(**request)

        summary = {
            "success": str(outcome.success),
            "parsed_data": str(outcome.parsed_data),
            "markdown": outcome.markdown or "",
            "error_message": outcome.error_message or "",
        }
        return summary

    except Exception as exc:
        self._stats.total_errors += 1
        raise OperationError(
            message=f"Failed to analyze HTML: {exc}",
            operation="analyze_html",
        ) from exc
355
+
356
async def parse_url(
    self,
    url: str,
    instructions: Optional[str] = None,
    **kwargs
) -> dict[str, str]:
    """
    Complete parsing workflow: fetch → clean → analyze.

    The same ``**kwargs`` are forwarded to both ``clean_html()`` and
    ``analyze_html()``.

    The operation is opened with the result manager before any work starts
    and is always closed: with the analysis outcome on the normal path, or
    with ``success=False`` and the error text when any step raises.

    Returns:
        The dict produced by ``analyze_html()``, unchanged.

    Raises:
        Whatever the failing step raised (re-raised as-is after the
        failure has been recorded and operations_failed incremented).
    """
    operation = self.result_manager.start_operation()

    try:
        self.logging_manager.start_operation("parse_url")

        # Fetch HTML
        html = await self.get_html(url)

        # Clean HTML
        cleaned_html = await self.clean_html(html, **kwargs)

        # Analyze HTML
        analysis_result = await self.analyze_html(cleaned_html, instructions, **kwargs)

        # analyze_html() serialises the flag with str(bool), i.e. "True" /
        # "False". Compare case-insensitively; an exact match against
        # "true" would record every successful parse as a failure.
        succeeded = str(analysis_result.get("success", "false")).strip().lower() == "true"

        # Complete operation
        self.result_manager.complete_operation(
            data=[],  # Analysis result is returned directly
            source_urls=[url],
            success=succeeded
        )

        self._stats.operations_completed += 1
        self.logging_manager.end_operation("parse_url", operation.duration_seconds)

        return analysis_result

    except Exception as e:
        self.result_manager.complete_operation(
            data=[],
            source_urls=[url],
            success=False,
            error_message=str(e)
        )

        self._stats.operations_failed += 1
        self.logging_manager.fail_operation("parse_url", str(e))
        raise
400
+
401
+ # ==========================================
402
+ # SESSION MANAGEMENT
403
+ # ==========================================
404
+
405
async def start_session(self, session_type: str = "parsing") -> str:
    """
    Open a session on the bridge and remember its id.

    The parser's name and type are sent along as session metadata. The
    returned id is stored on the manager, mirrored into the stats object
    and handed to the logging manager.

    Returns:
        The session id returned by the bridge client.

    Raises:
        OperationError: when no bridge client is available, or when the
            bridge call (or the follow-up book-keeping) fails.
    """
    bridge = self.bridge_client
    if not bridge:
        raise OperationError(
            message="Bridge client not available for session management",
            operation="start_session",
        )

    try:
        parser_cfg = self.config.parser_config
        session_meta = {
            "parser_name": parser_cfg.parser_name,
            "parser_type": parser_cfg.parser_type,
        }
        new_id = await bridge.start_session(
            session_type=session_type,
            metadata=session_meta,
        )

        # Manager state first, then the stats mirror (left-to-right).
        self._session_id = self._stats.session_id = new_id
        log = self.logging_manager
        log.set_session(new_id)
        log.info(f"📋 Session started: {new_id}")
        return new_id

    except Exception as exc:
        raise OperationError(
            message=f"Failed to start session: {exc}",
            operation="start_session",
        ) from exc
434
+
435
async def end_session(self) -> None:
    """Close the active session through the bridge client, if any.

    Nothing happens when no session id is recorded or no bridge client
    is set. Otherwise the bridge client is asked to end the session, the
    event is logged, and the stored session id is cleared on this
    instance and on the stats object. Any exception raised along the way
    is caught and logged as a warning instead of propagating.
    """
    if self._session_id and self.bridge_client:
        try:
            await self.bridge_client.end_session()
            self.logging_manager.info(f"📋 Session ended: {self._session_id}")
            # Forget the id only after the bridge call and log succeeded.
            self._session_id = None
            self._stats.session_id = None
        except Exception as exc:
            self.logging_manager.warning(f"Failed to end session: {exc}")
448
+
449
+ # ==========================================
450
+ # STATISTICS AND MONITORING
451
+ # ==========================================
452
+
453
def get_stats(self) -> ParserStats:
    """Return parser statistics as a ``ParserStats`` object.

    The session statistics are refreshed first; the result is then
    re-validated from a dump of the internal stats object.
    """
    self._update_session_stats()
    stats_snapshot = self._stats.model_dump()
    return ParserStats.model_validate(stats_snapshot)
457
+
458
def get_manager_stats(self) -> dict[str, dict[str, str]]:
    """Collect the statistics reported by each manager.

    Returns:
        A dict keyed by ``result_manager``, ``error_manager``,
        ``browser_manager`` and ``logging_manager``. The browser entry
        is the browser manager's stats object dumped in JSON mode; the
        others are returned as the managers provide them.
    """
    collected = {}
    collected["result_manager"] = self.result_manager.get_stats()
    collected["error_manager"] = self.error_manager.get_error_stats()

    browser_stats = self.browser_manager.get_stats()
    collected["browser_manager"] = browser_stats.model_dump(mode='json')

    collected["logging_manager"] = self.logging_manager.get_log_stats()
    return collected
466
+
467
async def health_check(self) -> dict[str, str]:
    """Report the health of the parser, its browser and its bridge.

    Returns:
        A dict of string values with the keys ``status``, ``parser_id``,
        ``parser_name``, ``initialized``, ``session_active``,
        ``browser_status`` and ``bridge_connected``.

        ``status`` is ``"healthy"`` unless the browser health check
        raised; in that case it is ``"unhealthy"`` and
        ``browser_status`` carries ``"error: <exception text>"``.
        ``bridge_connected`` is ``"true"`` whenever a bridge client
        object is set and ``"false"`` otherwise.
    """
    parser_cfg = self.config.parser_config
    health = {
        "status": "healthy",
        "parser_id": parser_cfg.parser_id,
        "parser_name": parser_cfg.parser_name,
        "initialized": str(self._is_initialized),
        "session_active": str(self._session_id is not None),
    }

    # Check browser health
    try:
        browser_health = await self.browser_manager.health_check()
        health["browser_status"] = browser_health.get("status", "unknown")
    except Exception as exc:
        health["browser_status"] = f"error: {exc}"
        # A failing browser probe must not be reported as overall healthy.
        health["status"] = "unhealthy"

    # Check bridge health (presence of a client object only)
    health["bridge_connected"] = "true" if self.bridge_client else "false"

    return health
491
+
492
+ # ==========================================
493
+ # INTERNAL METHODS
494
+ # ==========================================
495
+
496
async def _initialize_bridge(self) -> None:
    """Create the bridge client from the parser config and connect it.

    The client is built with the configured websocket URL, parser type
    and API key, and stored on ``self.bridge_client``. The connection is
    then opened on that client's inner ``bridge_client`` attribute, the
    stats object is flagged as connected, and the event is logged.
    """
    parser_cfg = self.config.parser_config
    self.bridge_client = ParserBridgeClient(
        websocket_url=parser_cfg.websocket_url,
        parser_type=parser_cfg.parser_type,
        api_key=parser_cfg.api_key,
    )

    inner_client = self.bridge_client.bridge_client
    await inner_client.connect()

    self._stats.bridge_connected = True
    self.logging_manager.info("🔗 Bridge client connected")
507
+
508
async def _register_parser(self) -> None:
    """Register this parser with the bridge and adopt the returned id.

    Does nothing when no bridge client is set. Otherwise the bridge
    client is called with a fixed metadata dict (driver version,
    capabilities, managers), and the ``parser_id`` of its reply is
    written to the parser config and the stats object, then logged.
    """
    bridge = self.bridge_client
    if not bridge:
        return

    registration_metadata = {
        "driver_version": "4.0.0",
        "capabilities": "scraping,html_cleaning,llm_integration",
        "managers": "config,result,error,logging,html,browser",
    }
    registration = await bridge.register_parser(metadata=registration_metadata)

    # Update parser ID
    assigned_id = registration.parser_id
    self.config.parser_config.parser_id = assigned_id
    self._stats.parser_id = assigned_id

    self.logging_manager.info(f"📝 Parser registered: {assigned_id}")
526
+
527
def _setup_retry_configs(self) -> None:
    """Register the retry configurations with the error manager.

    Two configurations are registered, in this order:

    * ``get_html`` (navigation): 3 attempts, base delay 2.0, retrying on
      ``NavigationError``, ``TimeoutError`` and ``ConnectionError``.
    * ``analyze_html`` (bridge communication): 2 attempts, base delay
      1.0, retrying on ``ConnectionError`` and ``TimeoutError``.
    """
    # (operation name, max attempts, base delay, retryable exception names)
    retry_plan = (
        ("get_html", 3, 2.0, ["NavigationError", "TimeoutError", "ConnectionError"]),
        ("analyze_html", 2, 1.0, ["ConnectionError", "TimeoutError"]),
    )

    for operation_name, attempts, delay, retryable in retry_plan:
        retry_config = RetryConfig(
            max_attempts=attempts,
            base_delay=delay,
            retry_on_exceptions=retryable,
        )
        self.error_manager.register_retry_config(operation_name, retry_config)
544
+
545
def _update_session_stats(self) -> None:
    """Refresh the session duration and success rate on the stats object.

    The duration is the number of seconds between the recorded session
    start and the current UTC time. The success rate is the percentage
    of completed operations among completed plus failed ones; it is
    left unchanged while no operation has been counted.
    """
    stats = self._stats

    elapsed = datetime.now(timezone.utc) - stats.session_start
    stats.session_duration = elapsed.total_seconds()

    completed = stats.operations_completed
    attempted = completed + stats.operations_failed
    if attempted <= 0:
        # No operations yet: keep whatever rate is already stored.
        return
    stats.success_rate = (completed / attempted) * 100.0
552
+
553
+ # ==========================================
554
+ # CONTEXT MANAGER SUPPORT
555
+ # ==========================================
556
+
557
async def __aenter__(self):
    """Initialize on entering ``async with`` and yield this instance."""
    manager = self
    await manager.initialize()
    return manager
561
+
562
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Run cleanup on leaving ``async with``; exceptions are not suppressed."""
    await self.cleanup()
    # A falsy return lets any exception from the block propagate.
    suppress_exception = False
    return suppress_exception
566
+
567
def __repr__(self) -> str:
    """Return a short tag showing the configured parser id and name."""
    parser_cfg = self.config.parser_config
    parser_id = parser_cfg.parser_id
    parser_name = parser_cfg.parser_name
    return f"<ParserManager(id='{parser_id}', name='{parser_name}')>"
569
+
570
+
571
+ # ==========================================
572
+ # CONVENIENCE FUNCTIONS
573
+ # ==========================================
574
+
575
def get_parser_manager(
    parser_name: str,
    parser_type: str = "generic",
    **kwargs
) -> ParserManager:
    """
    Build a ParserManager from a parser name and optional settings.

    Keyword options are routed by field name: those declared on
    ``ParserConfig`` go to the parser config, those declared on
    ``ParserManagerConfig`` (other than ``parser_config`` and
    ``logging_config``) go to the manager config. Options matching
    neither are ignored.

    Args:
        parser_name: Name of the parser; also used for the logging config
        parser_type: Type of parser (generic, ecommerce, news, etc.)
        **kwargs: Additional configuration options

    Returns:
        Configured ParserManager instance
    """
    parser_options = {}
    for option, value in kwargs.items():
        if option in ParserConfig.model_fields:
            parser_options[option] = value
    parser_config = ParserConfig(
        parser_name=parser_name,
        parser_type=parser_type,
        **parser_options,
    )

    # Create logging config with parser name
    logging_config = LoggingConfig(parser_name=parser_name)

    # These two are always supplied explicitly below, never from kwargs.
    explicit_fields = ['parser_config', 'logging_config']
    manager_options = {}
    for option, value in kwargs.items():
        if option in ParserManagerConfig.model_fields and option not in explicit_fields:
            manager_options[option] = value

    manager_config = ParserManagerConfig(
        parser_config=parser_config,
        logging_config=logging_config,
        **manager_options,
    )

    return ParserManager(manager_config)
607
+
608
+
609
async def quick_parse(
    url: str,
    parser_name: str = "QuickParser",
    instructions: Optional[str] = None,
    **kwargs
) -> dict[str, str]:
    """
    Parse a single URL with a temporary parser manager.

    A manager is created with ``get_parser_manager``, used as an async
    context manager for one ``parse_url`` call, and left afterwards. The
    same ``kwargs`` are passed both to the manager factory and to
    ``parse_url``.

    Args:
        url: URL to parse
        parser_name: Name for the parser
        instructions: Optional parsing instructions
        **kwargs: Additional configuration

    Returns:
        Parsing result
    """
    manager = get_parser_manager(parser_name, **kwargs)
    async with manager as parser:
        parse_result = await parser.parse_url(url, instructions, **kwargs)
        return parse_result