unrealon 1.1.6__py3-none-any.whl → 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. {unrealon-1.1.6.dist-info/licenses → unrealon-2.0.4.dist-info}/LICENSE +1 -1
  2. unrealon-2.0.4.dist-info/METADATA +491 -0
  3. unrealon-2.0.4.dist-info/RECORD +129 -0
  4. {unrealon-1.1.6.dist-info → unrealon-2.0.4.dist-info}/WHEEL +2 -1
  5. unrealon-2.0.4.dist-info/entry_points.txt +3 -0
  6. unrealon-2.0.4.dist-info/top_level.txt +3 -0
  7. unrealon_browser/__init__.py +5 -6
  8. unrealon_browser/cli/browser_cli.py +18 -9
  9. unrealon_browser/cli/interactive_mode.py +13 -4
  10. unrealon_browser/core/browser_manager.py +29 -16
  11. unrealon_browser/dto/__init__.py +21 -0
  12. unrealon_browser/dto/bot_detection.py +175 -0
  13. unrealon_browser/dto/models/config.py +9 -3
  14. unrealon_browser/managers/__init__.py +1 -1
  15. unrealon_browser/managers/logger_bridge.py +1 -4
  16. unrealon_browser/stealth/__init__.py +27 -0
  17. unrealon_browser/stealth/bypass_techniques.pyc +0 -0
  18. unrealon_browser/stealth/manager.pyc +0 -0
  19. unrealon_browser/stealth/nodriver_stealth.pyc +0 -0
  20. unrealon_browser/stealth/playwright_stealth.pyc +0 -0
  21. unrealon_browser/stealth/scanner_tester.pyc +0 -0
  22. unrealon_browser/stealth/undetected_chrome.pyc +0 -0
  23. unrealon_core/__init__.py +160 -0
  24. unrealon_core/config/__init__.py +16 -0
  25. unrealon_core/config/environment.py +98 -0
  26. unrealon_core/config/urls.py +93 -0
  27. unrealon_core/enums/__init__.py +24 -0
  28. unrealon_core/enums/status.py +216 -0
  29. unrealon_core/enums/types.py +240 -0
  30. unrealon_core/error_handling/__init__.py +45 -0
  31. unrealon_core/error_handling/circuit_breaker.py +292 -0
  32. unrealon_core/error_handling/error_context.py +324 -0
  33. unrealon_core/error_handling/recovery.py +371 -0
  34. unrealon_core/error_handling/retry.py +268 -0
  35. unrealon_core/exceptions/__init__.py +46 -0
  36. unrealon_core/exceptions/base.py +292 -0
  37. unrealon_core/exceptions/communication.py +22 -0
  38. unrealon_core/exceptions/driver.py +11 -0
  39. unrealon_core/exceptions/proxy.py +11 -0
  40. unrealon_core/exceptions/task.py +12 -0
  41. unrealon_core/exceptions/validation.py +17 -0
  42. unrealon_core/models/__init__.py +98 -0
  43. unrealon_core/models/arq_context.py +252 -0
  44. unrealon_core/models/arq_responses.py +125 -0
  45. unrealon_core/models/base.py +291 -0
  46. unrealon_core/models/bridge_stats.py +58 -0
  47. unrealon_core/models/communication.py +39 -0
  48. unrealon_core/models/config.py +47 -0
  49. unrealon_core/models/connection_stats.py +47 -0
  50. unrealon_core/models/driver.py +30 -0
  51. unrealon_core/models/driver_details.py +98 -0
  52. unrealon_core/models/logging.py +28 -0
  53. unrealon_core/models/task.py +21 -0
  54. unrealon_core/models/typed_responses.py +210 -0
  55. unrealon_core/models/websocket/__init__.py +91 -0
  56. unrealon_core/models/websocket/base.py +49 -0
  57. unrealon_core/models/websocket/config.py +200 -0
  58. unrealon_core/models/websocket/driver.py +215 -0
  59. unrealon_core/models/websocket/errors.py +138 -0
  60. unrealon_core/models/websocket/heartbeat.py +100 -0
  61. unrealon_core/models/websocket/logging.py +261 -0
  62. unrealon_core/models/websocket/proxy.py +496 -0
  63. unrealon_core/models/websocket/tasks.py +275 -0
  64. unrealon_core/models/websocket/utils.py +153 -0
  65. unrealon_core/models/websocket_session.py +144 -0
  66. unrealon_core/monitoring/__init__.py +43 -0
  67. unrealon_core/monitoring/alerts.py +398 -0
  68. unrealon_core/monitoring/dashboard.py +307 -0
  69. unrealon_core/monitoring/health_check.py +354 -0
  70. unrealon_core/monitoring/metrics.py +352 -0
  71. unrealon_core/utils/__init__.py +11 -0
  72. unrealon_core/utils/time.py +61 -0
  73. unrealon_core/version.py +219 -0
  74. unrealon_driver/__init__.py +90 -51
  75. unrealon_driver/core_module/__init__.py +34 -0
  76. unrealon_driver/core_module/base.py +184 -0
  77. unrealon_driver/core_module/config.py +30 -0
  78. unrealon_driver/core_module/event_manager.py +127 -0
  79. unrealon_driver/core_module/protocols.py +98 -0
  80. unrealon_driver/core_module/registry.py +146 -0
  81. unrealon_driver/decorators/__init__.py +15 -0
  82. unrealon_driver/decorators/retry.py +117 -0
  83. unrealon_driver/decorators/schedule.py +137 -0
  84. unrealon_driver/decorators/task.py +61 -0
  85. unrealon_driver/decorators/timing.py +132 -0
  86. unrealon_driver/driver/__init__.py +20 -0
  87. unrealon_driver/driver/communication/__init__.py +10 -0
  88. unrealon_driver/driver/communication/session.py +203 -0
  89. unrealon_driver/driver/communication/websocket_client.py +197 -0
  90. unrealon_driver/driver/core/__init__.py +10 -0
  91. unrealon_driver/driver/core/config.py +85 -0
  92. unrealon_driver/driver/core/driver.py +221 -0
  93. unrealon_driver/driver/factory/__init__.py +9 -0
  94. unrealon_driver/driver/factory/manager_factory.py +130 -0
  95. unrealon_driver/driver/lifecycle/__init__.py +11 -0
  96. unrealon_driver/driver/lifecycle/daemon.py +76 -0
  97. unrealon_driver/driver/lifecycle/initialization.py +97 -0
  98. unrealon_driver/driver/lifecycle/shutdown.py +48 -0
  99. unrealon_driver/driver/monitoring/__init__.py +9 -0
  100. unrealon_driver/driver/monitoring/health.py +63 -0
  101. unrealon_driver/driver/utilities/__init__.py +10 -0
  102. unrealon_driver/driver/utilities/logging.py +51 -0
  103. unrealon_driver/driver/utilities/serialization.py +61 -0
  104. unrealon_driver/managers/__init__.py +32 -0
  105. unrealon_driver/managers/base.py +174 -0
  106. unrealon_driver/managers/browser.py +98 -0
  107. unrealon_driver/managers/cache.py +116 -0
  108. unrealon_driver/managers/http.py +107 -0
  109. unrealon_driver/managers/logger.py +286 -0
  110. unrealon_driver/managers/proxy.py +99 -0
  111. unrealon_driver/managers/registry.py +87 -0
  112. unrealon_driver/managers/threading.py +54 -0
  113. unrealon_driver/managers/update.py +107 -0
  114. unrealon_driver/utils/__init__.py +9 -0
  115. unrealon_driver/utils/time.py +10 -0
  116. unrealon-1.1.6.dist-info/METADATA +0 -625
  117. unrealon-1.1.6.dist-info/RECORD +0 -55
  118. unrealon-1.1.6.dist-info/entry_points.txt +0 -9
  119. unrealon_browser/managers/stealth.py +0 -388
  120. unrealon_driver/README.md +0 -0
  121. unrealon_driver/exceptions.py +0 -33
  122. unrealon_driver/html_analyzer/__init__.py +0 -32
  123. unrealon_driver/html_analyzer/cleaner.py +0 -657
  124. unrealon_driver/html_analyzer/config.py +0 -64
  125. unrealon_driver/html_analyzer/manager.py +0 -247
  126. unrealon_driver/html_analyzer/models.py +0 -115
  127. unrealon_driver/html_analyzer/websocket_analyzer.py +0 -157
  128. unrealon_driver/models/__init__.py +0 -31
  129. unrealon_driver/models/websocket.py +0 -98
  130. unrealon_driver/parser/__init__.py +0 -36
  131. unrealon_driver/parser/cli_manager.py +0 -142
  132. unrealon_driver/parser/daemon_manager.py +0 -403
  133. unrealon_driver/parser/managers/__init__.py +0 -25
  134. unrealon_driver/parser/managers/config.py +0 -293
  135. unrealon_driver/parser/managers/error.py +0 -412
  136. unrealon_driver/parser/managers/result.py +0 -321
  137. unrealon_driver/parser/parser_manager.py +0 -458
  138. unrealon_driver/smart_logging/__init__.py +0 -24
  139. unrealon_driver/smart_logging/models.py +0 -44
  140. unrealon_driver/smart_logging/smart_logger.py +0 -406
  141. unrealon_driver/smart_logging/unified_logger.py +0 -525
  142. unrealon_driver/websocket/__init__.py +0 -31
  143. unrealon_driver/websocket/client.py +0 -249
  144. unrealon_driver/websocket/config.py +0 -188
  145. unrealon_driver/websocket/manager.py +0 -90
@@ -1,25 +0,0 @@
1
- """
2
- Parser Managers - Specialized management components
3
-
4
- All managers follow strict Pydantic v2 compliance and CRITICAL_REQUIREMENTS.md
5
- """
6
-
7
- from .config import ConfigManager, ParserConfig
8
- from .result import ResultManager, ParseResult, ParseMetrics, OperationStatus
9
- from .error import ErrorManager, RetryConfig, ErrorInfo, ErrorSeverity
10
-
11
- __all__ = [
12
- # Config Manager
13
- "ConfigManager",
14
- "ParserConfig",
15
- # Result Manager
16
- "ResultManager",
17
- "ParseResult",
18
- "ParseMetrics",
19
- "OperationStatus",
20
- # Error Manager
21
- "ErrorManager",
22
- "RetryConfig",
23
- "ErrorInfo",
24
- "ErrorSeverity",
25
- ]
@@ -1,293 +0,0 @@
1
- """
2
- Config Manager - Type-safe configuration management with Pydantic v2
3
-
4
- Strict compliance with CRITICAL_REQUIREMENTS.md:
5
- - No Dict[str, Any] usage
6
- - Complete type annotations
7
- - Pydantic v2 models everywhere
8
- - No mutable defaults
9
-
10
- Features automatic WebSocket URL detection - no manual configuration needed!
11
- """
12
-
13
- from typing import Optional, List
14
- from pathlib import Path
15
- from pydantic import BaseModel, Field, ConfigDict, field_validator
16
- import uuid
17
-
18
-
19
- def _get_auto_websocket_url() -> str:
20
- """Get WebSocket URL automatically based on environment detection"""
21
- try:
22
- from ...websocket import get_websocket_url
23
- return get_websocket_url()
24
- except ImportError:
25
- # Fallback if websocket module not available
26
- return "ws://localhost:8002/ws"
27
-
28
-
29
- class ParserConfig(BaseModel):
30
- """
31
- Parser configuration with smart defaults and strict typing
32
-
33
- Zero configuration approach - everything has sensible defaults
34
- """
35
- model_config = ConfigDict(
36
- validate_assignment=True,
37
- extra="forbid",
38
- str_strip_whitespace=True
39
- )
40
-
41
- # Parser identity
42
- parser_id: str = Field(
43
- default_factory=lambda: f"parser_{uuid.uuid4().hex[:8]}",
44
- description="Unique parser identifier"
45
- )
46
- parser_name: str = Field(
47
- default="UnrealOn Parser",
48
- description="Human-readable parser name"
49
- )
50
- parser_type: str = Field(
51
- default="generic",
52
- description="Parser type for classification"
53
- )
54
-
55
- # Connection settings (auto-detected)
56
- websocket_url: str = Field(
57
- default_factory=lambda: _get_auto_websocket_url(),
58
- description="WebSocket bridge URL (auto-detected based on environment)"
59
- )
60
- api_key: Optional[str] = Field(
61
- default=None,
62
- description="API key for authentication"
63
- )
64
-
65
- # Browser settings
66
- headless: bool = Field(
67
- default=True,
68
- description="Run browser in headless mode"
69
- )
70
- stealth_mode: bool = Field(
71
- default=True,
72
- description="Enable stealth mode"
73
- )
74
- user_agent: Optional[str] = Field(
75
- default=None,
76
- description="Custom user agent"
77
- )
78
-
79
- # HTML cleaning settings
80
- aggressive_cleaning: bool = Field(
81
- default=True,
82
- description="Enable aggressive HTML cleaning"
83
- )
84
- preserve_js_data: bool = Field(
85
- default=True,
86
- description="Preserve JavaScript data during cleaning"
87
- )
88
-
89
- # Timeouts (in milliseconds)
90
- page_timeout: int = Field(
91
- default=30000,
92
- ge=1000,
93
- le=300000,
94
- description="Page load timeout in milliseconds"
95
- )
96
- navigation_timeout: int = Field(
97
- default=30000,
98
- ge=1000,
99
- le=300000,
100
- description="Navigation timeout in milliseconds"
101
- )
102
-
103
- # Directories
104
- system_dir: Optional[Path] = Field(
105
- default=None,
106
- description="System directory for logs and data"
107
- )
108
- screenshots_dir: Optional[Path] = Field(
109
- default=None,
110
- description="Screenshots directory"
111
- )
112
-
113
- # Development settings
114
- debug: bool = Field(
115
- default=False,
116
- description="Enable debug mode"
117
- )
118
- save_html: bool = Field(
119
- default=False,
120
- description="Save HTML files for debugging"
121
- )
122
- save_screenshots: bool = Field(
123
- default=False,
124
- description="Save screenshots for debugging"
125
- )
126
-
127
- @field_validator('parser_name')
128
- @classmethod
129
- def validate_parser_name(cls, v: str) -> str:
130
- """Validate parser name is not empty"""
131
- if not v.strip():
132
- raise ValueError("Parser name cannot be empty")
133
- return v.strip()
134
-
135
- @field_validator('parser_type')
136
- @classmethod
137
- def validate_parser_type(cls, v: str) -> str:
138
- """Validate parser type"""
139
- allowed_types = {
140
- "generic", "ecommerce", "news", "jobs",
141
- "real_estate", "social_media", "reviews",
142
- "events", "directory"
143
- }
144
- if v not in allowed_types:
145
- raise ValueError(f"Parser type must be one of: {', '.join(allowed_types)}")
146
- return v
147
-
148
- @field_validator('websocket_url')
149
- @classmethod
150
- def validate_websocket_url(cls, v: str) -> str:
151
- """Validate WebSocket URL format"""
152
- if not v.startswith(('ws://', 'wss://')):
153
- raise ValueError("WebSocket URL must start with ws:// or wss://")
154
- return v
155
-
156
- def model_post_init(self, __context) -> None:
157
- """Post-initialization setup"""
158
- # Setup system directory if not provided
159
- if self.system_dir is None:
160
- self.system_dir = Path.cwd() / "system"
161
-
162
- # Setup screenshots directory if not provided
163
- if self.screenshots_dir is None:
164
- self.screenshots_dir = self.system_dir / "screenshots"
165
-
166
- # Create directories
167
- self.system_dir.mkdir(parents=True, exist_ok=True)
168
- self.screenshots_dir.mkdir(parents=True, exist_ok=True)
169
-
170
-
171
- class ConfigManager:
172
- """
173
- 🔧 Config Manager - Type-safe configuration management
174
-
175
- Features:
176
- - Pydantic v2 validation
177
- - Environment variable integration
178
- - Configuration profiles
179
- - Hot reloading
180
- - Type safety enforcement
181
- """
182
-
183
- def __init__(self, config: Optional[ParserConfig] = None):
184
- self._config: ParserConfig = config or ParserConfig()
185
- self._profiles: dict[str, ParserConfig] = {}
186
- self._current_profile: Optional[str] = None
187
-
188
- @property
189
- def config(self) -> ParserConfig:
190
- """Get current configuration"""
191
- return self._config
192
-
193
- def update_config(self, **kwargs) -> None:
194
- """Update configuration with new values"""
195
- # Create new config with updated values
196
- current_data = self._config.model_dump()
197
- current_data.update(kwargs)
198
- self._config = ParserConfig.model_validate(current_data)
199
-
200
- def load_from_dict(self, config_dict: dict[str, str]) -> None:
201
- """Load configuration from dictionary"""
202
- self._config = ParserConfig.model_validate(config_dict)
203
-
204
- def load_from_env(self, prefix: str = "PARSER_") -> None:
205
- """Load configuration from environment variables"""
206
- import os
207
-
208
- env_config = {}
209
- for key, value in os.environ.items():
210
- if key.startswith(prefix):
211
- config_key = key[len(prefix):].lower()
212
-
213
- # Convert string values to appropriate types
214
- if config_key in ['headless', 'stealth_mode', 'aggressive_cleaning',
215
- 'preserve_js_data', 'debug', 'save_html', 'save_screenshots']:
216
- env_config[config_key] = value.lower() in ('true', '1', 'yes', 'on')
217
- elif config_key in ['page_timeout', 'navigation_timeout']:
218
- env_config[config_key] = int(value)
219
- elif config_key in ['system_dir', 'screenshots_dir']:
220
- env_config[config_key] = Path(value)
221
- else:
222
- env_config[config_key] = value
223
-
224
- if env_config:
225
- current_data = self._config.model_dump()
226
- current_data.update(env_config)
227
- self._config = ParserConfig.model_validate(current_data)
228
-
229
- def save_profile(self, name: str) -> None:
230
- """Save current configuration as a profile"""
231
- if not name.strip():
232
- raise ValueError("Profile name cannot be empty")
233
- self._profiles[name] = ParserConfig.model_validate(self._config.model_dump())
234
-
235
- def load_profile(self, name: str) -> None:
236
- """Load configuration from a saved profile"""
237
- if name not in self._profiles:
238
- raise ValueError(f"Profile '{name}' not found")
239
- self._config = ParserConfig.model_validate(self._profiles[name].model_dump())
240
- self._current_profile = name
241
-
242
- def get_profiles(self) -> List[str]:
243
- """Get list of available profiles"""
244
- return list(self._profiles.keys())
245
-
246
- def delete_profile(self, name: str) -> None:
247
- """Delete a saved profile"""
248
- if name not in self._profiles:
249
- raise ValueError(f"Profile '{name}' not found")
250
- del self._profiles[name]
251
- if self._current_profile == name:
252
- self._current_profile = None
253
-
254
- def get_current_profile(self) -> Optional[str]:
255
- """Get current profile name"""
256
- return self._current_profile
257
-
258
- def validate_config(self) -> List[str]:
259
- """Validate current configuration and return any issues"""
260
- issues = []
261
-
262
- # Check directory permissions
263
- try:
264
- test_file = self._config.system_dir / ".test"
265
- test_file.touch()
266
- test_file.unlink()
267
- except PermissionError:
268
- issues.append(f"No write permission for system directory: {self._config.system_dir}")
269
- except Exception as e:
270
- issues.append(f"System directory issue: {e}")
271
-
272
- # Check timeouts are reasonable
273
- if self._config.page_timeout < 5000:
274
- issues.append("Page timeout is very low (< 5 seconds)")
275
- if self._config.navigation_timeout < 5000:
276
- issues.append("Navigation timeout is very low (< 5 seconds)")
277
-
278
- return issues
279
-
280
- def to_dict(self) -> dict[str, str]:
281
- """Export configuration as dictionary"""
282
- return self._config.model_dump(mode='json')
283
-
284
- def to_env_format(self, prefix: str = "PARSER_") -> List[str]:
285
- """Export configuration as environment variable format"""
286
- config_dict = self.to_dict()
287
- env_vars = []
288
-
289
- for key, value in config_dict.items():
290
- env_key = f"{prefix}{key.upper()}"
291
- env_vars.append(f"{env_key}={value}")
292
-
293
- return env_vars