unrealon 2.0.34__py3-none-any.whl → 2.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/METADATA +1 -1
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/RECORD +18 -17
- unrealon_browser/core/browser_manager.py +6 -16
- unrealon_browser/dto/models/statistics.py +0 -1
- unrealon_browser/managers/__init__.py +2 -0
- unrealon_browser/managers/captcha.py +3 -3
- unrealon_browser/managers/data_extraction_manager.py +266 -0
- unrealon_browser/managers/logger_bridge.py +0 -12
- unrealon_driver/driver/core/driver.py +2 -1
- unrealon_driver/driver/factory/manager_factory.py +14 -1
- unrealon_driver/managers/__init__.py +2 -1
- unrealon_driver/managers/browser.py +37 -1
- unrealon_driver/managers/http.py +106 -2
- unrealon_driver/managers/threading.py +45 -4
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/LICENSE +0 -0
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/WHEEL +0 -0
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/entry_points.txt +0 -0
- {unrealon-2.0.34.dist-info → unrealon-2.0.35.dist-info}/top_level.txt +0 -0
|
@@ -6,7 +6,7 @@ unrealon_browser/cli/cookies_cli.py,sha256=yhZvGrg8bknlH4zlySdi8ue-25Ue-1rI_u1G0
|
|
|
6
6
|
unrealon_browser/cli/interactive_mode.py,sha256=gLn9bMH0h0tPX3dP4i4QQxQK4Htkyg5r4KcqdMBaP6Q,12125
|
|
7
7
|
unrealon_browser/cli/main.py,sha256=XCYcTxJUqaz320KCU_JPKizYMk6bdljb8Boyok3uO-4,1353
|
|
8
8
|
unrealon_browser/core/__init__.py,sha256=uVL_t4sZelUzflWPdgrwoXGnAkSV1WNQ98-eu0QB2eM,151
|
|
9
|
-
unrealon_browser/core/browser_manager.py,sha256=
|
|
9
|
+
unrealon_browser/core/browser_manager.py,sha256=gKx7M5eQQU7B8FYhQsrvJaB11An2ZH1Yc1PnED1HV1Y,31977
|
|
10
10
|
unrealon_browser/dto/__init__.py,sha256=bApqcLz-KanEi0_MCiFPrQmGBoX3VBijP7XtBUyIfjo,1636
|
|
11
11
|
unrealon_browser/dto/bot_detection.py,sha256=qXfC0HghV7m4L6qA87t3STi-166jM-QgoP6OYbCb4o4,6884
|
|
12
12
|
unrealon_browser/dto/models/config.py,sha256=Why5H3rtFclmwbdczuDfhlgf-LDz72Aa8LhDX4_ayfw,1752
|
|
@@ -14,11 +14,12 @@ unrealon_browser/dto/models/core.py,sha256=HvbwYG27rmmWtp401uws7lfalN_9QPad0M6ce
|
|
|
14
14
|
unrealon_browser/dto/models/dataclasses.py,sha256=zqhJVyzp4CvtuTBsZwm6n6TodVWrZf9gkdDG-0_tgeA,2571
|
|
15
15
|
unrealon_browser/dto/models/detection.py,sha256=ma9ZNIjPR7HnjqZaAj6ZoskiewPFiSn_FgFXSkgiQc8,2715
|
|
16
16
|
unrealon_browser/dto/models/enums.py,sha256=Q4WzHdfSKf7dhKyX00i_Pvl2U8w3lBsxOYfSIoaQY3Q,1219
|
|
17
|
-
unrealon_browser/dto/models/statistics.py,sha256=
|
|
18
|
-
unrealon_browser/managers/__init__.py,sha256=
|
|
19
|
-
unrealon_browser/managers/captcha.py,sha256=
|
|
17
|
+
unrealon_browser/dto/models/statistics.py,sha256=RbiMChC6EumFvzIoxfWp2eIqjkW4yOpWZTKsOHNtok8,2685
|
|
18
|
+
unrealon_browser/managers/__init__.py,sha256=YDNpfdA-cRqn1xnX9xurgHC1x1zw0nLiPo7rPjikuzQ,679
|
|
19
|
+
unrealon_browser/managers/captcha.py,sha256=JsAG1gjfwrOrNZd1N1HALtzOuJ6loEhCYZVKTylKubU,21488
|
|
20
20
|
unrealon_browser/managers/cookies.py,sha256=r4VVnKLXH82vhU7qgtY-dF7KPf0Ie3QxGD3FEi6geFA,15085
|
|
21
|
-
unrealon_browser/managers/
|
|
21
|
+
unrealon_browser/managers/data_extraction_manager.py,sha256=dbbNgrqGvtMCgSxpliLxkD0PrAN0NrRrVpbN7iqcQKQ,10575
|
|
22
|
+
unrealon_browser/managers/logger_bridge.py,sha256=aCaDVRS7ZksXYtIKCCQIBqsmD5n0cPpc__0o4c3Iah0,10366
|
|
22
23
|
unrealon_browser/managers/page_wait_manager.py,sha256=UyZqiSfkjzahrxp9x1odXFIT_sFhZGvdECxWuIMCVBY,7876
|
|
23
24
|
unrealon_browser/managers/profile.py,sha256=HjddlSeUry_65WPtF8CMkT7cfJ6X3Jap9kJaaZpwtAA,18956
|
|
24
25
|
unrealon_browser/managers/script_manager.py,sha256=hVnEWDb2LM1rfnptFo1MtE0SGcYCoFA66udykmb5e1g,11581
|
|
@@ -101,9 +102,9 @@ unrealon_driver/driver/communication/session.py,sha256=DYN_Q3Qm3XuOi-dM8aNihJfQJ
|
|
|
101
102
|
unrealon_driver/driver/communication/websocket_client.py,sha256=VPsICBvGHunuCGZvorvPCF01Qdvp7QWyTDX0hkYXRwo,7910
|
|
102
103
|
unrealon_driver/driver/core/__init__.py,sha256=ZvJQp1zO7pj6tBNYTJk2fj-0ZMiQTQEk-I9hXalNsfg,235
|
|
103
104
|
unrealon_driver/driver/core/config.py,sha256=jWJjRll19VlL4iM5Q-J3o9qwYeH89Iuj1_3KayM6fCk,5914
|
|
104
|
-
unrealon_driver/driver/core/driver.py,sha256=
|
|
105
|
+
unrealon_driver/driver/core/driver.py,sha256=8rufKfvE7M1axB7ZyK28OINqsrBJcUyL0HVdqjK47ps,7950
|
|
105
106
|
unrealon_driver/driver/factory/__init__.py,sha256=XrjBhOaLvC3MIG5PAFIYS_xYXFDz5JizpFvmQcwA7mU,189
|
|
106
|
-
unrealon_driver/driver/factory/manager_factory.py,sha256=
|
|
107
|
+
unrealon_driver/driver/factory/manager_factory.py,sha256=BII7cH6-X8i_DMoHvcE72AakD3wuD44gWEK5K8WJJOg,5644
|
|
107
108
|
unrealon_driver/driver/lifecycle/__init__.py,sha256=KnkXklezAOIbXcCzEU_XSOt32z7tz1zIGclXYXTkO8k,286
|
|
108
109
|
unrealon_driver/driver/lifecycle/daemon.py,sha256=KHAzpiWFu3HRElRtzSEStmI74bMivFjfCAFlXha87KU,2609
|
|
109
110
|
unrealon_driver/driver/lifecycle/initialization.py,sha256=R4MgfkSNnfAdMO0Kp1Cx42cfNqq8VIxj_mGX7ECXad4,4406
|
|
@@ -115,21 +116,21 @@ unrealon_driver/driver/utilities/logging.py,sha256=2my2QnkAa6Hdw-TfO4oOQ94yGc-Cj
|
|
|
115
116
|
unrealon_driver/driver/utilities/serialization.py,sha256=wTCSVrEloykiGN4K1JXbk2aqNKm7W90aWXmzhcLyAZc,2123
|
|
116
117
|
unrealon_driver/installer/__init__.py,sha256=PraOjOg-cN1zOtuhPSTE5vCGPSMzWtEBYU8A05GWEf8,227
|
|
117
118
|
unrealon_driver/installer/platform.py,sha256=U_8FJZk0C8M0ujpfzcpOPWEoUrT6asTNEIhsN0n2bCg,5081
|
|
118
|
-
unrealon_driver/managers/__init__.py,sha256=
|
|
119
|
+
unrealon_driver/managers/__init__.py,sha256=LZUQXwpgqjStC5B4fIdwBJTwlB-4om_eQPhdGs7SJeo,1057
|
|
119
120
|
unrealon_driver/managers/base.py,sha256=GkuXillg9uqqnx6RL682fmKgK-7JyqYlH6DFUgyN4F8,5445
|
|
120
|
-
unrealon_driver/managers/browser.py,sha256=
|
|
121
|
+
unrealon_driver/managers/browser.py,sha256=_b6YEOLqrgcD83eiVJOYvYqC5OJw_Nr4b4FmOa0uHaE,6906
|
|
121
122
|
unrealon_driver/managers/cache.py,sha256=c0tPKQ5KFd_Un1U8mw3j1WPuycxg863MMWNMveVF_2I,3506
|
|
122
|
-
unrealon_driver/managers/http.py,sha256=
|
|
123
|
+
unrealon_driver/managers/http.py,sha256=NZ8VRRpVX2EsE_LMV0AYqb3HriOQCY8Qfkojg3pV7sE,7387
|
|
123
124
|
unrealon_driver/managers/logger.py,sha256=PL3rA9ZQl12jJU0EiPAkLwJ6eDHQfIzr8-nc8bVivKQ,10526
|
|
124
125
|
unrealon_driver/managers/proxy.py,sha256=b2w6DteMJWnwxZmL3NfwBMdE_mscchoMwPs-XFKNwnU,3855
|
|
125
126
|
unrealon_driver/managers/registry.py,sha256=--oNPU-65e8J21ubJufyEOc1TirnzJIvpvuY_j7rH7Q,2666
|
|
126
|
-
unrealon_driver/managers/threading.py,sha256=
|
|
127
|
+
unrealon_driver/managers/threading.py,sha256=yw2RlWxc2MBn0ZbPJ9h3eLIC5OFCvVkr8DQM7DhWO8M,3498
|
|
127
128
|
unrealon_driver/managers/update.py,sha256=-hohVxGXpj5bZ6ZTQN6NH1RK9Pd6GVzCMtu3GS2SdcQ,3582
|
|
128
129
|
unrealon_driver/utils/__init__.py,sha256=2Sz3eats5q4O2fDmefDuJt8M_zkN6xrS-9xXntWZWFc,168
|
|
129
130
|
unrealon_driver/utils/time.py,sha256=Oxk1eicKeZl8ZWbf7gu1Ll716k6CpXmVj67FHSnPIsA,184
|
|
130
|
-
unrealon-2.0.
|
|
131
|
-
unrealon-2.0.
|
|
132
|
-
unrealon-2.0.
|
|
133
|
-
unrealon-2.0.
|
|
134
|
-
unrealon-2.0.
|
|
135
|
-
unrealon-2.0.
|
|
131
|
+
unrealon-2.0.35.dist-info/LICENSE,sha256=eEH8mWZW49YMpl4Sh5MtKqkZ8aVTzKQXiNPEnvL14ns,1070
|
|
132
|
+
unrealon-2.0.35.dist-info/METADATA,sha256=5oIiWxcda_6_DCik53Vt5Xd4CsuyGA1yARkZXSvkOIY,15689
|
|
133
|
+
unrealon-2.0.35.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
|
|
134
|
+
unrealon-2.0.35.dist-info/entry_points.txt,sha256=k0qM-eotpajkKUq-almJmxj9afhXprZ6IkvQkSdcKhI,104
|
|
135
|
+
unrealon-2.0.35.dist-info/top_level.txt,sha256=Gu8IeIfIVfUxdi-h-F0nKMQxo15pjhHZ0aTadXTpRE8,47
|
|
136
|
+
unrealon-2.0.35.dist-info/RECORD,,
|
|
@@ -31,6 +31,7 @@ from unrealon_browser.managers import (
|
|
|
31
31
|
create_browser_logger_bridge,
|
|
32
32
|
PageWaitManager,
|
|
33
33
|
ScriptManager,
|
|
34
|
+
DataExtractionManager,
|
|
34
35
|
)
|
|
35
36
|
|
|
36
37
|
|
|
@@ -71,6 +72,7 @@ class BrowserManager:
|
|
|
71
72
|
self.captcha_manager = CaptchaDetector()
|
|
72
73
|
self.page_wait = PageWaitManager(None, self.logger_bridge)
|
|
73
74
|
self.script_manager = ScriptManager(None, self.logger_bridge)
|
|
75
|
+
self.data_extraction = DataExtractionManager(None, self.logger_bridge)
|
|
74
76
|
|
|
75
77
|
# Signal handlers for graceful shutdown
|
|
76
78
|
self._setup_signal_handlers()
|
|
@@ -251,6 +253,9 @@ class BrowserManager:
|
|
|
251
253
|
|
|
252
254
|
# Update script manager with new page
|
|
253
255
|
self.script_manager.update_page(self._page)
|
|
256
|
+
|
|
257
|
+
# Update data extraction manager with new page
|
|
258
|
+
self.data_extraction.update_page(self._page)
|
|
254
259
|
|
|
255
260
|
# 🔥 STEALTH ALWAYS APPLIED TO EVERY PAGE!
|
|
256
261
|
stealth_success = await self.stealth_manager.apply_stealth(self._page)
|
|
@@ -476,17 +481,6 @@ class BrowserManager:
|
|
|
476
481
|
"error": str(e),
|
|
477
482
|
}
|
|
478
483
|
|
|
479
|
-
async def get_page_content_async(self) -> Optional[str]:
|
|
480
|
-
"""Get current page content"""
|
|
481
|
-
if not self._page:
|
|
482
|
-
return None
|
|
483
|
-
|
|
484
|
-
try:
|
|
485
|
-
return await self._page.content()
|
|
486
|
-
except Exception as e:
|
|
487
|
-
self.logger_bridge.log_error(f"❌ Failed to get page content: {e}")
|
|
488
|
-
return None
|
|
489
|
-
|
|
490
484
|
async def execute_script_async(self, script: str) -> Any:
|
|
491
485
|
"""Execute JavaScript on current page"""
|
|
492
486
|
if not self._page:
|
|
@@ -603,11 +597,6 @@ class BrowserManager:
|
|
|
603
597
|
resolution_result = await self.captcha_manager.handle_captcha_interactive(self, detection_result, timeout_seconds)
|
|
604
598
|
|
|
605
599
|
if resolution_result["success"]:
|
|
606
|
-
# Log successful captcha resolution
|
|
607
|
-
if hasattr(self, "_current_proxy") and self._current_proxy:
|
|
608
|
-
proxy_host = self._current_proxy.get("host", "unknown")
|
|
609
|
-
proxy_port = self._current_proxy.get("port", 0)
|
|
610
|
-
self.logger_bridge.log_captcha_solved(proxy_host, proxy_port, manual=True)
|
|
611
600
|
|
|
612
601
|
# Update session status back to active
|
|
613
602
|
self.session_metadata.current_status = BrowserSessionStatus.ACTIVE
|
|
@@ -708,6 +697,7 @@ class BrowserManager:
|
|
|
708
697
|
self._page = None
|
|
709
698
|
self.page_wait.update_page(None)
|
|
710
699
|
self.script_manager.update_page(None)
|
|
700
|
+
self.data_extraction.update_page(None)
|
|
711
701
|
|
|
712
702
|
# Close context with safety checks
|
|
713
703
|
if self._context:
|
|
@@ -9,6 +9,7 @@ from .cookies import CookieManager
|
|
|
9
9
|
from .captcha import CaptchaDetector
|
|
10
10
|
from .page_wait_manager import PageWaitManager
|
|
11
11
|
from .script_manager import ScriptManager
|
|
12
|
+
from .data_extraction_manager import DataExtractionManager
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
__all__ = [
|
|
@@ -20,4 +21,5 @@ __all__ = [
|
|
|
20
21
|
"CaptchaDetector",
|
|
21
22
|
"PageWaitManager",
|
|
22
23
|
"ScriptManager",
|
|
24
|
+
"DataExtractionManager",
|
|
23
25
|
]
|
|
@@ -310,7 +310,7 @@ class CaptchaDetector:
|
|
|
310
310
|
|
|
311
311
|
if cookies_saved:
|
|
312
312
|
self._logger("💾 Cookies saved after captcha resolution", "info")
|
|
313
|
-
self._captchas_solved += 1
|
|
313
|
+
# self._captchas_solved += 1 # Solving disabled
|
|
314
314
|
else:
|
|
315
315
|
self._logger("⚠️ Failed to save cookies after captcha resolution", "warning")
|
|
316
316
|
|
|
@@ -499,8 +499,8 @@ class CaptchaDetector:
|
|
|
499
499
|
|
|
500
500
|
print(f"\n🤖 Captcha Detection Statistics:")
|
|
501
501
|
print(f" Captchas detected: {stats['captchas_detected']}")
|
|
502
|
-
print(f" Captchas solved: {stats['captchas_solved']}")
|
|
503
|
-
print(f" Success rate:
|
|
502
|
+
print(f" Captchas solved: {stats['captchas_solved']} (solving disabled)")
|
|
503
|
+
print(f" Success rate: N/A (solving disabled)")
|
|
504
504
|
print(f" Detection history: {stats['detection_history_count']} events")
|
|
505
505
|
print(f" Supported types: {', '.join(stats['supported_types'])}")
|
|
506
506
|
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data Extraction Manager - Extract different types of data from pages
|
|
3
|
+
"""
|
|
4
|
+
import json
|
|
5
|
+
from typing import Optional, Dict, Any, Union
|
|
6
|
+
from playwright.async_api import Page
|
|
7
|
+
|
|
8
|
+
from .logger_bridge import BrowserLoggerBridge as LoggingBridge
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DataExtractionManager:
|
|
12
|
+
"""Manager for extracting different types of data from web pages"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, page: Optional[Page], logger_bridge: LoggingBridge):
|
|
15
|
+
self._page = page
|
|
16
|
+
self.logger_bridge = logger_bridge
|
|
17
|
+
|
|
18
|
+
def update_page(self, page: Optional[Page]):
|
|
19
|
+
"""Update the page reference"""
|
|
20
|
+
self._page = page
|
|
21
|
+
|
|
22
|
+
async def get_json_content(self) -> Optional[Dict[str, Any]]:
|
|
23
|
+
"""Extract JSON content from current page (for API endpoints)."""
|
|
24
|
+
if not self._page:
|
|
25
|
+
self.logger_bridge.log_error("No page available for JSON extraction")
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
try:
|
|
29
|
+
self.logger_bridge.log_info("🔍 Extracting JSON content from page...")
|
|
30
|
+
|
|
31
|
+
# JavaScript to extract JSON from different page formats
|
|
32
|
+
script = """
|
|
33
|
+
(() => {
|
|
34
|
+
try {
|
|
35
|
+
// Method 1: Try to get from document.body.textContent (for API responses)
|
|
36
|
+
const bodyText = document.body.textContent || document.body.innerText || '';
|
|
37
|
+
const cleanBodyText = bodyText.trim();
|
|
38
|
+
|
|
39
|
+
if (cleanBodyText && (cleanBodyText.startsWith('{') || cleanBodyText.startsWith('['))) {
|
|
40
|
+
return {
|
|
41
|
+
success: true,
|
|
42
|
+
data: JSON.parse(cleanBodyText),
|
|
43
|
+
method: 'body_text'
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Method 2: Try to get from <pre> tag (common for JSON APIs)
|
|
48
|
+
const preElement = document.querySelector('pre');
|
|
49
|
+
if (preElement) {
|
|
50
|
+
const preText = (preElement.textContent || preElement.innerText || '').trim();
|
|
51
|
+
if (preText && (preText.startsWith('{') || preText.startsWith('['))) {
|
|
52
|
+
return {
|
|
53
|
+
success: true,
|
|
54
|
+
data: JSON.parse(preText),
|
|
55
|
+
method: 'pre_element'
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Method 3: Check if entire document is JSON
|
|
61
|
+
const docText = (document.documentElement.textContent || document.documentElement.innerText || '').trim();
|
|
62
|
+
if (docText && (docText.startsWith('{') || docText.startsWith('['))) {
|
|
63
|
+
return {
|
|
64
|
+
success: true,
|
|
65
|
+
data: JSON.parse(docText),
|
|
66
|
+
method: 'document_text'
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Method 4: Look for JSON in script tags
|
|
71
|
+
const scriptTags = document.querySelectorAll('script[type="application/json"]');
|
|
72
|
+
for (const script of scriptTags) {
|
|
73
|
+
const scriptText = (script.textContent || script.innerText || '').trim();
|
|
74
|
+
if (scriptText && (scriptText.startsWith('{') || scriptText.startsWith('['))) {
|
|
75
|
+
return {
|
|
76
|
+
success: true,
|
|
77
|
+
data: JSON.parse(scriptText),
|
|
78
|
+
method: 'script_tag'
|
|
79
|
+
};
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return {
|
|
84
|
+
success: false,
|
|
85
|
+
error: 'No JSON content found',
|
|
86
|
+
page_text_preview: cleanBodyText.substring(0, 200)
|
|
87
|
+
};
|
|
88
|
+
|
|
89
|
+
} catch (e) {
|
|
90
|
+
return {
|
|
91
|
+
success: false,
|
|
92
|
+
error: 'JSON parse failed: ' + e.message,
|
|
93
|
+
page_text_preview: (document.body.textContent || '').substring(0, 200)
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
})();
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
result = await self._page.evaluate(script)
|
|
100
|
+
|
|
101
|
+
if result.get('success'):
|
|
102
|
+
method = result.get('method', 'unknown')
|
|
103
|
+
self.logger_bridge.log_info(f"✅ JSON extracted successfully using method: {method}")
|
|
104
|
+
return result.get('data')
|
|
105
|
+
else:
|
|
106
|
+
error = result.get('error', 'Unknown error')
|
|
107
|
+
preview = result.get('page_text_preview', '')
|
|
108
|
+
self.logger_bridge.log_warning(f"❌ JSON extraction failed: {error}")
|
|
109
|
+
if preview:
|
|
110
|
+
self.logger_bridge.log_info(f"📄 Page preview: {preview}...")
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
except Exception as e:
|
|
114
|
+
self.logger_bridge.log_error(f"JSON extraction error: {e}")
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
async def get_page_text(self) -> Optional[str]:
|
|
118
|
+
"""Get plain text content from current page."""
|
|
119
|
+
if not self._page:
|
|
120
|
+
return None
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
self.logger_bridge.log_info("📄 Extracting plain text content...")
|
|
124
|
+
|
|
125
|
+
script = """
|
|
126
|
+
(() => {
|
|
127
|
+
return {
|
|
128
|
+
body_text: document.body.textContent || document.body.innerText || '',
|
|
129
|
+
title: document.title || '',
|
|
130
|
+
url: window.location.href
|
|
131
|
+
};
|
|
132
|
+
})();
|
|
133
|
+
"""
|
|
134
|
+
|
|
135
|
+
result = await self._page.evaluate(script)
|
|
136
|
+
text = result.get('body_text', '').strip()
|
|
137
|
+
|
|
138
|
+
if text:
|
|
139
|
+
self.logger_bridge.log_info(f"✅ Text extracted: {len(text)} characters")
|
|
140
|
+
return text
|
|
141
|
+
else:
|
|
142
|
+
self.logger_bridge.log_warning("❌ No text content found")
|
|
143
|
+
return None
|
|
144
|
+
|
|
145
|
+
except Exception as e:
|
|
146
|
+
self.logger_bridge.log_error(f"Text extraction error: {e}")
|
|
147
|
+
return None
|
|
148
|
+
|
|
149
|
+
async def get_structured_data(self) -> Optional[Dict[str, Any]]:
|
|
150
|
+
"""Get structured data including JSON, text, and metadata."""
|
|
151
|
+
if not self._page:
|
|
152
|
+
return None
|
|
153
|
+
|
|
154
|
+
try:
|
|
155
|
+
self.logger_bridge.log_info("🔍 Extracting structured data...")
|
|
156
|
+
|
|
157
|
+
# Try JSON first
|
|
158
|
+
json_data = await self.get_json_content()
|
|
159
|
+
|
|
160
|
+
# Get page metadata
|
|
161
|
+
script = """
|
|
162
|
+
(() => {
|
|
163
|
+
return {
|
|
164
|
+
url: window.location.href,
|
|
165
|
+
title: document.title || '',
|
|
166
|
+
content_type: document.contentType || '',
|
|
167
|
+
charset: document.characterSet || '',
|
|
168
|
+
ready_state: document.readyState,
|
|
169
|
+
has_pre_element: !!document.querySelector('pre'),
|
|
170
|
+
body_text_length: (document.body.textContent || '').length
|
|
171
|
+
};
|
|
172
|
+
})();
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
metadata = await self._page.evaluate(script)
|
|
176
|
+
|
|
177
|
+
result = {
|
|
178
|
+
"extraction_success": json_data is not None,
|
|
179
|
+
"json_data": json_data,
|
|
180
|
+
"metadata": metadata,
|
|
181
|
+
"extracted_at": self._get_timestamp()
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if json_data:
|
|
185
|
+
self.logger_bridge.log_info("✅ Structured data extraction successful")
|
|
186
|
+
else:
|
|
187
|
+
self.logger_bridge.log_warning("⚠️ No JSON data found, but metadata extracted")
|
|
188
|
+
|
|
189
|
+
return result
|
|
190
|
+
|
|
191
|
+
except Exception as e:
|
|
192
|
+
self.logger_bridge.log_error(f"Structured data extraction error: {e}")
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
async def detect_content_type(self) -> str:
|
|
196
|
+
"""Detect the type of content on the current page."""
|
|
197
|
+
if not self._page:
|
|
198
|
+
return "unknown"
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
script = """
|
|
202
|
+
(() => {
|
|
203
|
+
const bodyText = (document.body.textContent || '').trim();
|
|
204
|
+
const contentType = document.contentType || '';
|
|
205
|
+
const hasPreElement = !!document.querySelector('pre');
|
|
206
|
+
|
|
207
|
+
// Check for JSON
|
|
208
|
+
if (bodyText.startsWith('{') || bodyText.startsWith('[')) {
|
|
209
|
+
return 'json';
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Check for XML
|
|
213
|
+
if (bodyText.startsWith('<') && contentType.includes('xml')) {
|
|
214
|
+
return 'xml';
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// Check for HTML
|
|
218
|
+
if (document.querySelector('html') && document.querySelector('body') && !hasPreElement) {
|
|
219
|
+
return 'html';
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// Check for plain text
|
|
223
|
+
if (hasPreElement || contentType.includes('text/plain')) {
|
|
224
|
+
return 'text';
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
return 'unknown';
|
|
228
|
+
})();
|
|
229
|
+
"""
|
|
230
|
+
|
|
231
|
+
content_type = await self._page.evaluate(script)
|
|
232
|
+
self.logger_bridge.log_info(f"🔍 Detected content type: {content_type}")
|
|
233
|
+
return content_type
|
|
234
|
+
|
|
235
|
+
except Exception as e:
|
|
236
|
+
self.logger_bridge.log_error(f"Content type detection error: {e}")
|
|
237
|
+
return "unknown"
|
|
238
|
+
|
|
239
|
+
async def get_page_html(self) -> Optional[str]:
|
|
240
|
+
"""Get full HTML content from current page."""
|
|
241
|
+
if not self._page:
|
|
242
|
+
return None
|
|
243
|
+
|
|
244
|
+
try:
|
|
245
|
+
self.logger_bridge.log_info("📄 Extracting HTML content...")
|
|
246
|
+
html = await self._page.content()
|
|
247
|
+
|
|
248
|
+
if html:
|
|
249
|
+
self.logger_bridge.log_info(f"✅ HTML extracted: {len(html)} characters")
|
|
250
|
+
return html
|
|
251
|
+
else:
|
|
252
|
+
self.logger_bridge.log_warning("❌ No HTML content found")
|
|
253
|
+
return None
|
|
254
|
+
|
|
255
|
+
except Exception as e:
|
|
256
|
+
self.logger_bridge.log_error(f"HTML extraction error: {e}")
|
|
257
|
+
return None
|
|
258
|
+
|
|
259
|
+
def _get_timestamp(self) -> str:
|
|
260
|
+
"""Get current timestamp in ISO format."""
|
|
261
|
+
from datetime import datetime
|
|
262
|
+
return datetime.now().isoformat()
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# Export
|
|
266
|
+
__all__ = ["DataExtractionManager"]
|
|
@@ -81,7 +81,6 @@ class BrowserLoggerBridge:
|
|
|
81
81
|
"navigation_failed": 0,
|
|
82
82
|
"stealth_applied": 0,
|
|
83
83
|
"captcha_detected": 0,
|
|
84
|
-
"captcha_solved": 0,
|
|
85
84
|
"profile_created": 0,
|
|
86
85
|
"cookies_saved": 0,
|
|
87
86
|
}
|
|
@@ -194,17 +193,6 @@ class BrowserLoggerBridge:
|
|
|
194
193
|
detected_at=result.detected_at.isoformat(),
|
|
195
194
|
)
|
|
196
195
|
|
|
197
|
-
def log_captcha_solved(self, proxy_host: str, proxy_port: int, manual: bool = True) -> None:
|
|
198
|
-
"""Log captcha resolution"""
|
|
199
|
-
self._browser_events["captcha_solved"] += 1
|
|
200
|
-
self._log_info(
|
|
201
|
-
f"Captcha solved for proxy {proxy_host}:{proxy_port}",
|
|
202
|
-
proxy_host=proxy_host,
|
|
203
|
-
proxy_port=proxy_port,
|
|
204
|
-
resolution_method="manual" if manual else "automatic",
|
|
205
|
-
cookies_will_be_saved=True,
|
|
206
|
-
)
|
|
207
|
-
|
|
208
196
|
def log_profile_created(self, profile_name: str, proxy_info: Optional[Dict[str, Any]] = None) -> None:
|
|
209
197
|
"""Log profile creation"""
|
|
210
198
|
self._browser_events["profile_created"] += 1
|
|
@@ -21,7 +21,7 @@ from ..utilities.logging import LoggingUtility
|
|
|
21
21
|
from ..utilities.serialization import SerializationUtility
|
|
22
22
|
|
|
23
23
|
from ...managers import (
|
|
24
|
-
LoggerManager, HttpManager, BrowserManager, CacheManager,
|
|
24
|
+
LoggerManager, HttpManager, HttpxManager, BrowserManager, CacheManager,
|
|
25
25
|
ProxyManager, ThreadManager, UpdateManager, ManagerRegistry
|
|
26
26
|
)
|
|
27
27
|
|
|
@@ -77,6 +77,7 @@ class UniversalDriver:
|
|
|
77
77
|
self.manager_registry: Optional[ManagerRegistry] = None
|
|
78
78
|
self.logger_manager: Optional[LoggerManager] = None
|
|
79
79
|
self.http: Optional[HttpManager] = None
|
|
80
|
+
self.httpx: Optional[HttpxManager] = None
|
|
80
81
|
self.browser: Optional[BrowserManager] = None
|
|
81
82
|
self.cache: Optional[CacheManager] = None
|
|
82
83
|
self.proxy: Optional[ProxyManager] = None
|
|
@@ -8,11 +8,12 @@ import logging
|
|
|
8
8
|
from typing import TYPE_CHECKING
|
|
9
9
|
|
|
10
10
|
from ...managers import (
|
|
11
|
-
LoggerManager, HttpManager, BrowserManager, CacheManager,
|
|
11
|
+
LoggerManager, HttpManager, HttpxManager, BrowserManager, CacheManager,
|
|
12
12
|
ProxyManager, ThreadManager, UpdateManager, ManagerRegistry
|
|
13
13
|
)
|
|
14
14
|
from ...managers.logger import LoggerManagerConfig
|
|
15
15
|
from ...managers.http import HttpManagerConfig
|
|
16
|
+
from ...managers.http import HttpxManagerConfig
|
|
16
17
|
from ...managers.browser import BrowserManagerConfig
|
|
17
18
|
from ...managers.cache import CacheManagerConfig
|
|
18
19
|
from ...managers.proxy import ProxyManagerConfig
|
|
@@ -45,6 +46,7 @@ class ManagerFactory:
|
|
|
45
46
|
# Setup each manager
|
|
46
47
|
ManagerFactory._setup_logger_manager(driver, manager_registry)
|
|
47
48
|
ManagerFactory._setup_http_manager(driver, manager_registry)
|
|
49
|
+
ManagerFactory._setup_httpx_manager(driver, manager_registry)
|
|
48
50
|
ManagerFactory._setup_browser_manager(driver, manager_registry)
|
|
49
51
|
ManagerFactory._setup_cache_manager(driver, manager_registry)
|
|
50
52
|
ManagerFactory._setup_proxy_manager(driver, manager_registry)
|
|
@@ -78,6 +80,17 @@ class ManagerFactory:
|
|
|
78
80
|
driver.http = HttpManager(http_config)
|
|
79
81
|
registry.register(driver.http)
|
|
80
82
|
|
|
83
|
+
@staticmethod
|
|
84
|
+
def _setup_httpx_manager(driver: 'UniversalDriver', registry: ManagerRegistry):
|
|
85
|
+
"""Setup HTTPx manager."""
|
|
86
|
+
httpx_config = HttpxManagerConfig(
|
|
87
|
+
enabled=True,
|
|
88
|
+
timeout=driver.config.http_timeout,
|
|
89
|
+
max_retries=driver.config.max_retries
|
|
90
|
+
)
|
|
91
|
+
driver.httpx = HttpxManager(httpx_config)
|
|
92
|
+
registry.register(driver.httpx)
|
|
93
|
+
|
|
81
94
|
@staticmethod
|
|
82
95
|
def _setup_browser_manager(driver: 'UniversalDriver', registry: ManagerRegistry):
|
|
83
96
|
"""Setup browser manager."""
|
|
@@ -4,7 +4,7 @@ Clean manager system for UnrealOn Driver.
|
|
|
4
4
|
|
|
5
5
|
from .base import BaseManager, ManagerConfig, ManagerStatus
|
|
6
6
|
from .logger import LoggerManager, LoggerManagerConfig
|
|
7
|
-
from .http import HttpManager, HttpManagerConfig
|
|
7
|
+
from .http import HttpManager, HttpManagerConfig, HttpxManager, HttpxManagerConfig
|
|
8
8
|
from .browser import BrowserManager, BrowserManagerConfig
|
|
9
9
|
from .cache import CacheManager, CacheManagerConfig
|
|
10
10
|
from .proxy import ProxyManager, ProxyManagerConfig
|
|
@@ -21,6 +21,7 @@ __all__ = [
|
|
|
21
21
|
# Managers
|
|
22
22
|
"LoggerManager", "LoggerManagerConfig",
|
|
23
23
|
"HttpManager", "HttpManagerConfig",
|
|
24
|
+
"HttpxManager", "HttpxManagerConfig",
|
|
24
25
|
"BrowserManager", "BrowserManagerConfig",
|
|
25
26
|
"CacheManager", "CacheManagerConfig",
|
|
26
27
|
"ProxyManager", "ProxyManagerConfig",
|
|
@@ -113,7 +113,7 @@ class BrowserManager(BaseManager):
|
|
|
113
113
|
raise RuntimeError("Failed to initialize browser")
|
|
114
114
|
|
|
115
115
|
try:
|
|
116
|
-
html = await self.browser.
|
|
116
|
+
html = await self.browser.data_extraction.get_page_html()
|
|
117
117
|
self.stats.record_operation(True, 0.0)
|
|
118
118
|
return html
|
|
119
119
|
except Exception as e:
|
|
@@ -121,6 +121,36 @@ class BrowserManager(BaseManager):
|
|
|
121
121
|
self.stats.record_operation(False, 0.0)
|
|
122
122
|
return None
|
|
123
123
|
|
|
124
|
+
async def get_json_content(self) -> Optional[dict]:
|
|
125
|
+
"""Extract JSON content from current page via DataExtractionManager."""
|
|
126
|
+
# Ensure browser is initialized
|
|
127
|
+
if not await self._ensure_browser_initialized():
|
|
128
|
+
raise RuntimeError("Failed to initialize browser")
|
|
129
|
+
|
|
130
|
+
try:
|
|
131
|
+
result = await self.browser.data_extraction.get_json_content()
|
|
132
|
+
self.stats.record_operation(True, 0.0)
|
|
133
|
+
return result
|
|
134
|
+
except Exception as e:
|
|
135
|
+
self.logger.error(f"JSON extraction failed: {e}")
|
|
136
|
+
self.stats.record_operation(False, 0.0)
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
async def get_page_text(self) -> Optional[str]:
|
|
140
|
+
"""Get plain text content from current page via DataExtractionManager."""
|
|
141
|
+
# Ensure browser is initialized
|
|
142
|
+
if not await self._ensure_browser_initialized():
|
|
143
|
+
raise RuntimeError("Failed to initialize browser")
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
result = await self.browser.data_extraction.get_page_text()
|
|
147
|
+
self.stats.record_operation(True, 0.0)
|
|
148
|
+
return result
|
|
149
|
+
except Exception as e:
|
|
150
|
+
self.logger.error(f"Text extraction failed: {e}")
|
|
151
|
+
self.stats.record_operation(False, 0.0)
|
|
152
|
+
return None
|
|
153
|
+
|
|
124
154
|
async def execute_script_async(self, script: str) -> any:
|
|
125
155
|
"""Execute JavaScript on current page via ScriptManager."""
|
|
126
156
|
# Ensure browser is initialized
|
|
@@ -136,3 +166,9 @@ class BrowserManager(BaseManager):
|
|
|
136
166
|
self.logger.error(f"Script execution failed: {e}")
|
|
137
167
|
self.stats.record_operation(False, 0.0)
|
|
138
168
|
raise
|
|
169
|
+
|
|
170
|
+
async def cleanup(self) -> None:
|
|
171
|
+
"""Cleanup browser resources."""
|
|
172
|
+
if self.browser:
|
|
173
|
+
await self.browser.close_async()
|
|
174
|
+
self.browser = None
|
unrealon_driver/managers/http.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Clean HTTP
|
|
2
|
+
Clean HTTP managers for requests - both aiohttp and httpx with HTTP/2 support.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import aiohttp
|
|
7
|
+
import httpx
|
|
7
8
|
from typing import Dict, Any, Optional
|
|
8
9
|
from pydantic import Field
|
|
9
10
|
|
|
@@ -17,8 +18,16 @@ class HttpManagerConfig(ManagerConfig):
|
|
|
17
18
|
connector_limit: int = Field(default=30, description="Connector limit per host")
|
|
18
19
|
|
|
19
20
|
|
|
21
|
+
class HttpxManagerConfig(ManagerConfig):
|
|
22
|
+
"""HTTPx manager configuration with HTTP/2 support."""
|
|
23
|
+
user_agent: str = Field(default="UnrealOn-Driver/1.0", description="User agent string")
|
|
24
|
+
max_connections: int = Field(default=100, description="Max concurrent connections")
|
|
25
|
+
connector_limit: int = Field(default=30, description="Connector limit per host")
|
|
26
|
+
http2: bool = Field(default=True, description="Enable HTTP/2 support")
|
|
27
|
+
|
|
28
|
+
|
|
20
29
|
class HttpManager(BaseManager):
|
|
21
|
-
"""Clean HTTP manager with aiohttp."""
|
|
30
|
+
"""Clean HTTP manager with aiohttp (original)."""
|
|
22
31
|
|
|
23
32
|
def __init__(self, config: HttpManagerConfig):
|
|
24
33
|
super().__init__(config, "http")
|
|
@@ -105,3 +114,98 @@ class HttpManager(BaseManager):
|
|
|
105
114
|
finally:
|
|
106
115
|
duration = asyncio.get_event_loop().time() - start_time
|
|
107
116
|
self.stats.record_operation(success, duration)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class HttpxManager(BaseManager):
    """Modern HTTP manager with httpx and HTTP/2 support.

    Owns a single shared ``httpx.AsyncClient``. Every call made through
    :meth:`get`, :meth:`post` or :meth:`request` is timed and reported to
    ``self.stats.record_operation``; an operation counts as successful when
    the client returns a response without raising (the HTTP status code is
    not inspected).
    """

    def __init__(self, config: HttpxManagerConfig):
        super().__init__(config, "httpx")
        self.config: HttpxManagerConfig = config
        # Created by _initialize(); None means "not initialized" or "shut down".
        self.client: Optional[httpx.AsyncClient] = None

    async def _initialize(self) -> bool:
        """Create the shared client.

        Returns:
            True when the client was created, False when construction failed
            (the error is logged, not raised).
        """
        try:
            # The same ceiling is used for total and keep-alive connections.
            # NOTE(review): config.connector_limit is not applied here.
            limits = httpx.Limits(
                max_keepalive_connections=self.config.max_connections,
                max_connections=self.config.max_connections,
                keepalive_expiry=300,
            )

            self.client = httpx.AsyncClient(
                limits=limits,
                timeout=httpx.Timeout(self.config.timeout),
                headers={"User-Agent": self.config.user_agent},
                http2=self.config.http2,
                verify=True,
                follow_redirects=True,
            )
            return True

        except Exception as e:
            self.logger.error(f"HTTPx manager initialization failed: {e}")
            return False

    async def _shutdown(self):
        """Close the shared client, if any.

        The reference is dropped *before* closing so that the manager never
        keeps pointing at a half-closed client when ``aclose()`` raises.
        """
        client, self.client = self.client, None
        if client:
            await client.aclose()

    async def get(self, url: str, **kwargs) -> httpx.Response:
        """Make GET request.

        Raises:
            RuntimeError: if the manager has not been initialized.
        """
        return await self.request("GET", url, **kwargs)

    async def post(self, url: str, **kwargs) -> httpx.Response:
        """Make POST request.

        Raises:
            RuntimeError: if the manager has not been initialized.
        """
        return await self.request("POST", url, **kwargs)

    async def request(self, method: str, url: str, **kwargs) -> httpx.Response:
        """Make generic request and record its outcome in the statistics.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            url: Target URL.
            **kwargs: Passed through unchanged to ``httpx.AsyncClient.request``.

        Returns:
            The ``httpx.Response`` produced by the client.

        Raises:
            RuntimeError: if the manager has not been initialized.
        """
        if not self.client:
            raise RuntimeError("HTTPx manager not initialized")

        # We are inside a coroutine, so a loop is guaranteed to be running.
        clock = asyncio.get_running_loop().time
        started = clock()
        success = False

        try:
            response = await self.client.request(method, url, **kwargs)
            success = True
            return response
        finally:
            # Runs for both outcomes so failed requests are counted as well.
            self.stats.record_operation(success, clock() - started)
|
|
@@ -3,8 +3,9 @@ Clean threading manager.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import asyncio
|
|
6
|
+
import inspect
|
|
6
7
|
from concurrent.futures import ThreadPoolExecutor
|
|
7
|
-
from typing import Any, Callable, Optional, Dict
|
|
8
|
+
from typing import Any, Callable, Optional, Dict, Coroutine, Union
|
|
8
9
|
from pydantic import Field
|
|
9
10
|
|
|
10
11
|
from .base import BaseManager, ManagerConfig
|
|
@@ -38,12 +39,52 @@ class ThreadManager(BaseManager):
|
|
|
38
39
|
self.executor = None
|
|
39
40
|
|
|
40
41
|
async def run_in_thread(self, func: Callable, *args, **kwargs) -> Any:
    """Run ``func`` on the manager's thread pool and await its result.

    Supports both plain callables and ``async def`` functions. A coroutine
    function is driven to completion on a private event loop inside the
    worker thread.

    Args:
        func: Callable or coroutine function to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        RuntimeError: if the thread pool has not been created yet.
    """
    if not self.executor:
        raise RuntimeError("Thread manager not initialized")

    # Imported locally so the method does not depend on a new module import.
    from functools import partial

    if inspect.iscoroutinefunction(func):
        def call():
            # asyncio.run creates a fresh loop for this thread, closes it and
            # un-sets it afterwards, so the pooled thread is left clean.
            return asyncio.run(func(*args, **kwargs))
    else:
        # run_in_executor only forwards positional arguments; bind everything
        # up front so keyword arguments reach ``func`` as well.
        call = partial(func, *args, **kwargs)

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self.executor, call)
|
|
64
|
+
|
|
65
|
+
async def run_concurrent_async(self, async_funcs: list[Callable], max_concurrent: int = None) -> list[Any]:
    """Await several zero-argument coroutine functions with bounded concurrency.

    Everything runs on the current event loop (no threads involved); a
    semaphore caps how many of the callables are in flight at once.

    Args:
        async_funcs: Coroutine functions, each called with no arguments.
        max_concurrent: Concurrency cap. When falsy, ``config.max_workers``
            is used instead.

    Returns:
        Results in the same order as ``async_funcs``. A callable that raised
        contributes its exception object instead of a value.
    """
    if not async_funcs:
        return []

    limit = max_concurrent or self.config.max_workers
    gate = asyncio.Semaphore(limit)

    async def guarded(factory):
        # The coroutine is only created once a slot has been acquired.
        async with gate:
            return await factory()

    return await asyncio.gather(*map(guarded, async_funcs), return_exceptions=True)
|
|
47
88
|
|
|
48
89
|
async def _health_check(self) -> Dict[str, Any]:
|
|
49
90
|
"""Thread manager health check."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|