webscout 8.2.8__py3-none-any.whl → 8.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (197)
  1. webscout/AIauto.py +34 -16
  2. webscout/AIbase.py +96 -37
  3. webscout/AIutel.py +491 -87
  4. webscout/Bard.py +441 -323
  5. webscout/Extra/GitToolkit/__init__.py +10 -10
  6. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  7. webscout/Litlogger/README.md +10 -0
  8. webscout/Litlogger/__init__.py +7 -59
  9. webscout/Litlogger/formats.py +4 -0
  10. webscout/Litlogger/handlers.py +103 -0
  11. webscout/Litlogger/levels.py +13 -0
  12. webscout/Litlogger/logger.py +92 -0
  13. webscout/Provider/AISEARCH/Perplexity.py +332 -358
  14. webscout/Provider/AISEARCH/felo_search.py +9 -35
  15. webscout/Provider/AISEARCH/genspark_search.py +30 -56
  16. webscout/Provider/AISEARCH/hika_search.py +4 -16
  17. webscout/Provider/AISEARCH/iask_search.py +410 -436
  18. webscout/Provider/AISEARCH/monica_search.py +4 -30
  19. webscout/Provider/AISEARCH/scira_search.py +6 -32
  20. webscout/Provider/AISEARCH/webpilotai_search.py +38 -64
  21. webscout/Provider/Blackboxai.py +155 -35
  22. webscout/Provider/ChatSandbox.py +2 -1
  23. webscout/Provider/Deepinfra.py +339 -339
  24. webscout/Provider/ExaChat.py +358 -358
  25. webscout/Provider/Gemini.py +169 -169
  26. webscout/Provider/GithubChat.py +1 -2
  27. webscout/Provider/Glider.py +3 -3
  28. webscout/Provider/HeckAI.py +172 -82
  29. webscout/Provider/LambdaChat.py +1 -0
  30. webscout/Provider/MCPCore.py +7 -3
  31. webscout/Provider/OPENAI/BLACKBOXAI.py +421 -139
  32. webscout/Provider/OPENAI/Cloudflare.py +38 -21
  33. webscout/Provider/OPENAI/FalconH1.py +457 -0
  34. webscout/Provider/OPENAI/FreeGemini.py +35 -18
  35. webscout/Provider/OPENAI/NEMOTRON.py +34 -34
  36. webscout/Provider/OPENAI/PI.py +427 -0
  37. webscout/Provider/OPENAI/Qwen3.py +304 -0
  38. webscout/Provider/OPENAI/README.md +952 -1253
  39. webscout/Provider/OPENAI/TwoAI.py +374 -0
  40. webscout/Provider/OPENAI/__init__.py +7 -1
  41. webscout/Provider/OPENAI/ai4chat.py +73 -63
  42. webscout/Provider/OPENAI/api.py +869 -644
  43. webscout/Provider/OPENAI/base.py +2 -0
  44. webscout/Provider/OPENAI/c4ai.py +34 -13
  45. webscout/Provider/OPENAI/chatgpt.py +575 -556
  46. webscout/Provider/OPENAI/chatgptclone.py +512 -487
  47. webscout/Provider/OPENAI/chatsandbox.py +11 -6
  48. webscout/Provider/OPENAI/copilot.py +258 -0
  49. webscout/Provider/OPENAI/deepinfra.py +327 -318
  50. webscout/Provider/OPENAI/e2b.py +140 -104
  51. webscout/Provider/OPENAI/exaai.py +420 -411
  52. webscout/Provider/OPENAI/exachat.py +448 -443
  53. webscout/Provider/OPENAI/flowith.py +7 -3
  54. webscout/Provider/OPENAI/freeaichat.py +12 -8
  55. webscout/Provider/OPENAI/glider.py +15 -8
  56. webscout/Provider/OPENAI/groq.py +5 -2
  57. webscout/Provider/OPENAI/heckai.py +311 -307
  58. webscout/Provider/OPENAI/llmchatco.py +9 -7
  59. webscout/Provider/OPENAI/mcpcore.py +18 -9
  60. webscout/Provider/OPENAI/multichat.py +7 -5
  61. webscout/Provider/OPENAI/netwrck.py +16 -11
  62. webscout/Provider/OPENAI/oivscode.py +290 -0
  63. webscout/Provider/OPENAI/opkfc.py +507 -496
  64. webscout/Provider/OPENAI/pydantic_imports.py +172 -0
  65. webscout/Provider/OPENAI/scirachat.py +29 -17
  66. webscout/Provider/OPENAI/sonus.py +308 -303
  67. webscout/Provider/OPENAI/standardinput.py +442 -433
  68. webscout/Provider/OPENAI/textpollinations.py +18 -11
  69. webscout/Provider/OPENAI/toolbaz.py +419 -413
  70. webscout/Provider/OPENAI/typefully.py +17 -10
  71. webscout/Provider/OPENAI/typegpt.py +21 -11
  72. webscout/Provider/OPENAI/uncovrAI.py +477 -462
  73. webscout/Provider/OPENAI/utils.py +90 -79
  74. webscout/Provider/OPENAI/venice.py +435 -425
  75. webscout/Provider/OPENAI/wisecat.py +387 -381
  76. webscout/Provider/OPENAI/writecream.py +166 -163
  77. webscout/Provider/OPENAI/x0gpt.py +26 -37
  78. webscout/Provider/OPENAI/yep.py +384 -356
  79. webscout/Provider/PI.py +2 -1
  80. webscout/Provider/TTI/README.md +55 -101
  81. webscout/Provider/TTI/__init__.py +4 -9
  82. webscout/Provider/TTI/aiarta.py +365 -0
  83. webscout/Provider/TTI/artbit.py +0 -0
  84. webscout/Provider/TTI/base.py +64 -0
  85. webscout/Provider/TTI/fastflux.py +200 -0
  86. webscout/Provider/TTI/magicstudio.py +201 -0
  87. webscout/Provider/TTI/piclumen.py +203 -0
  88. webscout/Provider/TTI/pixelmuse.py +225 -0
  89. webscout/Provider/TTI/pollinations.py +221 -0
  90. webscout/Provider/TTI/utils.py +11 -0
  91. webscout/Provider/TTS/__init__.py +2 -1
  92. webscout/Provider/TTS/base.py +159 -159
  93. webscout/Provider/TTS/openai_fm.py +129 -0
  94. webscout/Provider/TextPollinationsAI.py +308 -308
  95. webscout/Provider/TwoAI.py +239 -44
  96. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  97. webscout/Provider/UNFINISHED/puterjs.py +635 -0
  98. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  99. webscout/Provider/Writecream.py +246 -246
  100. webscout/Provider/__init__.py +2 -2
  101. webscout/Provider/ai4chat.py +33 -8
  102. webscout/Provider/granite.py +41 -6
  103. webscout/Provider/koala.py +169 -169
  104. webscout/Provider/oivscode.py +309 -0
  105. webscout/Provider/samurai.py +3 -2
  106. webscout/Provider/scnet.py +1 -0
  107. webscout/Provider/typegpt.py +3 -3
  108. webscout/Provider/uncovr.py +368 -368
  109. webscout/client.py +70 -0
  110. webscout/litprinter/__init__.py +58 -58
  111. webscout/optimizers.py +419 -419
  112. webscout/scout/README.md +3 -1
  113. webscout/scout/core/crawler.py +134 -64
  114. webscout/scout/core/scout.py +148 -109
  115. webscout/scout/element.py +106 -88
  116. webscout/swiftcli/Readme.md +323 -323
  117. webscout/swiftcli/plugins/manager.py +9 -2
  118. webscout/version.py +1 -1
  119. webscout/zeroart/__init__.py +134 -134
  120. webscout/zeroart/effects.py +100 -100
  121. webscout/zeroart/fonts.py +1238 -1238
  122. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/METADATA +160 -35
  123. webscout-8.3.dist-info/RECORD +290 -0
  124. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/WHEEL +1 -1
  125. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/entry_points.txt +1 -0
  126. webscout/Litlogger/Readme.md +0 -175
  127. webscout/Litlogger/core/__init__.py +0 -6
  128. webscout/Litlogger/core/level.py +0 -23
  129. webscout/Litlogger/core/logger.py +0 -165
  130. webscout/Litlogger/handlers/__init__.py +0 -12
  131. webscout/Litlogger/handlers/console.py +0 -33
  132. webscout/Litlogger/handlers/file.py +0 -143
  133. webscout/Litlogger/handlers/network.py +0 -173
  134. webscout/Litlogger/styles/__init__.py +0 -7
  135. webscout/Litlogger/styles/colors.py +0 -249
  136. webscout/Litlogger/styles/formats.py +0 -458
  137. webscout/Litlogger/styles/text.py +0 -87
  138. webscout/Litlogger/utils/__init__.py +0 -6
  139. webscout/Litlogger/utils/detectors.py +0 -153
  140. webscout/Litlogger/utils/formatters.py +0 -200
  141. webscout/Provider/ChatGPTGratis.py +0 -194
  142. webscout/Provider/TTI/AiForce/README.md +0 -159
  143. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  144. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  145. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  146. webscout/Provider/TTI/FreeAIPlayground/README.md +0 -99
  147. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  148. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  149. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  150. webscout/Provider/TTI/ImgSys/README.md +0 -174
  151. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  152. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  153. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  154. webscout/Provider/TTI/MagicStudio/README.md +0 -101
  155. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  156. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  157. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  158. webscout/Provider/TTI/Nexra/README.md +0 -155
  159. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  160. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  161. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  162. webscout/Provider/TTI/PollinationsAI/README.md +0 -146
  163. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  164. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  165. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  166. webscout/Provider/TTI/aiarta/README.md +0 -134
  167. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  168. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  169. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  170. webscout/Provider/TTI/artbit/README.md +0 -100
  171. webscout/Provider/TTI/artbit/__init__.py +0 -22
  172. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  173. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  174. webscout/Provider/TTI/fastflux/README.md +0 -129
  175. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  176. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  177. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  178. webscout/Provider/TTI/huggingface/README.md +0 -114
  179. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  180. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  181. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  182. webscout/Provider/TTI/piclumen/README.md +0 -161
  183. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  184. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  185. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  186. webscout/Provider/TTI/pixelmuse/README.md +0 -79
  187. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  188. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  189. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  190. webscout/Provider/TTI/talkai/README.md +0 -139
  191. webscout/Provider/TTI/talkai/__init__.py +0 -4
  192. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  193. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  194. webscout/Provider/UNFINISHED/oivscode.py +0 -351
  195. webscout-8.2.8.dist-info/RECORD +0 -334
  196. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/licenses/LICENSE.md +0 -0
  197. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/top_level.txt +0 -0
webscout/scout/README.md CHANGED
@@ -148,6 +148,7 @@ Scout provides powerful tools for navigating and manipulating HTML/XML documents
148
148
  - **Tree Traversal**: Navigate parent-child relationships and sibling elements
149
149
  - **Content Extraction**: Extract text, attributes, and structured data
150
150
  - **Document Manipulation**: Modify, replace, or remove elements
151
+ - **Dynamic Building**: Easily append or insert new nodes
151
152
 
152
153
  ```python
153
154
  # CSS selector support
@@ -159,6 +160,7 @@ results = scout.find_all('a', attrs={'class': 'external', 'rel': 'nofollow'})
159
160
  # Tree traversal
160
161
  parent = element.find_parent('div')
161
162
  siblings = element.find_next_siblings('p')
163
+ prev_sibling = element.find_previous_sibling('p')
162
164
  ```
163
165
 
164
166
  ### 🧠 Intelligent Analysis
@@ -363,7 +365,7 @@ For detailed API documentation, please refer to the [documentation](https://gith
363
365
 
364
366
  ## 🔧 Dependencies
365
367
 
366
- - `requests`: HTTP library for making web requests
368
+ - `curl_cffi`: HTTP library used for web requests
367
369
  - `lxml`: XML and HTML processing library (optional, recommended)
368
370
  - `html5lib`: Standards-compliant HTML parser (optional)
369
371
  - `markdownify`: HTML to Markdown conversion
webscout/scout/core/crawler.py CHANGED
@@ -4,19 +4,26 @@ Scout Crawler Module
4
4
 
5
5
  import concurrent.futures
6
6
  import urllib.parse
7
- from typing import Union, List, Dict
8
- import requests
7
+ import time
8
+ import hashlib
9
+ import re
10
+ from urllib import robotparser
11
+ from datetime import datetime
12
+ from typing import Dict, List, Optional, Union
13
+ from webscout.litagent import LitAgent
14
+ from curl_cffi.requests import Session
9
15
 
10
16
  from .scout import Scout
11
17
 
18
+
12
19
  class ScoutCrawler:
13
20
  """
14
21
  Advanced web crawling utility for Scout library.
15
22
  """
16
- def __init__(self, base_url: str, max_pages: int = 50, tags_to_remove: List[str] = None):
23
+ def __init__(self, base_url: str, max_pages: int = 50, tags_to_remove: List[str] = None, session: Optional[Session] = None, delay: float = 0.5, obey_robots: bool = True, allowed_domains: Optional[List[str]] = None):
17
24
  """
18
25
  Initialize the web crawler.
19
-
26
+
20
27
  Args:
21
28
  base_url (str): Starting URL to crawl
22
29
  max_pages (int, optional): Maximum number of pages to crawl
@@ -24,117 +31,180 @@ class ScoutCrawler:
24
31
  """
25
32
  self.base_url = base_url
26
33
  self.max_pages = max_pages
27
- self.tags_to_remove = tags_to_remove if tags_to_remove is not None else ["script", "style", "header", "footer", "nav", "aside", "form", "button"]
34
+ self.tags_to_remove = tags_to_remove if tags_to_remove is not None else [
35
+ "script",
36
+ "style",
37
+ "header",
38
+ "footer",
39
+ "nav",
40
+ "aside",
41
+ "form",
42
+ "button",
43
+ ]
28
44
  self.visited_urls = set()
29
45
  self.crawled_pages = []
30
-
46
+ self.session = session or Session()
47
+ self.agent = LitAgent()
48
+ # Use all headers and generate fingerprint
49
+ self.session.headers = self.agent.generate_fingerprint()
50
+ self.session.headers.setdefault("User-Agent", self.agent.chrome())
51
+ self.delay = delay
52
+ self.obey_robots = obey_robots
53
+ self.allowed_domains = allowed_domains or [urllib.parse.urlparse(base_url).netloc]
54
+ self.last_request_time = 0
55
+ self.url_hashes = set()
56
+ if obey_robots:
57
+ self.robots = robotparser.RobotFileParser()
58
+ robots_url = urllib.parse.urljoin(base_url, '/robots.txt')
59
+ try:
60
+ self.robots.set_url(robots_url)
61
+ self.robots.read()
62
+ except Exception:
63
+ self.robots = None
64
+ else:
65
+ self.robots = None
66
+
67
+ def _normalize_url(self, url: str) -> str:
68
+ url = url.split('#')[0]
69
+ url = re.sub(r'\?.*$', '', url) # Remove query params
70
+ return url.rstrip('/')
71
+
31
72
  def _is_valid_url(self, url: str) -> bool:
32
73
  """
33
74
  Check if a URL is valid and within the same domain.
34
-
75
+
35
76
  Args:
36
77
  url (str): URL to validate
37
-
78
+
38
79
  Returns:
39
80
  bool: Whether the URL is valid
40
81
  """
41
82
  try:
42
83
  parsed_base = urllib.parse.urlparse(self.base_url)
43
84
  parsed_url = urllib.parse.urlparse(url)
44
-
45
- return (
46
- parsed_url.scheme in ['http', 'https'] and
47
- parsed_base.netloc == parsed_url.netloc and
48
- len(self.visited_urls) < self.max_pages
49
- )
85
+ if parsed_url.scheme not in ["http", "https"]:
86
+ return False
87
+ if parsed_url.netloc not in self.allowed_domains:
88
+ return False
89
+ if self.obey_robots and self.robots:
90
+ return self.robots.can_fetch("*", url)
91
+ return True
50
92
  except Exception:
51
93
  return False
52
-
94
+
95
+ def _is_duplicate(self, url: str) -> bool:
96
+ norm = self._normalize_url(url)
97
+ url_hash = hashlib.md5(norm.encode()).hexdigest()
98
+ if url_hash in self.url_hashes:
99
+ return True
100
+ self.url_hashes.add(url_hash)
101
+ return False
102
+
103
+ def _extract_main_text(self, soup):
104
+ # Try to extract main content (simple heuristic)
105
+ main = soup.find('main')
106
+ if main:
107
+ return main.get_text(separator=" ", strip=True)
108
+ article = soup.find('article')
109
+ if article:
110
+ return article.get_text(separator=" ", strip=True)
111
+ # fallback to body
112
+ body = soup.find('body')
113
+ if body:
114
+ return body.get_text(separator=" ", strip=True)
115
+ return soup.get_text(separator=" ", strip=True)
116
+
53
117
  def _crawl_page(self, url: str, depth: int = 0) -> Dict[str, Union[str, List[str]]]:
54
118
  """
55
119
  Crawl a single page and extract information.
56
-
120
+
57
121
  Args:
58
122
  url (str): URL to crawl
59
123
  depth (int, optional): Current crawl depth
60
-
124
+
61
125
  Returns:
62
126
  Dict[str, Union[str, List[str]]]: Crawled page information
63
127
  """
64
- if url in self.visited_urls:
128
+ if url in self.visited_urls or self._is_duplicate(url):
65
129
  return {}
66
-
130
+ # Throttle requests
131
+ now = time.time()
132
+ if self.last_request_time:
133
+ elapsed = now - self.last_request_time
134
+ if elapsed < self.delay:
135
+ time.sleep(self.delay - elapsed)
136
+ self.last_request_time = time.time()
67
137
  try:
68
- response = requests.get(url, timeout=10)
138
+ response = self.session.get(url, timeout=10)
69
139
  response.raise_for_status()
70
-
71
- scout = Scout(response.content, features='lxml')
72
-
73
- title_result = scout.find('title')
74
- title = title_result[0].get_text() if title_result else ''
75
-
76
- visible_text = scout._soup.get_text(strip=True)
77
-
78
- for tag in scout._soup(self.tags_to_remove):
79
- tag.extract()
80
-
140
+ if not response.headers.get('Content-Type', '').startswith('text/html'):
141
+ return {}
142
+ scout = Scout(response.content, features="lxml")
143
+ title_result = scout.find("title")
144
+ title = title_result[0].get_text() if title_result else ""
145
+ for tag_name in self.tags_to_remove:
146
+ for tag in scout._soup.find_all(tag_name):
147
+ tag.extract()
148
+ visible_text = self._extract_main_text(scout._soup)
81
149
  page_info = {
82
150
  'url': url,
83
151
  'title': title,
84
152
  'links': [
85
- urllib.parse.urljoin(url, link.get('href'))
86
- for link in scout.find_all('a', href=True)
153
+ urllib.parse.urljoin(url, link.get('href'))
154
+ for link in scout.find_all('a', href=True)
87
155
  if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
88
156
  ],
89
157
  'text': visible_text,
90
- 'depth': depth
158
+ 'depth': depth,
159
+ 'timestamp': datetime.utcnow().isoformat(),
160
+ 'headers': dict(response.headers),
91
161
  }
92
-
93
162
  self.visited_urls.add(url)
94
163
  self.crawled_pages.append(page_info)
95
-
96
164
  return page_info
97
165
  except Exception as e:
98
166
  print(f"Error crawling {url}: {e}")
99
167
  return {}
100
-
101
- def crawl(self) -> List[Dict[str, Union[str, List[str]]]]:
168
+
169
+ def crawl(self):
102
170
  """
103
- Start web crawling from base URL.
104
-
105
- Returns:
106
- List[Dict[str, Union[str, List[str]]]]: List of crawled pages
171
+ Start web crawling from base URL and yield each crawled page in real time.
172
+
173
+ Yields:
174
+ Dict[str, Union[str, List[str]]]: Crawled page information
107
175
  """
108
176
  with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
109
177
  futures = {executor.submit(self._crawl_page, self.base_url, 0)}
110
-
178
+ submitted_links: set[str] = set()
179
+
111
180
  while futures:
112
- done, futures = concurrent.futures.wait(
181
+ if len(self.visited_urls) >= self.max_pages:
182
+ break
183
+ done, not_done = concurrent.futures.wait(
113
184
  futures, return_when=concurrent.futures.FIRST_COMPLETED
114
185
  )
115
-
186
+ futures = not_done
187
+
116
188
  for future in done:
117
189
  page_info = future.result()
118
-
190
+
191
+ if page_info:
192
+ yield page_info
193
+
119
194
  if len(self.visited_urls) >= self.max_pages:
120
- break
121
-
122
- submitted_links = set() # New set to track submitted links
123
- for link in page_info.get('links', []):
195
+ return
196
+
197
+ for link in page_info.get("links", []):
124
198
  if (
125
- len(self.visited_urls) < self.max_pages and
126
- link not in self.visited_urls
199
+ len(self.visited_urls) < self.max_pages
200
+ and link not in self.visited_urls
201
+ and link not in submitted_links
127
202
  ):
128
- if link not in submitted_links: # Check against submitted links
129
- submitted_links.add(link) # Add to submitted links
130
- futures.add(
131
- executor.submit(
132
- self._crawl_page,
133
- link,
134
- page_info.get('depth', 0) + 1
135
- )
203
+ submitted_links.add(link)
204
+ futures.add(
205
+ executor.submit(
206
+ self._crawl_page,
207
+ link,
208
+ page_info.get("depth", 0) + 1,
136
209
  )
137
- if len(self.visited_urls) >= self.max_pages:
138
- break
139
-
140
- return self.crawled_pages
210
+ )