webscout 8.3.5__py3-none-any.whl → 8.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic.
- webscout/Bard.py +12 -6
- webscout/DWEBS.py +66 -57
- webscout/Provider/{UNFINISHED → AISEARCH}/PERPLEXED_search.py +34 -74
- webscout/Provider/AISEARCH/__init__.py +1 -1
- webscout/Provider/Deepinfra.py +6 -0
- webscout/Provider/Flowith.py +6 -1
- webscout/Provider/GithubChat.py +1 -0
- webscout/Provider/GptOss.py +207 -0
- webscout/Provider/Kimi.py +445 -0
- webscout/Provider/Netwrck.py +3 -6
- webscout/Provider/OPENAI/README.md +2 -1
- webscout/Provider/OPENAI/TogetherAI.py +50 -55
- webscout/Provider/OPENAI/__init__.py +4 -2
- webscout/Provider/OPENAI/copilot.py +20 -4
- webscout/Provider/OPENAI/deepinfra.py +6 -0
- webscout/Provider/OPENAI/e2b.py +60 -8
- webscout/Provider/OPENAI/flowith.py +4 -3
- webscout/Provider/OPENAI/generate_api_key.py +48 -0
- webscout/Provider/OPENAI/gptoss.py +288 -0
- webscout/Provider/OPENAI/kimi.py +469 -0
- webscout/Provider/OPENAI/netwrck.py +8 -12
- webscout/Provider/OPENAI/refact.py +274 -0
- webscout/Provider/OPENAI/textpollinations.py +3 -6
- webscout/Provider/OPENAI/toolbaz.py +1 -0
- webscout/Provider/TTI/bing.py +14 -2
- webscout/Provider/TTI/together.py +10 -9
- webscout/Provider/TTS/README.md +0 -1
- webscout/Provider/TTS/__init__.py +0 -1
- webscout/Provider/TTS/base.py +479 -159
- webscout/Provider/TTS/deepgram.py +409 -156
- webscout/Provider/TTS/elevenlabs.py +425 -111
- webscout/Provider/TTS/freetts.py +317 -140
- webscout/Provider/TTS/gesserit.py +192 -128
- webscout/Provider/TTS/murfai.py +248 -113
- webscout/Provider/TTS/openai_fm.py +347 -129
- webscout/Provider/TTS/speechma.py +620 -586
- webscout/Provider/TextPollinationsAI.py +3 -6
- webscout/Provider/TogetherAI.py +50 -55
- webscout/Provider/UNFINISHED/VercelAIGateway.py +339 -0
- webscout/Provider/__init__.py +2 -90
- webscout/Provider/cerebras.py +83 -33
- webscout/Provider/copilot.py +42 -23
- webscout/Provider/toolbaz.py +1 -0
- webscout/conversation.py +22 -20
- webscout/sanitize.py +14 -10
- webscout/scout/README.md +20 -23
- webscout/scout/core/crawler.py +125 -38
- webscout/scout/core/scout.py +26 -5
- webscout/version.py +1 -1
- webscout/webscout_search.py +13 -6
- webscout/webscout_search_async.py +10 -8
- webscout/yep_search.py +13 -5
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/METADATA +2 -1
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/RECORD +59 -56
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/OPENAI/c4ai.py +0 -394
- webscout/Provider/OPENAI/glider.py +0 -330
- webscout/Provider/TTS/sthir.py +0 -94
- /webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/WHEEL +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/top_level.txt +0 -0
webscout/scout/core/crawler.py
CHANGED
@@ -1,5 +1,5 @@
 """
-Scout Crawler Module
+Scout Crawler Module - Ultra Advanced Web Crawling System
 """
 
 import concurrent.futures
@@ -7,18 +7,82 @@ import urllib.parse
 import time
 import hashlib
 import re
+import json
+import sqlite3
+import threading
+import queue
+import logging
+import mimetypes
+import pickle
+import asyncio
+import aiohttp
+import random
 from urllib import robotparser
-from datetime import datetime
-from typing import Dict, List, Optional, Union
-from
-from
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Union, Set, Tuple, Callable, Any
+from collections import defaultdict, deque
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+
+try:
+    from webscout.litagent import LitAgent
+except ImportError:
+    LitAgent = None
+
+try:
+    from curl_cffi.requests import Session
+except ImportError:
+    import requests
+    Session = requests.Session
 
 from .scout import Scout
+from .text_analyzer import ScoutTextAnalyzer
+
 
+@dataclass
+class CrawlConfig:
+    """Configuration for the crawler."""
+    max_pages: int = 1000
+    max_depth: int = 10
+    delay: float = 0.5
+    obey_robots: bool = True
+    crawl_subdomains: bool = True
+    max_workers: int = 10
+    timeout: int = 30
+    retry_attempts: int = 3
+    include_external_links: bool = False
+    extract_metadata: bool = True
+    extract_structured_data: bool = True
+    extract_semantic_content: bool = True
+
+
+@dataclass
+class PageData:
+    """Comprehensive page data for LLM training."""
+    url: str
+    title: str
+    text: str
+    clean_text: str
+    markdown_text: str
+    links: List[str]
+    internal_links: List[str]
+    external_links: List[str]
+    metadata: Dict[str, Any]
+    structured_data: Dict[str, Any]
+    semantic_content: Dict[str, Any]
+    headers: Dict[str, str]
+    status_code: int
+    content_type: str
+    language: str
+    timestamp: str
+    depth: int
+    word_count: int
 
 class ScoutCrawler:
     """
-
+    Ultra-advanced web crawling utility optimized for LLM data collection.
     """
     def __init__(self, base_url: str, max_pages: int = 50, tags_to_remove: List[str] = None, session: Optional[Session] = None, delay: float = 0.5, obey_robots: bool = True, allowed_domains: Optional[List[str]] = None):
         """
@@ -33,13 +97,7 @@ class ScoutCrawler:
         self.max_pages = max_pages
         self.tags_to_remove = tags_to_remove if tags_to_remove is not None else [
             "script",
-            "style"
-            "header",
-            "footer",
-            "nav",
-            "aside",
-            "form",
-            "button",
+            "style"
         ]
         self.visited_urls = set()
         self.crawled_pages = []
@@ -50,7 +108,10 @@ class ScoutCrawler:
         self.session.headers.setdefault("User-Agent", self.agent.chrome())
         self.delay = delay
         self.obey_robots = obey_robots
-
+        # Allow crawling of subdomains by default
+        base_domain = urllib.parse.urlparse(base_url).netloc.split('.')
+        self.base_domain = '.'.join(base_domain[-2:]) if len(base_domain) > 1 else base_domain[0]
+        self.allowed_domains = allowed_domains or [self.base_domain]
         self.last_request_time = 0
         self.url_hashes = set()
         if obey_robots:
@@ -84,7 +145,8 @@ class ScoutCrawler:
         parsed_url = urllib.parse.urlparse(url)
         if parsed_url.scheme not in ["http", "https"]:
             return False
-
+        # Allow crawling subdomains
+        if not parsed_url.netloc.endswith(self.base_domain):
             return False
         if self.obey_robots and self.robots:
             return self.robots.can_fetch("*", url)
@@ -127,6 +189,9 @@ class ScoutCrawler:
         """
        if url in self.visited_urls or self._is_duplicate(url):
            return {}
+        # Log URL to crawl
+        print(f"Attempting to crawl URL: {url} (depth: {depth})")
+
        # Throttle requests
        now = time.time()
        if self.last_request_time:
@@ -142,18 +207,38 @@ class ScoutCrawler:
            scout = Scout(response.content, features="lxml")
            title_result = scout.find("title")
            title = title_result[0].get_text() if title_result else ""
+
+            # Remove only script and style tags before extracting text
            for tag_name in self.tags_to_remove:
                for tag in scout._soup.find_all(tag_name):
-                    tag.
+                    tag.decompose()
+
            visible_text = self._extract_main_text(scout._soup)
+
+            # Extract links from header, footer, nav, etc.
+            essential_links = []
+            for essential_tag in ['header', 'nav', 'footer']:
+                elements = scout.find_all(essential_tag)
+                for element in elements:
+                    links = element.find_all('a', href=True)
+                    essential_links.extend(
+                        urllib.parse.urljoin(url, link.get('href'))
+                        for link in links
+                        if link.get('href') and self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
+                    )
+
+            all_links = [
+                urllib.parse.urljoin(url, link.get('href'))
+                for link in scout.find_all('a', href=True)
+                if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
+            ]
+
+            combined_links = list(set(all_links + essential_links))
+
            page_info = {
                'url': url,
                'title': title,
-                'links': [
-                    urllib.parse.urljoin(url, link.get('href'))
-                    for link in scout.find_all('a', href=True)
-                    if self._is_valid_url(urllib.parse.urljoin(url, link.get('href')))
-                ],
+                'links': combined_links,
                'text': visible_text,
                'depth': depth,
                'timestamp': datetime.utcnow().isoformat(),
@@ -178,7 +263,7 @@ class ScoutCrawler:
            submitted_links: set[str] = set()
 
            while futures:
-                if len(self.visited_urls) >= self.max_pages:
+                if self.max_pages is not None and len(self.visited_urls) >= self.max_pages:
                    break
                done, not_done = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED
@@ -190,21 +275,23 @@ class ScoutCrawler:
 
                if page_info:
                    yield page_info
+
+                    if self.max_pages is not None and len(self.visited_urls) >= self.max_pages:
+                        return
 
-                            link,
-                            page_info.get("depth", 0) + 1,
+                    for link in page_info.get("links", []):
+                        if (
+                            (self.max_pages is None or len(self.visited_urls) < self.max_pages)
+                            and link not in self.visited_urls
+                            and link not in submitted_links
+                        ):
+                            submitted_links.add(link)
+                            futures.add(
+                                executor.submit(
+                                    self._crawl_page,
+                                    link,
+                                    page_info.get("depth", 0) + 1,
+                                )
                            )
-
+                else:
+                    print(f"No page info retrieved from crawling")
webscout/scout/core/scout.py
CHANGED
@@ -24,7 +24,8 @@ class Scout:
     Enhanced with advanced features and intelligent parsing.
     """
 
-    def __init__(self, markup="", features='html.parser', from_encoding=None,
+    def __init__(self, markup="", features='html.parser', from_encoding=None,
+                 exclude_encodings=None, element_classes=None, **kwargs):
         """
         Initialize Scout with HTML content.
 
@@ -32,8 +33,17 @@ class Scout:
             markup (str): HTML content to parse
             features (str): Parser to use ('html.parser', 'lxml', 'html5lib', 'lxml-xml')
             from_encoding (str): Source encoding (if known)
+            exclude_encodings (list): Encodings to avoid
+            element_classes (dict): Custom classes for different element types
             **kwargs: Additional parsing options
         """
+        # Store original markup and settings
+        self.original_encoding = from_encoding
+        self.exclude_encodings = exclude_encodings or []
+        self.element_classes = element_classes or {}
+        self.builder_features = features
+        self.contains_replacement_characters = False
+
         # Intelligent markup handling
         self.markup = self._preprocess_markup(markup, from_encoding)
         self.features = features
@@ -50,13 +60,24 @@ class Scout:
 
         # Parse that HTML! 🎯
         self._soup = self.parser.parse(self.markup)
-
+
+        # Set up the root element properly
+        if hasattr(self._soup, 'name'):
+            self.name = self._soup.name
+        else:
+            self.name = '[document]'
+
         # BeautifulSoup-like attributes
-        self.name = self._soup.name if hasattr(self._soup, 'name') else None
         self.attrs = self._soup.attrs if hasattr(self._soup, 'attrs') else {}
-
-
+        self.contents = self._soup.contents if hasattr(self._soup, 'contents') else []
+        self.parent = None
+        self.next_sibling = None
+        self.previous_sibling = None
+
+        # Advanced parsing options and caching
         self._cache = {}
+        self._tag_name_cache = {}
+        self._css_selector_cache = {}
 
         # Text and web analyzers
         self.text_analyzer = ScoutTextAnalyzer()
webscout/version.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = "8.3.5"
+__version__ = "8.3.6"
 __prog__ = "webscout"
webscout/webscout_search.py
CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 # import logging
 import json
-
+import os
 import warnings
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime, timezone
@@ -13,10 +13,17 @@ from random import choice, shuffle
 from threading import Event
 from time import sleep, time
 from types import TracebackType
-from typing import Any,
-import
-
+from typing import Any, Literal
+from urllib.parse import quote
+
 from webscout.litagent import LitAgent
+
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 import curl_cffi.requests  # type: ignore
 
 try:
@@ -28,7 +35,7 @@ try:
 except ImportError:
     LXML_AVAILABLE = False
 
-from .exceptions import
+from .exceptions import RatelimitE, TimeoutE, WebscoutE
 from .utils import (
     _calculate_distance,
     _expand_proxy_tb_alias,
@@ -1173,4 +1180,4 @@ class WEBS:
                 "visibility_m": hour.get("visibility"),
             })
 
-        return formatted_data
+        return formatted_data
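The guarded trio import above addresses an import-order issue: per the comment in the diff, curl_cffi can conflict with eventlet's socket monkey-patching unless trio is imported first (trio issue 3015). The same pattern is applied in webscout_search_async.py and yep_search.py below; as a rough sketch, application code that imports curl_cffi directly can use the same ordering:

# Same defensive ordering, usable in application code (sketch).
try:
    import trio  # noqa: F401  - only the import order matters; trio itself is unused
except ImportError:
    pass  # trio is optional
import curl_cffi.requests  # imported only after the trio attempt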
webscout/webscout_search_async.py
CHANGED
@@ -3,14 +3,19 @@ from __future__ import annotations
 import asyncio
 import os
 import warnings
-from datetime import datetime, timezone
 from functools import cached_property
 from itertools import cycle
 from random import choice, shuffle
 from time import time
 from types import TracebackType
-from typing import Any, Dict, List, Optional, Type, Union
-
+from typing import Any, Dict, List, Optional, Type, Union
+
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
 import curl_cffi.requests
 from lxml.etree import _Element
 from lxml.html import HTMLParser as LHTMLParser
@@ -18,18 +23,15 @@ from lxml.html import document_fromstring
 
 from webscout.litagent.agent import LitAgent
 
-from .exceptions import
+from .exceptions import RatelimitE, TimeoutE, WebscoutE
 from .utils import (
     _expand_proxy_tb_alias,
     _extract_vqd,
     _normalize,
     _normalize_url,
-    json_loads,
 )
 
 
-
-
 class AsyncWEBS:
     """Asynchronous webscout class to get search results."""
 
@@ -644,4 +646,4 @@ class AsyncWEBS:
         TimeoutE: Inherits from WebscoutE, raised for API request timeouts.
         """
         # These methods are not implemented in the async version yet
-        raise NotImplementedError("aweather method is not implemented yet")
+        raise NotImplementedError("aweather method is not implemented yet")
webscout/yep_search.py
CHANGED
@@ -1,9 +1,17 @@
-
+# Import trio before curl_cffi to prevent eventlet socket monkey-patching conflicts
+# See: https://github.com/python-trio/trio/issues/3015
+try:
+    import trio  # noqa: F401
+except ImportError:
+    pass  # trio is optional, ignore if not available
+from concurrent.futures import ThreadPoolExecutor
+from typing import Dict, List, Optional
 from urllib.parse import urlencode
+
+from curl_cffi.requests import Session
+
 from webscout.litagent import LitAgent
-
-from concurrent.futures import ThreadPoolExecutor
-import json
+
 
 class YepSearch:
     """Yep.com search class to get search results."""
@@ -335,4 +343,4 @@ if __name__ == "__main__":
     print("---" * 30)
     print(image_results)
     print("---" * 30)
-    print(suggestions)
+    print(suggestions)
{webscout-8.3.5.dist-info → webscout-8.3.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: webscout
-Version: 8.3.5
+Version: 8.3.6
 Summary: Search for anything using Google, DuckDuckGo, phind.com, Contains AI models, can transcribe yt videos, temporary email and phone number generation, has TTS support, webai (terminal gpt and open interpreter) and offline LLMs and more
 Author-email: OEvortex <helpingai5@gmail.com>
 License: HelpingAI
@@ -73,6 +73,7 @@ Requires-Dist: tiktoken; extra == "api"
 Requires-Dist: motor; extra == "api"
 Requires-Dist: jinja2; extra == "api"
 Requires-Dist: supabase; extra == "api"
+Requires-Dist: websockets>=11.0; extra == "api"
 Dynamic: license-file
 
 <div align="center">