webscout 5.1__py3-none-any.whl → 5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +83 -277
- webscout/AIbase.py +106 -4
- webscout/AIutel.py +41 -10
- webscout/Agents/Onlinesearcher.py +91 -104
- webscout/Agents/__init__.py +2 -1
- webscout/Agents/ai.py +186 -0
- webscout/Agents/functioncall.py +57 -27
- webscout/Bing_search.py +73 -43
- webscout/DWEBS.py +99 -77
- webscout/Local/_version.py +1 -1
- webscout/Provider/AI21.py +177 -0
- webscout/Provider/Chatify.py +174 -0
- webscout/Provider/Cloudflare.py +0 -4
- webscout/Provider/EDITEE.py +215 -0
- webscout/Provider/{Berlin4h.py → NetFly.py} +81 -82
- webscout/Provider/RUBIKSAI.py +11 -5
- webscout/Provider/TTI/PollinationsAI.py +138 -0
- webscout/Provider/TTI/__init__.py +2 -0
- webscout/Provider/TTI/deepinfra.py +148 -0
- webscout/Provider/TTS/__init__.py +2 -0
- webscout/Provider/TTS/streamElements.py +292 -0
- webscout/Provider/TTS/voicepod.py +118 -0
- webscout/Provider/{liaobots.py → TeachAnything.py} +31 -122
- webscout/Provider/__init__.py +14 -4
- webscout/Provider/ai4chat.py +14 -8
- webscout/Provider/cerebras.py +199 -0
- webscout/Provider/felo_search.py +28 -68
- webscout/Provider/x0gpt.py +181 -0
- webscout/__init__.py +4 -2
- webscout/exceptions.py +2 -1
- webscout/transcriber.py +195 -140
- webscout/version.py +1 -1
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/METADATA +41 -82
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/RECORD +38 -28
- webscout/async_providers.py +0 -21
- webscout/voice.py +0 -34
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/LICENSE.md +0 -0
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/WHEEL +0 -0
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/entry_points.txt +0 -0
- {webscout-5.1.dist-info → webscout-5.3.dist-info}/top_level.txt +0 -0
webscout/Bing_search.py
CHANGED
|
@@ -2,10 +2,12 @@ from bs4 import BeautifulSoup
|
|
|
2
2
|
import requests
|
|
3
3
|
from typing import Dict, List, Optional, Union
|
|
4
4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
-
from urllib.parse import urlparse
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
|
|
5
|
+
from urllib.parse import quote, urlparse, parse_qs
|
|
6
|
+
import base64
|
|
7
|
+
import urllib3
|
|
8
|
+
|
|
9
|
+
# Disable SSL warnings
|
|
10
|
+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
9
11
|
|
|
10
12
|
class BingS:
|
|
11
13
|
"""Bing search class to get search results from bing.com."""
|
|
@@ -21,7 +23,7 @@ class BingS:
|
|
|
21
23
|
"""Initialize the BingS object."""
|
|
22
24
|
self.proxy: Optional[str] = proxy
|
|
23
25
|
self.headers = headers if headers else {
|
|
24
|
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
26
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
|
25
27
|
}
|
|
26
28
|
self.headers["Referer"] = "https://www.bing.com/"
|
|
27
29
|
self.client = requests.Session()
|
|
@@ -43,82 +45,110 @@ class BingS:
|
|
|
43
45
|
data: Optional[Union[Dict[str, str], bytes]] = None,
|
|
44
46
|
) -> bytes:
|
|
45
47
|
try:
|
|
46
|
-
resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
|
|
48
|
+
resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout, verify=False)
|
|
47
49
|
except Exception as ex:
|
|
48
50
|
raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
|
|
49
51
|
if resp.status_code == 200:
|
|
50
52
|
return resp.content
|
|
51
53
|
raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")
|
|
52
54
|
|
|
55
|
+
def extract_text_from_webpage(self, html_content, max_characters=None):
|
|
56
|
+
"""Extracts visible text from HTML content using BeautifulSoup."""
|
|
57
|
+
soup = BeautifulSoup(html_content, "html.parser")
|
|
58
|
+
# Remove unwanted tags
|
|
59
|
+
for tag in soup(["script", "style", "header", "footer", "nav"]):
|
|
60
|
+
tag.extract()
|
|
61
|
+
# Get the remaining visible text
|
|
62
|
+
visible_text = soup.get_text(separator=' ', strip=True)
|
|
63
|
+
if max_characters:
|
|
64
|
+
visible_text = visible_text[:max_characters]
|
|
65
|
+
return visible_text
|
|
66
|
+
|
|
53
67
|
def search(
|
|
54
68
|
self,
|
|
55
69
|
keywords: str,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
timelimit: Optional[str] = None, # Not directly supported by Bing
|
|
60
|
-
max_results: Optional[int] = None,
|
|
70
|
+
max_results: Optional[int] = 10,
|
|
71
|
+
extract_webpage_text: bool = False,
|
|
72
|
+
max_extract_characters: Optional[int] = 100,
|
|
61
73
|
) -> List[Dict[str, str]]:
|
|
62
74
|
"""Bing text search."""
|
|
63
75
|
assert keywords, "keywords is mandatory"
|
|
64
76
|
|
|
65
77
|
results = []
|
|
66
|
-
|
|
67
|
-
|
|
78
|
+
futures = []
|
|
79
|
+
start = 1
|
|
80
|
+
while len(results) < max_results:
|
|
68
81
|
params = {
|
|
69
82
|
"q": keywords,
|
|
70
|
-
"
|
|
71
|
-
"mkt": region,
|
|
72
|
-
"setlang": lang,
|
|
73
|
-
"safeSearch": safe,
|
|
74
|
-
"first": start, # Bing uses 'first' for pagination
|
|
83
|
+
"first": start
|
|
75
84
|
}
|
|
85
|
+
futures.append(self._executor.submit(self._get_url, "GET", "https://www.bing.com/search", params=params))
|
|
86
|
+
start += 10
|
|
76
87
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if not result_block:
|
|
83
|
-
break
|
|
88
|
+
for future in as_completed(futures):
|
|
89
|
+
try:
|
|
90
|
+
resp_content = future.result()
|
|
91
|
+
soup = BeautifulSoup(resp_content, "html.parser")
|
|
92
|
+
result_block = soup.select('li.b_algo')
|
|
84
93
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
link = result.find("a", href=True)
|
|
88
|
-
if link:
|
|
89
|
-
initial_url = link["href"]
|
|
94
|
+
if not result_block:
|
|
95
|
+
break
|
|
90
96
|
|
|
91
|
-
|
|
92
|
-
|
|
97
|
+
for result in result_block:
|
|
98
|
+
try:
|
|
99
|
+
link = result.select_one('h2 a')
|
|
100
|
+
title = link.text if link else ""
|
|
101
|
+
url = link['href'] if link else ""
|
|
102
|
+
abstract = result.select_one('.b_caption p')
|
|
103
|
+
description = abstract.text if abstract else ""
|
|
93
104
|
|
|
94
|
-
# Remove
|
|
105
|
+
# Remove "WEB" from the beginning of the description if it exists
|
|
95
106
|
if description.startswith("WEB"):
|
|
96
|
-
description = description[
|
|
107
|
+
description = description[3:].strip()
|
|
108
|
+
|
|
109
|
+
visible_text = ""
|
|
110
|
+
if extract_webpage_text:
|
|
111
|
+
try:
|
|
112
|
+
actual_url = self._decode_bing_url(url)
|
|
113
|
+
page_content = self._get_url("GET", actual_url)
|
|
114
|
+
visible_text = self.extract_text_from_webpage(
|
|
115
|
+
page_content, max_characters=max_extract_characters
|
|
116
|
+
)
|
|
117
|
+
except Exception as e:
|
|
118
|
+
print(f"Error extracting text from {url}: {e}")
|
|
97
119
|
|
|
98
120
|
results.append({
|
|
99
121
|
"title": title,
|
|
100
|
-
"href":
|
|
122
|
+
"href": url,
|
|
101
123
|
"abstract": description,
|
|
102
124
|
"index": len(results),
|
|
103
125
|
"type": "web",
|
|
126
|
+
"visible_text": visible_text,
|
|
104
127
|
})
|
|
105
128
|
|
|
106
129
|
if len(results) >= max_results:
|
|
107
130
|
return results
|
|
108
131
|
|
|
109
|
-
|
|
110
|
-
|
|
132
|
+
except Exception as e:
|
|
133
|
+
print(f"Error extracting result: {e}")
|
|
111
134
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
start += 10
|
|
135
|
+
except Exception as e:
|
|
136
|
+
print(f"Error fetching URL: {e}")
|
|
116
137
|
|
|
117
138
|
return results
|
|
118
139
|
|
|
140
|
+
def _decode_bing_url(self, url):
|
|
141
|
+
if 'bing.com/ck/a' in url:
|
|
142
|
+
parsed_url = urlparse(url)
|
|
143
|
+
query_params = parse_qs(parsed_url.query)
|
|
144
|
+
if 'u' in query_params:
|
|
145
|
+
encoded_url = query_params['u'][0]
|
|
146
|
+
return base64.b64decode(encoded_url).decode('utf-8')
|
|
147
|
+
return url
|
|
148
|
+
|
|
119
149
|
if __name__ == "__main__":
|
|
120
150
|
from rich import print
|
|
121
151
|
searcher = BingS()
|
|
122
|
-
results = searcher.search("Python development tools", max_results=
|
|
152
|
+
results = searcher.search("Python development tools", max_results=5, extract_webpage_text=True, max_extract_characters=2000)
|
|
123
153
|
for result in results:
|
|
124
|
-
print(result)
|
|
154
|
+
print(result)
|
webscout/DWEBS.py
CHANGED
|
@@ -8,18 +8,19 @@ import time
|
|
|
8
8
|
import random
|
|
9
9
|
|
|
10
10
|
class GoogleS:
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
"""
|
|
12
|
+
Class to perform Google searches and retrieve results.
|
|
13
|
+
"""
|
|
14
14
|
|
|
15
15
|
def __init__(
|
|
16
16
|
self,
|
|
17
17
|
headers: Optional[Dict[str, str]] = None,
|
|
18
18
|
proxy: Optional[str] = None,
|
|
19
19
|
timeout: Optional[int] = 10,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
max_workers: int = 20 # Increased max workers for thread pool
|
|
21
|
+
):
|
|
22
|
+
"""Initializes the GoogleS object."""
|
|
23
|
+
self.proxy = proxy
|
|
23
24
|
self.headers = headers if headers else {
|
|
24
25
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
|
|
25
26
|
}
|
|
@@ -28,20 +29,19 @@ class GoogleS:
|
|
|
28
29
|
self.client.headers.update(self.headers)
|
|
29
30
|
self.client.proxies.update({"http": self.proxy, "https": self.proxy})
|
|
30
31
|
self.timeout = timeout
|
|
32
|
+
self._executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
31
33
|
|
|
32
|
-
def __enter__(self)
|
|
34
|
+
def __enter__(self):
|
|
33
35
|
return self
|
|
34
36
|
|
|
35
37
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
36
38
|
self.client.close()
|
|
37
39
|
|
|
38
|
-
def _get_url(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
data: Optional[Union[Dict[str, str], bytes]] = None,
|
|
44
|
-
) -> bytes:
|
|
40
|
+
def _get_url(self, method: str, url: str, params: Optional[Dict[str, str]] = None,
|
|
41
|
+
data: Optional[Union[Dict[str, str], bytes]] = None) -> bytes:
|
|
42
|
+
"""
|
|
43
|
+
Makes an HTTP request and returns the response content.
|
|
44
|
+
"""
|
|
45
45
|
try:
|
|
46
46
|
resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
|
|
47
47
|
except Exception as ex:
|
|
@@ -50,13 +50,13 @@ class GoogleS:
|
|
|
50
50
|
return resp.content
|
|
51
51
|
raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")
|
|
52
52
|
|
|
53
|
-
def
|
|
54
|
-
"""
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
def _extract_text_from_webpage(self, html_content: bytes, max_characters: Optional[int] = None) -> str:
|
|
54
|
+
"""
|
|
55
|
+
Extracts visible text from HTML content using lxml parser.
|
|
56
|
+
"""
|
|
57
|
+
soup = BeautifulSoup(html_content, 'lxml') # Use lxml parser
|
|
57
58
|
for tag in soup(["script", "style", "header", "footer", "nav"]):
|
|
58
59
|
tag.extract()
|
|
59
|
-
# Get the remaining visible text
|
|
60
60
|
visible_text = soup.get_text(strip=True)
|
|
61
61
|
if max_characters:
|
|
62
62
|
visible_text = visible_text[:max_characters]
|
|
@@ -64,32 +64,56 @@ class GoogleS:
|
|
|
64
64
|
|
|
65
65
|
def search(
|
|
66
66
|
self,
|
|
67
|
-
|
|
67
|
+
query: str,
|
|
68
68
|
region: str = "us-en",
|
|
69
|
-
|
|
69
|
+
language: str = "en",
|
|
70
70
|
safe: str = "off",
|
|
71
|
-
|
|
72
|
-
max_results:
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
) -> List[Dict[str, str]]:
|
|
76
|
-
"""
|
|
77
|
-
|
|
71
|
+
time_period: Optional[str] = None,
|
|
72
|
+
max_results: int = 10,
|
|
73
|
+
extract_text: bool = False,
|
|
74
|
+
max_text_length: Optional[int] = 100,
|
|
75
|
+
) -> List[Dict[str, Union[str, int]]]:
|
|
76
|
+
"""
|
|
77
|
+
Performs a Google search and returns the results.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
query (str): The search query.
|
|
81
|
+
region (str, optional): The region to search in (e.g., "us-en"). Defaults to "us-en".
|
|
82
|
+
language (str, optional): The language of the search results (e.g., "en"). Defaults to "en".
|
|
83
|
+
safe (str, optional): Safe search setting ("off", "active"). Defaults to "off".
|
|
84
|
+
time_period (Optional[str], optional): Time period filter (e.g., "h" for past hour, "d" for past day).
|
|
85
|
+
Defaults to None.
|
|
86
|
+
max_results (int, optional): The maximum number of results to retrieve. Defaults to 10.
|
|
87
|
+
extract_text (bool, optional): Whether to extract text from the linked web pages. Defaults to False.
|
|
88
|
+
max_text_length (Optional[int], optional): The maximum length of the extracted text (in characters).
|
|
89
|
+
Defaults to 100.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
List[Dict[str, Union[str, int]]]: A list of dictionaries, each representing a search result, containing:
|
|
93
|
+
- 'title': The title of the result.
|
|
94
|
+
- 'href': The URL of the result.
|
|
95
|
+
- 'abstract': The description snippet of the result.
|
|
96
|
+
- 'index': The index of the result in the list.
|
|
97
|
+
- 'type': The type of result (currently always "web").
|
|
98
|
+
- 'visible_text': The extracted text from the web page (if `extract_text` is True).
|
|
99
|
+
"""
|
|
100
|
+
assert query, "Query cannot be empty."
|
|
78
101
|
|
|
79
102
|
results = []
|
|
80
103
|
futures = []
|
|
81
104
|
start = 0
|
|
105
|
+
|
|
82
106
|
while len(results) < max_results:
|
|
83
107
|
params = {
|
|
84
|
-
"q":
|
|
85
|
-
"num": 10,
|
|
86
|
-
"hl":
|
|
108
|
+
"q": query,
|
|
109
|
+
"num": 10,
|
|
110
|
+
"hl": language,
|
|
87
111
|
"start": start,
|
|
88
112
|
"safe": safe,
|
|
89
113
|
"gl": region,
|
|
90
114
|
}
|
|
91
|
-
if
|
|
92
|
-
params["tbs"] = f"qdr:{
|
|
115
|
+
if time_period:
|
|
116
|
+
params["tbs"] = f"qdr:{time_period}"
|
|
93
117
|
|
|
94
118
|
futures.append(self._executor.submit(self._get_url, "GET", "https://www.google.com/search", params=params))
|
|
95
119
|
start += 10
|
|
@@ -97,54 +121,52 @@ class GoogleS:
|
|
|
97
121
|
for future in as_completed(futures):
|
|
98
122
|
try:
|
|
99
123
|
resp_content = future.result()
|
|
100
|
-
soup = BeautifulSoup(resp_content,
|
|
101
|
-
|
|
124
|
+
soup = BeautifulSoup(resp_content, 'lxml') # Use lxml parser
|
|
125
|
+
result_blocks = soup.find_all("div", class_="g")
|
|
102
126
|
|
|
103
|
-
if not
|
|
127
|
+
if not result_blocks:
|
|
104
128
|
break
|
|
105
129
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
except Exception as e:
|
|
144
|
-
print(f"Error extracting result: {e}")
|
|
130
|
+
# Extract links and titles first
|
|
131
|
+
for result_block in result_blocks:
|
|
132
|
+
link = result_block.find("a", href=True)
|
|
133
|
+
title = result_block.find("h3")
|
|
134
|
+
description_box = result_block.find(
|
|
135
|
+
"div", {"style": "-webkit-line-clamp:2"}
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
if link and title and description_box:
|
|
139
|
+
url = link["href"]
|
|
140
|
+
results.append({
|
|
141
|
+
"title": title.text,
|
|
142
|
+
"href": url,
|
|
143
|
+
"abstract": description_box.text,
|
|
144
|
+
"index": len(results),
|
|
145
|
+
"type": "web",
|
|
146
|
+
"visible_text": "" # Initialize visible_text as empty string
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
if len(results) >= max_results:
|
|
150
|
+
break # Stop if we have enough results
|
|
151
|
+
|
|
152
|
+
# Parallelize text extraction if needed
|
|
153
|
+
if extract_text:
|
|
154
|
+
with ThreadPoolExecutor(max_workers=self._executor._max_workers) as text_extractor:
|
|
155
|
+
extraction_futures = [
|
|
156
|
+
text_extractor.submit(self._extract_text_from_webpage,
|
|
157
|
+
self._get_url("GET", result['href']),
|
|
158
|
+
max_characters=max_text_length)
|
|
159
|
+
for result in results
|
|
160
|
+
if 'href' in result
|
|
161
|
+
]
|
|
162
|
+
for i, future in enumerate(as_completed(extraction_futures)):
|
|
163
|
+
try:
|
|
164
|
+
results[i]['visible_text'] = future.result()
|
|
165
|
+
except Exception as e:
|
|
166
|
+
print(f"Error extracting text: {e}")
|
|
145
167
|
|
|
146
168
|
except Exception as e:
|
|
147
|
-
print(f"Error
|
|
169
|
+
print(f"Error: {e}")
|
|
148
170
|
|
|
149
171
|
return results
|
|
150
172
|
|
|
@@ -152,6 +174,6 @@ class GoogleS:
|
|
|
152
174
|
if __name__ == "__main__":
|
|
153
175
|
from rich import print
|
|
154
176
|
searcher = GoogleS()
|
|
155
|
-
results = searcher.search("HelpingAI-9B", max_results=20,
|
|
177
|
+
results = searcher.search("HelpingAI-9B", max_results=20, extract_text=False, max_text_length=200)
|
|
156
178
|
for result in results:
|
|
157
179
|
print(result)
|
webscout/Local/_version.py
CHANGED
[hunk for this file (+1 -1) is missing from this capture]
webscout/Provider/AI21.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import json
|
|
3
|
+
from typing import Dict, Any
|
|
4
|
+
|
|
5
|
+
from webscout.AIutel import Optimizers
|
|
6
|
+
from webscout.AIutel import Conversation
|
|
7
|
+
from webscout.AIutel import AwesomePrompts
|
|
8
|
+
from webscout.AIbase import Provider
|
|
9
|
+
from webscout import exceptions
|
|
10
|
+
|
|
11
|
+
class AI21(Provider):
|
|
12
|
+
"""
|
|
13
|
+
A class to interact with the AI21 Studio API.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
api_key: str,
|
|
19
|
+
model: str = "jamba-1.5-large",
|
|
20
|
+
max_tokens: int = 1024,
|
|
21
|
+
temperature: float = 0.4,
|
|
22
|
+
top_p: float = 1,
|
|
23
|
+
is_conversation: bool = True,
|
|
24
|
+
timeout: int = 30,
|
|
25
|
+
intro: str = None,
|
|
26
|
+
filepath: str = None,
|
|
27
|
+
update_file: bool = True,
|
|
28
|
+
proxies: dict = {},
|
|
29
|
+
history_offset: int = 10250,
|
|
30
|
+
act: str = None,
|
|
31
|
+
system_prompt: str = "You are a helpful and informative AI assistant."
|
|
32
|
+
):
|
|
33
|
+
"""
|
|
34
|
+
Initializes the AI21 Studio API with given parameters.
|
|
35
|
+
"""
|
|
36
|
+
self.api_key = api_key
|
|
37
|
+
self.api_endpoint = "https://api.ai21.com/studio/v1/chat/completions"
|
|
38
|
+
self.model = model
|
|
39
|
+
self.max_tokens = max_tokens
|
|
40
|
+
self.temperature = temperature
|
|
41
|
+
self.top_p = top_p
|
|
42
|
+
self.system_prompt = system_prompt
|
|
43
|
+
self.session = requests.Session()
|
|
44
|
+
self.is_conversation = is_conversation
|
|
45
|
+
self.max_tokens_to_sample = max_tokens
|
|
46
|
+
self.timeout = timeout
|
|
47
|
+
self.last_response = {}
|
|
48
|
+
self.headers = {
|
|
49
|
+
'Accept': 'application/json, text/plain, */*',
|
|
50
|
+
'Accept-Encoding': 'gzip, deflate, br, zstd',
|
|
51
|
+
'Accept-Language': 'en-US,en;q=0.9,en-IN;q=0.8',
|
|
52
|
+
'Authorization': f"Bearer {self.api_key}",
|
|
53
|
+
'Content-Type': 'application/json',
|
|
54
|
+
'DNT': '1',
|
|
55
|
+
'Origin': 'https://studio.ai21.com',
|
|
56
|
+
'Referer': 'https://studio.ai21.com/',
|
|
57
|
+
'Sec-CH-UA': '"Chromium";v="128", "Not;A=Brand";v="24", "Microsoft Edge";v="128"',
|
|
58
|
+
'Sec-CH-UA-Mobile': '?0',
|
|
59
|
+
'Sec-CH-UA-Platform': '"Windows"',
|
|
60
|
+
'Sec-Fetch-Dest': 'empty',
|
|
61
|
+
'Sec-Fetch-Mode': 'cors',
|
|
62
|
+
'Sec-Fetch-Site': 'same-site',
|
|
63
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
self.__available_optimizers = (
|
|
67
|
+
method
|
|
68
|
+
for method in dir(Optimizers)
|
|
69
|
+
if callable(getattr(Optimizers, method)) and not method.startswith("__")
|
|
70
|
+
)
|
|
71
|
+
self.session.headers.update(self.headers)
|
|
72
|
+
Conversation.intro = (
|
|
73
|
+
AwesomePrompts().get_act(
|
|
74
|
+
act, raise_not_found=True, default=None, case_insensitive=True
|
|
75
|
+
)
|
|
76
|
+
if act
|
|
77
|
+
else intro or Conversation.intro
|
|
78
|
+
)
|
|
79
|
+
self.conversation = Conversation(
|
|
80
|
+
is_conversation, self.max_tokens_to_sample, filepath, update_file
|
|
81
|
+
)
|
|
82
|
+
self.conversation.history_offset = history_offset
|
|
83
|
+
self.session.proxies = proxies
|
|
84
|
+
|
|
85
|
+
def ask(
|
|
86
|
+
self,
|
|
87
|
+
prompt: str,
|
|
88
|
+
stream: bool = False,
|
|
89
|
+
raw: bool = False,
|
|
90
|
+
optimizer: str = None,
|
|
91
|
+
conversationally: bool = False,
|
|
92
|
+
) -> Dict[str, Any]:
|
|
93
|
+
"""
|
|
94
|
+
Sends a prompt to the AI21 Studio API and returns the response.
|
|
95
|
+
"""
|
|
96
|
+
conversation_prompt = self.conversation.gen_complete_prompt(prompt)
|
|
97
|
+
if optimizer:
|
|
98
|
+
if optimizer in self.__available_optimizers:
|
|
99
|
+
conversation_prompt = getattr(Optimizers, optimizer)(
|
|
100
|
+
conversation_prompt if conversationally else prompt
|
|
101
|
+
)
|
|
102
|
+
else:
|
|
103
|
+
raise Exception(
|
|
104
|
+
f"Optimizer is not one of {self.__available_optimizers}"
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
payload = {
|
|
108
|
+
"messages": [
|
|
109
|
+
{"role": "system", "content": self.system_prompt},
|
|
110
|
+
{"role": "user", "content": conversation_prompt}
|
|
111
|
+
],
|
|
112
|
+
"n": 1,
|
|
113
|
+
"max_tokens": self.max_tokens,
|
|
114
|
+
"model": self.model,
|
|
115
|
+
"stop": [],
|
|
116
|
+
"temperature": self.temperature,
|
|
117
|
+
"top_p": self.top_p,
|
|
118
|
+
"documents": [],
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
response = self.session.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
|
|
122
|
+
|
|
123
|
+
if not response.ok:
|
|
124
|
+
raise exceptions.FailedToGenerateResponseError(
|
|
125
|
+
f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
resp = response.json()
|
|
129
|
+
self.last_response.update(resp)
|
|
130
|
+
self.conversation.update_chat_history(
|
|
131
|
+
prompt, self.get_message(self.last_response)
|
|
132
|
+
)
|
|
133
|
+
return self.last_response
|
|
134
|
+
|
|
135
|
+
def chat(
|
|
136
|
+
self,
|
|
137
|
+
prompt: str,
|
|
138
|
+
stream: bool = False,
|
|
139
|
+
optimizer: str = None,
|
|
140
|
+
conversationally: bool = False,
|
|
141
|
+
) -> str:
|
|
142
|
+
"""
|
|
143
|
+
Generates a response from the AI21 API.
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
def for_stream():
|
|
147
|
+
for response in self.ask(
|
|
148
|
+
prompt, True, optimizer=optimizer, conversationally=conversationally
|
|
149
|
+
):
|
|
150
|
+
yield self.get_message(response)
|
|
151
|
+
|
|
152
|
+
def for_non_stream():
|
|
153
|
+
return self.get_message(
|
|
154
|
+
self.ask(
|
|
155
|
+
prompt,
|
|
156
|
+
False,
|
|
157
|
+
optimizer=optimizer,
|
|
158
|
+
conversationally=conversationally,
|
|
159
|
+
)
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
return for_stream() if stream else for_non_stream()
|
|
163
|
+
|
|
164
|
+
def get_message(self, response: dict) -> str:
|
|
165
|
+
"""
|
|
166
|
+
Extracts the message from the API response.
|
|
167
|
+
"""
|
|
168
|
+
assert isinstance(response, dict), "Response should be of dict data-type only"
|
|
169
|
+
return response['choices'][0]['message']['content']
|
|
170
|
+
|
|
171
|
+
# Example usage
|
|
172
|
+
if __name__ == "__main__":
|
|
173
|
+
from rich import print
|
|
174
|
+
ai = AI21(api_key="api_key")
|
|
175
|
+
response = ai.chat(input(">>> "))
|
|
176
|
+
for line in response:
|
|
177
|
+
print(line, end="", flush=True)
|