webscout-2.6-py3-none-any.whl → webscout-2.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of webscout might be problematic.
- webscout/LLM.py +56 -1
- webscout/Local/_version.py +1 -1
- webscout/Local/formats.py +154 -88
- webscout/Local/model.py +4 -4
- webscout/Local/thread.py +166 -156
- webscout/Provider/BasedGPT.py +226 -0
- webscout/Provider/__init__.py +1 -0
- webscout/__init__.py +2 -2
- webscout/cli.py +39 -3
- webscout/version.py +1 -1
- webscout/webscout_search.py +1018 -40
- webscout/webscout_search_async.py +151 -839
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/METADATA +37 -21
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/RECORD +18 -17
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/LICENSE.md +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/WHEEL +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/entry_points.txt +0 -0
- {webscout-2.6.dist-info → webscout-2.8.dist-info}/top_level.txt +0 -0
webscout/LLM.py
CHANGED
@@ -1,6 +1,9 @@
-import argparse
 import requests
+import base64
+from typing import List, Dict, Union
 import json
+import requests
+import base64
 from typing import List, Dict, Union

 class LLM:
@@ -43,3 +46,55 @@ class LLM:
             return result.json()['choices'][0]['message']['content']
         except:
             return None
+# def main():
+#     llm = LLM(model="meta-llama/Meta-Llama-3-70B-Instruct")
+#     messages = [
+#         {"role": "user", "content": "Hello, how are you?"}
+#     ]
+#     response = llm.chat(messages)
+#     print(response)
+
+# if __name__ == "__main__":
+#     main()
+
+
+class VLM:
+    def __init__(self, model: str, system_message: str = "You are a Helpful AI."):
+        self.model = model
+        self.conversation_history = [{"role": "system", "content": system_message}]
+
+    def chat(self, messages: List[Dict[str, Union[str, List[Dict[str, Union[str, Dict[str, str]]]]]]]) -> Union[str, None]:
+        api_url = "https://api.deepinfra.com/v1/openai/chat/completions"
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
+            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/json',
+            'Origin': 'https://deepinfra.com',
+            'Pragma': 'no-cache',
+            'Referer': 'https://deepinfra.com/',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-site',
+            'X-Deepinfra-Source': 'web-embed',
+            'accept': 'text/event-stream',
+            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"'
+        }
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": False
+        }
+        try:
+            response = requests.post(api_url, headers=headers, json=payload)
+            return response.json()['choices'][0]['message']['content']
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return None
+
+def encode_image_to_base64(image_path: str) -> str:
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
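The new VLM class posts to the same DeepInfra endpoint as LLM but accepts nested message content, and encode_image_to_base64 prepares local images for it. A minimal sketch of how the two might be combined; the model name, the file path, and the OpenAI-style image_url content parts are assumptions of this example, not something the diff itself confirms:

from webscout.LLM import VLM, encode_image_to_base64

vlm = VLM(model="llava-hf/llava-1.5-7b-hf")  # hypothetical model name
b64 = encode_image_to_base64("photo.jpg")    # illustrative local file
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            # OpenAI-style image part; the nesting matches chat()'s type hint
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ],
    }
]
print(vlm.chat(messages))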
webscout/Local/_version.py
CHANGED
webscout/Local/formats.py
CHANGED
@@ -1,6 +1,38 @@
 from ._version import __version__, __llama_cpp_version__

-from typing import Union
+from typing import Callable, Union, Any
+
+
+class AdvancedFormat:
+
+    def __init__(self, base_dict: dict[str, Union[str, list]]):
+        self._base_dict = base_dict
+        self.overrides = {}
+
+    def __getitem__(self, key: str) -> Any:
+        if key in self.overrides:
+            return str(self.overrides[key]())
+        else:
+            return self._base_dict[key]
+
+    def __repr__(self) -> str:
+        # NOTE: This method does not represent overrides
+        return repr(self._base_dict)
+
+    def keys(self):
+        return self._base_dict.keys()
+
+    def override(self, key: str, fn: Callable) -> None:
+        self.overrides[key] = fn
+
+    def wrap(self, prompt: str) -> str:
+        return self['system_prefix'] + \
+               self['system_content'] + \
+               self['system_suffix'] + \
+               self['user_prefix'] + \
+               prompt + \
+               self['user_suffix'] + \
+               self['bot_prefix']


 def wrap(
@@ -8,26 +40,25 @@ def wrap(
     format: dict[str, Union[str, list]]
 ) -> str:
     """Wrap a given string in any prompt format for single-turn completion"""
-    return
-
-
-
-
-
-
-
-    )
+    return format['system_prefix'] + \
+        format['system_content'] + \
+        format['system_suffix'] + \
+        format['user_prefix'] + \
+        prompt + \
+        format['user_suffix'] + \
+        format['bot_prefix']
+

 blank: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": "",
     "user_prefix": "",
     "user_content": "",
-    "
+    "user_suffix": "",
     "bot_prefix": "",
     "bot_content": "",
-    "
+    "bot_suffix": "",
     "stops": []
 }

@@ -36,25 +67,38 @@ alpaca: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "Below is an instruction that describes a task. " + \
         "Write a response that appropriately completes the request.",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "### Instruction:\n",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "### Response:\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['###', 'Instruction:', '\n\n\n']
 }
+
+# https://docs.mistral.ai/models/
+# As a reference, here is the format used to tokenize instructions during fine-tuning:
+# ```
+# [START_SYMBOL_ID] +
+# tok("[INST]") + tok(USER_MESSAGE_1) + tok("[/INST]") +
+# tok(BOT_MESSAGE_1) + [END_SYMBOL_ID] +
+# …
+# tok("[INST]") + tok(USER_MESSAGE_N) + tok("[/INST]") +
+# tok(BOT_MESSAGE_N) + [END_SYMBOL_ID]
+# ```
+# In the pseudo-code above, note that the tokenize method should not add a BOS or EOS token automatically, but should add a prefix space.
+
 mistral_instruct: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": "",
     "user_prefix": " [INST] ",
     "user_content": "",
-    "
+    "user_suffix": " [/INST]",
     "bot_prefix": "",
     "bot_content": "",
-    "
+    "bot_suffix": "",
     "stops": []
 }

@@ -62,16 +106,16 @@ mistral_instruct: dict[str, Union[str, list]] = {
 mistral_instruct_safe: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": "",
     "user_prefix": " [INST] Always assist with care, respect, and truth. " + \
         "Respond with utmost utility yet securely. Avoid harmful, unethical, " + \
         "prejudiced, or negative content. Ensure replies promote fairness and " + \
         "positivity. ",
     "user_content": "",
-    "
+    "user_suffix": " [/INST]",
     "bot_prefix": "",
     "bot_content": "",
-    "
+    "bot_suffix": "",
     "stops": []
 }

@@ -79,13 +123,13 @@ mistral_instruct_safe: dict[str, Union[str, list]] = {
 chatml: dict[str, Union[str, list]] = {
     "system_prefix": "<|im_start|>system\n",
     "system_content": "",
-    "
+    "system_suffix": "<|im_end|>\n",
     "user_prefix": "<|im_start|>user\n",
     "user_content": "",
-    "
+    "user_suffix": "<|im_end|>\n",
     "bot_prefix": "<|im_start|>assistant\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|im_end|>\n",
     "stops": ['<|im_start|>']
 }

@@ -94,28 +138,33 @@ chatml: dict[str, Union[str, list]] = {
 llama2chat: dict[str, Union[str, list]] = {
     "system_prefix": "[INST] <<SYS>>\n",
     "system_content": "You are a helpful AI assistant.",
-    "
+    "system_suffix": "\n<</SYS>>\n\n",
     "user_prefix": "",
     "user_content": "",
-    "
+    "user_suffix": " [/INST]",
     "bot_prefix": " ",
     "bot_content": "",
-    "
+    "bot_suffix": " [INST] ",
     "stops": ['[INST]', '[/INST]']
 }

-# https://
-#
+# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
+#
+# for llama 3 instruct models, use the following string for `-p` in llama.cpp,
+# along with `-e` to escape newlines correctly
+#
+# '<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI assistant called "Llama 3".<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nhi<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n'
+#
 llama3: dict[str, Union[str, list]] = {
     "system_prefix": "<|start_header_id|>system<|end_header_id|>\n\n",
     "system_content": 'You are a helpful AI assistant called "Llama 3".',
-    "
+    "system_suffix": "<|eot_id|>\n",
     "user_prefix": "<|start_header_id|>user<|end_header_id|>\n\n",
     "user_content": "",
-    "
+    "user_suffix": "<|eot_id|>\n",
     "bot_prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|eot_id|>\n",
     "stops": [128001, 128009]
 }

@@ -124,13 +173,13 @@ alpaca: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "Below is an instruction that describes a task. " + \
         "Write a response that appropriately completes the request.",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "### Instruction:\n",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "### Response:\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['###', 'Instruction:', '\n\n\n']
 }

@@ -138,13 +187,13 @@ alpaca: dict[str, Union[str, list]] = {
 phi3: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "", # does not officially support system prompt
-    "
+    "system_suffix": "",
     "user_prefix": "<|user|>\n",
     "user_content": "",
-    "
+    "user_suffix": "<|end|>\n",
     "bot_prefix": "<|assistant|>\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|end|>\n",
     "stops": []
 }

@@ -154,13 +203,13 @@ phi3: dict[str, Union[str, list]] = {
 vicuna_lmsys: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": " ",
     "user_prefix": "USER: ",
     "user_content": "",
-    "
+    "user_suffix": " ",
     "bot_prefix": "ASSISTANT: ",
     "bot_content": "",
-    "
+    "bot_suffix": " ",
     "stops": ['USER:']
 }

@@ -171,29 +220,46 @@ vicuna_common: dict[str, Union[str, list]] = {
     "system_content": "A chat between a curious user and an artificial " + \
         "intelligence assistant. The assistant gives helpful, detailed, " + \
         "and polite answers to the user's questions.",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "USER: ",
     "user_content": "",
-    "
+    "user_suffix": "\n",
     "bot_prefix": "ASSISTANT: ",
     "bot_content": "",
-    "
+    "bot_suffix": "\n",
     "stops": ['USER:', 'ASSISTANT:']
 }

+# an unofficial format that is easily "picked up" by most models
+# change the tag attributes to suit your use case
+# note the lack of newlines - they are not necessary, and might
+# actually make it harder for the model to follow along
+markup = {
+    "system_prefix": '<message from="system">',
+    "system_content": '',
+    "system_suffix": '</message>',
+    "user_prefix": '<message from="user">',
+    "user_content": '',
+    "user_suffix": '</message>',
+    "bot_prefix": '<message from="bot">',
+    "bot_content": '',
+    "bot_suffix": '</message>',
+    "stops": ['</message>']
+}
+
 # https://huggingface.co/timdettmers/guanaco-65b
 guanaco: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "A chat between a curious human and an artificial " + \
         "intelligence assistant. The assistant gives helpful, detailed, " + \
         "and polite answers to the user's questions.",
-    "
+    "system_suffix": "\n",
     "user_prefix": "### Human: ",
     "user_content": "",
-    "
+    "user_suffix": " ",
     "bot_prefix": "### Assistant:",
     "bot_content": "",
-    "
+    "bot_suffix": " ",
     "stops": ['###', 'Human:']
 }

@@ -202,13 +268,13 @@ orca_mini: dict[str, Union[str, list]] = {
     "system_prefix": "### System:\n",
     "system_content": "You are an AI assistant that follows instruction " + \
         "extremely well. Help as much as you can.",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "### User:\n",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "### Assistant:\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['###', 'User:']
 }

@@ -216,13 +282,13 @@ orca_mini: dict[str, Union[str, list]] = {
 zephyr: dict[str, Union[str, list]] = {
     "system_prefix": "<|system|>\n",
     "system_content": "You are a friendly chatbot.",
-    "
+    "system_suffix": "</s>\n",
     "user_prefix": "<|user|>\n",
     "user_content": "",
-    "
+    "user_suffix": "</s>\n",
     "bot_prefix": "<|assistant|>\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n",
     "stops": ['<|user|>']
 }

@@ -230,13 +296,13 @@ zephyr: dict[str, Union[str, list]] = {
 openchat: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": "",
     "user_prefix": "GPT4 Correct User: ",
     "user_content": "",
-    "
+    "user_suffix": "<|end_of_turn|>",
     "bot_prefix": "GPT4 Correct Assistant:",
     "bot_content": "",
-    "
+    "bot_suffix": "<|end_of_turn|>",
     "stops": ['<|end_of_turn|>']
 }

@@ -247,13 +313,13 @@ synthia: dict[str, Union[str, list]] = {
     "system_content": "Elaborate on the topic using a Tree of Thoughts and " + \
         "backtrack when necessary to construct a clear, cohesive Chain of " + \
         "Thought reasoning. Always answer without hesitation.",
-    "
+    "system_suffix": "\n",
     "user_prefix": "USER: ",
     "user_content": "",
-    "
+    "user_suffix": "\n",
     "bot_prefix": "ASSISTANT: ",
     "bot_content": "",
-    "
+    "bot_suffix": "\n",
     "stops": ['USER:', 'ASSISTANT:', 'SYSTEM:', '\n\n\n']
 }

@@ -268,13 +334,13 @@ neural_chat: dict[str, Union[str, list]] = {
         "to do anything that could be considered harmful to the user.\n" + \
         "- You are more than just an information source, you are also " + \
         "able to write poetry, short stories, and make jokes.",
-    "
+    "system_suffix": "</s>\n\n",
     "user_prefix": "### User:\n",
     "user_content": "",
-    "
+    "user_suffix": "</s>\n\n",
     "bot_prefix": "### Assistant:\n",
     "bot_content": "",
-    "
+    "bot_suffix": "</s>\n\n",
     "stops": ['###']
 }

@@ -283,13 +349,13 @@ chatml_alpaca: dict[str, Union[str, list]] = {
     "system_prefix": "<|im_start|>system\n",
     "system_content": "Below is an instruction that describes a task. Write " + \
         "a response that appropriately completes the request.",
-    "
+    "system_suffix": "<|im_end|>\n",
     "user_prefix": "<|im_start|>instruction\n",
     "user_content": "",
-    "
+    "user_suffix": "<|im_end|>\n",
     "bot_prefix": "<|im_start|>response\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|im_end|>\n",
     "stops": ['<|im_end|>', '<|im_start|>']
 }

@@ -299,13 +365,13 @@ autocorrect: dict[str, Union[str, list]] = {
     "system_content": "Below is a word or phrase that might be misspelled. " + \
         "Output the corrected word or phrase without " + \
         "changing the style or capitalization.",
-    "
+    "system_suffix": "<|im_end|>\n",
     "user_prefix": "<|im_start|>input\n",
     "user_content": "",
-    "
+    "user_suffix": "<|im_end|>\n",
     "bot_prefix": "<|im_start|>output\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|im_end|>\n",
     "stops": ['<|im_end|>', '<|im_start|>']
 }

@@ -314,13 +380,13 @@ autocorrect: dict[str, Union[str, list]] = {
 bagel: dict[str, Union[str, list]] = {
     "system_prefix": "system\n",
     "system_content": "",
-    "
+    "system_suffix": "\n",
     "user_prefix": "user\n",
     "user_content": "",
-    "
+    "user_suffix": "\n",
     "bot_prefix": "assistant\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n",
     "stops": ['user\n', 'assistant\n', 'system\n']
 }

@@ -328,13 +394,13 @@ bagel: dict[str, Union[str, list]] = {
 solar_instruct: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "",
-    "
+    "system_suffix": "",
     "user_prefix": "### User:\n",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "### Assistant:\n",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['### User:', '###', '### Assistant:']
 }

@@ -343,13 +409,13 @@ noromaid: dict[str, Union[str, list]] = {
     "system_prefix": "",
     "system_content": "Below is an instruction that describes a task. " + \
         "Write a response that appropriately completes the request.",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "### Instruction:\nBob: ",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "### Response:\nAlice:",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['###', 'Instruction:', '\n\n\n']
 }

@@ -357,13 +423,13 @@ noromaid: dict[str, Union[str, list]] = {
 nschatml: dict[str, Union[str, list]] = {
     "system_prefix": "<|im_start|>\n",
     "system_content": "",
-    "
+    "system_suffix": "<|im_end|>\n",
     "user_prefix": "<|im_user|>\n",
     "user_content": "",
-    "
+    "user_suffix": "<|im_end|>\n",
     "bot_prefix": "<|im_bot|>\n",
     "bot_content": "",
-    "
+    "bot_suffix": "<|im_end|>\n",
     "stops": []
 }

@@ -371,13 +437,13 @@ nschatml: dict[str, Union[str, list]] = {
 natural: dict[str, Union[str, list]] = {
     "system_prefix": "<<SYSTEM>> ",
     "system_content": "",
-    "
+    "system_suffix": "\n\n",
     "user_prefix": "<<USER>> ",
     "user_content": "",
-    "
+    "user_suffix": "\n\n",
     "bot_prefix": "<<ASSISTANT>>",
     "bot_content": "",
-    "
+    "bot_suffix": "\n\n",
     "stops": ['\n\nNote:', '<<SYSTEM>>', '<<USER>>', '<<ASSISTANT>>', '\n\n<<']
 }

@@ -385,13 +451,13 @@ natural: dict[str, Union[str, list]] = {
 command: dict[str, Union[str, list]] = {
     "system_prefix": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
     "system_content": "",
-    "
+    "system_suffix": "<|END_OF_TURN_TOKEN|>",
     "user_prefix": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
     "user_content": "",
-    "
+    "user_suffix": "<|END_OF_TURN_TOKEN|>",
     "bot_prefix": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
     "bot_content": "",
-    "
+    "bot_suffix": "<|END_OF_TURN_TOKEN|>",
     "stops": []
 }

@@ -466,4 +532,4 @@ alpaca_strict['stops'] = [
     '\n\n\n',
     '### Instruction:',
     '### Response:'
-]
+]
webscout/Local/model.py
CHANGED
@@ -612,7 +612,7 @@ class Model:
         self,
         prompt: str,
         k: int
-    ) -> list[tuple[str, np.
+    ) -> list[tuple[str, np.floating]]:
         """
         Given prompt `str` and k `int`, return a sorted list of the
         top k candidates for most likely next token, along with their
@@ -639,11 +639,11 @@
         # must normalize over all tokens in vocab, not just top k
         if self.verbose:
             print_verbose(f'calculating softmax over {len(scores)} values')
-        normalized_scores: list[np.
+        normalized_scores: list[np.floating] = list(softmax(scores))

         # construct the final list
         i = 0
-        token_probs_list: list[tuple[str, np.
+        token_probs_list: list[tuple[str, np.floating]] = []
         for tok_str in self.tokens:
             token_probs_list.append((tok_str, normalized_scores[i]))
             i += 1
@@ -666,7 +666,7 @@

         for _tuple in self.candidates(prompt, k):
             print(
-                f"token
+                f"token {repr(_tuple[0])} has probability {_tuple[1]}",
                 file=file,
                 flush=flush
            )
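These hunks annotate Model.candidates as returning np.floating probabilities, computed by a softmax over the entire vocabulary rather than just the top k. A self-contained sketch of that normalize-then-pair logic; the softmax helper and the toy tokens/scores below are stand-ins for webscout's internals, not its actual implementation:

import numpy as np

def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()

tokens = ["yes", "no", "maybe"]      # stand-in for self.tokens
scores = np.array([2.0, 1.0, 0.1])   # stand-in for the model's raw logits

# normalize over the whole vocab, then pair each token with its probability;
# the elements are NumPy scalars, hence the np.floating annotations above
normalized_scores: list[np.floating] = list(softmax(scores))
token_probs: list[tuple[str, np.floating]] = list(zip(tokens, normalized_scores))
for tok, p in sorted(token_probs, key=lambda t: t[1], reverse=True)[:2]:
    print(f"token {tok!r} has probability {p}")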