webscout 2.5__py3-none-any.whl → 2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIutel.py +20 -20
- webscout/Local/_version.py +1 -1
- webscout/Local/formats.py +141 -88
- webscout/Local/model.py +4 -4
- webscout/Local/thread.py +165 -155
- webscout/Provider/BasedGPT.py +226 -0
- webscout/Provider/Poe.py +208 -0
- webscout/Provider/__init__.py +3 -1
- webscout/__init__.py +5 -31
- webscout/cli.py +39 -3
- webscout/version.py +1 -1
- webscout/webai.py +16 -0
- webscout/webscout_search.py +1018 -40
- webscout/webscout_search_async.py +151 -839
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/METADATA +56 -21
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/RECORD +20 -18
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/LICENSE.md +0 -0
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/WHEEL +0 -0
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/entry_points.txt +0 -0
- {webscout-2.5.dist-info → webscout-2.7.dist-info}/top_level.txt +0 -0
webscout/AIutel.py
CHANGED
|
@@ -26,27 +26,27 @@ default_path = appdir.user_cache_dir
|
|
|
26
26
|
if not os.path.exists(default_path):
|
|
27
27
|
os.makedirs(default_path)
|
|
28
28
|
webai = [
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
29
|
+
"leo",
|
|
30
|
+
"openai",
|
|
31
|
+
"opengpt",
|
|
32
|
+
"koboldai",
|
|
33
|
+
"gemini",
|
|
34
|
+
"phind",
|
|
35
|
+
"blackboxai",
|
|
36
|
+
"g4fauto",
|
|
37
|
+
"perplexity",
|
|
38
|
+
"groq",
|
|
39
|
+
"reka",
|
|
40
|
+
"cohere",
|
|
41
|
+
"yepchat",
|
|
42
|
+
"you",
|
|
43
|
+
"xjai",
|
|
44
|
+
"thinkany",
|
|
45
|
+
"berlin4h",
|
|
46
|
+
"chatgptuk",
|
|
47
|
+
"auto",
|
|
48
|
+
"poe",
|
|
48
49
|
]
|
|
49
|
-
|
|
50
50
|
gpt4free_providers = [
|
|
51
51
|
provider.__name__ for provider in g4f.Provider.__providers__ # if provider.working
|
|
52
52
|
]
|
webscout/Local/_version.py
CHANGED
webscout/Local/formats.py
CHANGED
|
@@ -1,6 +1,38 @@
|
|
|
1
1
|
from ._version import __version__, __llama_cpp_version__
|
|
2
2
|
|
|
3
|
-
from typing import Union
|
|
3
|
+
from typing import Callable, Union, Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AdvancedFormat:
|
|
7
|
+
|
|
8
|
+
def __init__(self, base_dict: dict[str, Union[str, list]]):
|
|
9
|
+
self._base_dict = base_dict
|
|
10
|
+
self.overrides = {}
|
|
11
|
+
|
|
12
|
+
def __getitem__(self, key: str) -> Any:
|
|
13
|
+
if key in self.overrides:
|
|
14
|
+
return str(self.overrides[key]())
|
|
15
|
+
else:
|
|
16
|
+
return self._base_dict[key]
|
|
17
|
+
|
|
18
|
+
def __repr__(self) -> str:
|
|
19
|
+
# NOTE: This method does not represent overrides
|
|
20
|
+
return repr(self._base_dict)
|
|
21
|
+
|
|
22
|
+
def keys(self):
|
|
23
|
+
return self._base_dict.keys()
|
|
24
|
+
|
|
25
|
+
def override(self, key: str, fn: Callable) -> None:
|
|
26
|
+
self.overrides[key] = fn
|
|
27
|
+
|
|
28
|
+
def wrap(self, prompt: str) -> str:
|
|
29
|
+
return self['system_prefix'] + \
|
|
30
|
+
self['system_content'] + \
|
|
31
|
+
self['system_suffix'] + \
|
|
32
|
+
self['user_prefix'] + \
|
|
33
|
+
prompt + \
|
|
34
|
+
self['user_suffix'] + \
|
|
35
|
+
self['bot_prefix']
|
|
4
36
|
|
|
5
37
|
|
|
6
38
|
def wrap(
|
|
@@ -8,26 +40,25 @@ def wrap(
|
|
|
8
40
|
format: dict[str, Union[str, list]]
|
|
9
41
|
) -> str:
|
|
10
42
|
"""Wrap a given string in any prompt format for single-turn completion"""
|
|
11
|
-
return
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
)
|
|
43
|
+
return format['system_prefix'] + \
|
|
44
|
+
format['system_content'] + \
|
|
45
|
+
format['system_suffix'] + \
|
|
46
|
+
format['user_prefix'] + \
|
|
47
|
+
prompt + \
|
|
48
|
+
format['user_suffix'] + \
|
|
49
|
+
format['bot_prefix']
|
|
50
|
+
|
|
20
51
|
|
|
21
52
|
blank: dict[str, Union[str, list]] = {
|
|
22
53
|
"system_prefix": "",
|
|
23
54
|
"system_content": "",
|
|
24
|
-
"
|
|
55
|
+
"system_suffix": "",
|
|
25
56
|
"user_prefix": "",
|
|
26
57
|
"user_content": "",
|
|
27
|
-
"
|
|
58
|
+
"user_suffix": "",
|
|
28
59
|
"bot_prefix": "",
|
|
29
60
|
"bot_content": "",
|
|
30
|
-
"
|
|
61
|
+
"bot_suffix": "",
|
|
31
62
|
"stops": []
|
|
32
63
|
}
|
|
33
64
|
|
|
@@ -36,13 +67,13 @@ alpaca: dict[str, Union[str, list]] = {
|
|
|
36
67
|
"system_prefix": "",
|
|
37
68
|
"system_content": "Below is an instruction that describes a task. " + \
|
|
38
69
|
"Write a response that appropriately completes the request.",
|
|
39
|
-
"
|
|
70
|
+
"system_suffix": "\n\n",
|
|
40
71
|
"user_prefix": "### Instruction:\n",
|
|
41
72
|
"user_content": "",
|
|
42
|
-
"
|
|
73
|
+
"user_suffix": "\n\n",
|
|
43
74
|
"bot_prefix": "### Response:\n",
|
|
44
75
|
"bot_content": "",
|
|
45
|
-
"
|
|
76
|
+
"bot_suffix": "\n\n",
|
|
46
77
|
"stops": ['###', 'Instruction:', '\n\n\n']
|
|
47
78
|
}
|
|
48
79
|
|
|
@@ -61,13 +92,13 @@ alpaca: dict[str, Union[str, list]] = {
|
|
|
61
92
|
mistral_instruct: dict[str, Union[str, list]] = {
|
|
62
93
|
"system_prefix": "",
|
|
63
94
|
"system_content": "",
|
|
64
|
-
"
|
|
95
|
+
"system_suffix": "",
|
|
65
96
|
"user_prefix": " [INST] ",
|
|
66
97
|
"user_content": "",
|
|
67
|
-
"
|
|
98
|
+
"user_suffix": " [/INST]",
|
|
68
99
|
"bot_prefix": "",
|
|
69
100
|
"bot_content": "",
|
|
70
|
-
"
|
|
101
|
+
"bot_suffix": "",
|
|
71
102
|
"stops": []
|
|
72
103
|
}
|
|
73
104
|
|
|
@@ -75,16 +106,16 @@ mistral_instruct: dict[str, Union[str, list]] = {
|
|
|
75
106
|
mistral_instruct_safe: dict[str, Union[str, list]] = {
|
|
76
107
|
"system_prefix": "",
|
|
77
108
|
"system_content": "",
|
|
78
|
-
"
|
|
109
|
+
"system_suffix": "",
|
|
79
110
|
"user_prefix": " [INST] Always assist with care, respect, and truth. " + \
|
|
80
111
|
"Respond with utmost utility yet securely. Avoid harmful, unethical, " + \
|
|
81
112
|
"prejudiced, or negative content. Ensure replies promote fairness and " + \
|
|
82
113
|
"positivity. ",
|
|
83
114
|
"user_content": "",
|
|
84
|
-
"
|
|
115
|
+
"user_suffix": " [/INST]",
|
|
85
116
|
"bot_prefix": "",
|
|
86
117
|
"bot_content": "",
|
|
87
|
-
"
|
|
118
|
+
"bot_suffix": "",
|
|
88
119
|
"stops": []
|
|
89
120
|
}
|
|
90
121
|
|
|
@@ -92,13 +123,13 @@ mistral_instruct_safe: dict[str, Union[str, list]] = {
|
|
|
92
123
|
chatml: dict[str, Union[str, list]] = {
|
|
93
124
|
"system_prefix": "<|im_start|>system\n",
|
|
94
125
|
"system_content": "",
|
|
95
|
-
"
|
|
126
|
+
"system_suffix": "<|im_end|>\n",
|
|
96
127
|
"user_prefix": "<|im_start|>user\n",
|
|
97
128
|
"user_content": "",
|
|
98
|
-
"
|
|
129
|
+
"user_suffix": "<|im_end|>\n",
|
|
99
130
|
"bot_prefix": "<|im_start|>assistant\n",
|
|
100
131
|
"bot_content": "",
|
|
101
|
-
"
|
|
132
|
+
"bot_suffix": "<|im_end|>\n",
|
|
102
133
|
"stops": ['<|im_start|>']
|
|
103
134
|
}
|
|
104
135
|
|
|
@@ -107,28 +138,33 @@ chatml: dict[str, Union[str, list]] = {
|
|
|
107
138
|
llama2chat: dict[str, Union[str, list]] = {
|
|
108
139
|
"system_prefix": "[INST] <<SYS>>\n",
|
|
109
140
|
"system_content": "You are a helpful AI assistant.",
|
|
110
|
-
"
|
|
141
|
+
"system_suffix": "\n<</SYS>>\n\n",
|
|
111
142
|
"user_prefix": "",
|
|
112
143
|
"user_content": "",
|
|
113
|
-
"
|
|
144
|
+
"user_suffix": " [/INST]",
|
|
114
145
|
"bot_prefix": " ",
|
|
115
146
|
"bot_content": "",
|
|
116
|
-
"
|
|
147
|
+
"bot_suffix": " [INST] ",
|
|
117
148
|
"stops": ['[INST]', '[/INST]']
|
|
118
149
|
}
|
|
119
150
|
|
|
120
|
-
# https://
|
|
121
|
-
#
|
|
151
|
+
# https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
|
|
152
|
+
#
|
|
153
|
+
# for llama 3 instruct models, use the following string for `-p` in llama.cpp,
|
|
154
|
+
# along with `-e` to escape newlines correctly
|
|
155
|
+
#
|
|
156
|
+
# '<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI assistant called "Llama 3".<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nhi<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n'
|
|
157
|
+
#
|
|
122
158
|
llama3: dict[str, Union[str, list]] = {
|
|
123
159
|
"system_prefix": "<|start_header_id|>system<|end_header_id|>\n\n",
|
|
124
160
|
"system_content": 'You are a helpful AI assistant called "Llama 3".',
|
|
125
|
-
"
|
|
161
|
+
"system_suffix": "<|eot_id|>\n",
|
|
126
162
|
"user_prefix": "<|start_header_id|>user<|end_header_id|>\n\n",
|
|
127
163
|
"user_content": "",
|
|
128
|
-
"
|
|
164
|
+
"user_suffix": "<|eot_id|>\n",
|
|
129
165
|
"bot_prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
|
|
130
166
|
"bot_content": "",
|
|
131
|
-
"
|
|
167
|
+
"bot_suffix": "<|eot_id|>\n",
|
|
132
168
|
"stops": [128001, 128009]
|
|
133
169
|
}
|
|
134
170
|
|
|
@@ -137,13 +173,13 @@ alpaca: dict[str, Union[str, list]] = {
|
|
|
137
173
|
"system_prefix": "",
|
|
138
174
|
"system_content": "Below is an instruction that describes a task. " + \
|
|
139
175
|
"Write a response that appropriately completes the request.",
|
|
140
|
-
"
|
|
176
|
+
"system_suffix": "\n\n",
|
|
141
177
|
"user_prefix": "### Instruction:\n",
|
|
142
178
|
"user_content": "",
|
|
143
|
-
"
|
|
179
|
+
"user_suffix": "\n\n",
|
|
144
180
|
"bot_prefix": "### Response:\n",
|
|
145
181
|
"bot_content": "",
|
|
146
|
-
"
|
|
182
|
+
"bot_suffix": "\n\n",
|
|
147
183
|
"stops": ['###', 'Instruction:', '\n\n\n']
|
|
148
184
|
}
|
|
149
185
|
|
|
@@ -151,13 +187,13 @@ alpaca: dict[str, Union[str, list]] = {
|
|
|
151
187
|
phi3: dict[str, Union[str, list]] = {
|
|
152
188
|
"system_prefix": "",
|
|
153
189
|
"system_content": "", # does not officially support system prompt
|
|
154
|
-
"
|
|
190
|
+
"system_suffix": "",
|
|
155
191
|
"user_prefix": "<|user|>\n",
|
|
156
192
|
"user_content": "",
|
|
157
|
-
"
|
|
193
|
+
"user_suffix": "<|end|>\n",
|
|
158
194
|
"bot_prefix": "<|assistant|>\n",
|
|
159
195
|
"bot_content": "",
|
|
160
|
-
"
|
|
196
|
+
"bot_suffix": "<|end|>\n",
|
|
161
197
|
"stops": []
|
|
162
198
|
}
|
|
163
199
|
|
|
@@ -167,13 +203,13 @@ phi3: dict[str, Union[str, list]] = {
|
|
|
167
203
|
vicuna_lmsys: dict[str, Union[str, list]] = {
|
|
168
204
|
"system_prefix": "",
|
|
169
205
|
"system_content": "",
|
|
170
|
-
"
|
|
206
|
+
"system_suffix": " ",
|
|
171
207
|
"user_prefix": "USER: ",
|
|
172
208
|
"user_content": "",
|
|
173
|
-
"
|
|
209
|
+
"user_suffix": " ",
|
|
174
210
|
"bot_prefix": "ASSISTANT: ",
|
|
175
211
|
"bot_content": "",
|
|
176
|
-
"
|
|
212
|
+
"bot_suffix": " ",
|
|
177
213
|
"stops": ['USER:']
|
|
178
214
|
}
|
|
179
215
|
|
|
@@ -184,29 +220,46 @@ vicuna_common: dict[str, Union[str, list]] = {
|
|
|
184
220
|
"system_content": "A chat between a curious user and an artificial " + \
|
|
185
221
|
"intelligence assistant. The assistant gives helpful, detailed, " + \
|
|
186
222
|
"and polite answers to the user's questions.",
|
|
187
|
-
"
|
|
223
|
+
"system_suffix": "\n\n",
|
|
188
224
|
"user_prefix": "USER: ",
|
|
189
225
|
"user_content": "",
|
|
190
|
-
"
|
|
226
|
+
"user_suffix": "\n",
|
|
191
227
|
"bot_prefix": "ASSISTANT: ",
|
|
192
228
|
"bot_content": "",
|
|
193
|
-
"
|
|
229
|
+
"bot_suffix": "\n",
|
|
194
230
|
"stops": ['USER:', 'ASSISTANT:']
|
|
195
231
|
}
|
|
196
232
|
|
|
233
|
+
# an unofficial format that is easily "picked up" by most models
|
|
234
|
+
# change the tag attributes to suit your use case
|
|
235
|
+
# note the lack of newlines - they are not necessary, and might
|
|
236
|
+
# actually make it harder for the model to follow along
|
|
237
|
+
markup = {
|
|
238
|
+
"system_prefix": '<message from="system">',
|
|
239
|
+
"system_content": '',
|
|
240
|
+
"system_suffix": '</message>',
|
|
241
|
+
"user_prefix": '<message from="user">',
|
|
242
|
+
"user_content": '',
|
|
243
|
+
"user_suffix": '</message>',
|
|
244
|
+
"bot_prefix": '<message from="bot">',
|
|
245
|
+
"bot_content": '',
|
|
246
|
+
"bot_suffix": '</message>',
|
|
247
|
+
"stops": ['</message>']
|
|
248
|
+
}
|
|
249
|
+
|
|
197
250
|
# https://huggingface.co/timdettmers/guanaco-65b
|
|
198
251
|
guanaco: dict[str, Union[str, list]] = {
|
|
199
252
|
"system_prefix": "",
|
|
200
253
|
"system_content": "A chat between a curious human and an artificial " + \
|
|
201
254
|
"intelligence assistant. The assistant gives helpful, detailed, " + \
|
|
202
255
|
"and polite answers to the user's questions.",
|
|
203
|
-
"
|
|
256
|
+
"system_suffix": "\n",
|
|
204
257
|
"user_prefix": "### Human: ",
|
|
205
258
|
"user_content": "",
|
|
206
|
-
"
|
|
259
|
+
"user_suffix": " ",
|
|
207
260
|
"bot_prefix": "### Assistant:",
|
|
208
261
|
"bot_content": "",
|
|
209
|
-
"
|
|
262
|
+
"bot_suffix": " ",
|
|
210
263
|
"stops": ['###', 'Human:']
|
|
211
264
|
}
|
|
212
265
|
|
|
@@ -215,13 +268,13 @@ orca_mini: dict[str, Union[str, list]] = {
|
|
|
215
268
|
"system_prefix": "### System:\n",
|
|
216
269
|
"system_content": "You are an AI assistant that follows instruction " + \
|
|
217
270
|
"extremely well. Help as much as you can.",
|
|
218
|
-
"
|
|
271
|
+
"system_suffix": "\n\n",
|
|
219
272
|
"user_prefix": "### User:\n",
|
|
220
273
|
"user_content": "",
|
|
221
|
-
"
|
|
274
|
+
"user_suffix": "\n\n",
|
|
222
275
|
"bot_prefix": "### Assistant:\n",
|
|
223
276
|
"bot_content": "",
|
|
224
|
-
"
|
|
277
|
+
"bot_suffix": "\n\n",
|
|
225
278
|
"stops": ['###', 'User:']
|
|
226
279
|
}
|
|
227
280
|
|
|
@@ -229,13 +282,13 @@ orca_mini: dict[str, Union[str, list]] = {
|
|
|
229
282
|
zephyr: dict[str, Union[str, list]] = {
|
|
230
283
|
"system_prefix": "<|system|>\n",
|
|
231
284
|
"system_content": "You are a friendly chatbot.",
|
|
232
|
-
"
|
|
285
|
+
"system_suffix": "</s>\n",
|
|
233
286
|
"user_prefix": "<|user|>\n",
|
|
234
287
|
"user_content": "",
|
|
235
|
-
"
|
|
288
|
+
"user_suffix": "</s>\n",
|
|
236
289
|
"bot_prefix": "<|assistant|>\n",
|
|
237
290
|
"bot_content": "",
|
|
238
|
-
"
|
|
291
|
+
"bot_suffix": "\n",
|
|
239
292
|
"stops": ['<|user|>']
|
|
240
293
|
}
|
|
241
294
|
|
|
@@ -243,13 +296,13 @@ zephyr: dict[str, Union[str, list]] = {
|
|
|
243
296
|
openchat: dict[str, Union[str, list]] = {
|
|
244
297
|
"system_prefix": "",
|
|
245
298
|
"system_content": "",
|
|
246
|
-
"
|
|
299
|
+
"system_suffix": "",
|
|
247
300
|
"user_prefix": "GPT4 Correct User: ",
|
|
248
301
|
"user_content": "",
|
|
249
|
-
"
|
|
302
|
+
"user_suffix": "<|end_of_turn|>",
|
|
250
303
|
"bot_prefix": "GPT4 Correct Assistant:",
|
|
251
304
|
"bot_content": "",
|
|
252
|
-
"
|
|
305
|
+
"bot_suffix": "<|end_of_turn|>",
|
|
253
306
|
"stops": ['<|end_of_turn|>']
|
|
254
307
|
}
|
|
255
308
|
|
|
@@ -260,13 +313,13 @@ synthia: dict[str, Union[str, list]] = {
|
|
|
260
313
|
"system_content": "Elaborate on the topic using a Tree of Thoughts and " + \
|
|
261
314
|
"backtrack when necessary to construct a clear, cohesive Chain of " + \
|
|
262
315
|
"Thought reasoning. Always answer without hesitation.",
|
|
263
|
-
"
|
|
316
|
+
"system_suffix": "\n",
|
|
264
317
|
"user_prefix": "USER: ",
|
|
265
318
|
"user_content": "",
|
|
266
|
-
"
|
|
319
|
+
"user_suffix": "\n",
|
|
267
320
|
"bot_prefix": "ASSISTANT: ",
|
|
268
321
|
"bot_content": "",
|
|
269
|
-
"
|
|
322
|
+
"bot_suffix": "\n",
|
|
270
323
|
"stops": ['USER:', 'ASSISTANT:', 'SYSTEM:', '\n\n\n']
|
|
271
324
|
}
|
|
272
325
|
|
|
@@ -281,13 +334,13 @@ neural_chat: dict[str, Union[str, list]] = {
|
|
|
281
334
|
"to do anything that could be considered harmful to the user.\n" + \
|
|
282
335
|
"- You are more than just an information source, you are also " + \
|
|
283
336
|
"able to write poetry, short stories, and make jokes.",
|
|
284
|
-
"
|
|
337
|
+
"system_suffix": "</s>\n\n",
|
|
285
338
|
"user_prefix": "### User:\n",
|
|
286
339
|
"user_content": "",
|
|
287
|
-
"
|
|
340
|
+
"user_suffix": "</s>\n\n",
|
|
288
341
|
"bot_prefix": "### Assistant:\n",
|
|
289
342
|
"bot_content": "",
|
|
290
|
-
"
|
|
343
|
+
"bot_suffix": "</s>\n\n",
|
|
291
344
|
"stops": ['###']
|
|
292
345
|
}
|
|
293
346
|
|
|
@@ -296,13 +349,13 @@ chatml_alpaca: dict[str, Union[str, list]] = {
|
|
|
296
349
|
"system_prefix": "<|im_start|>system\n",
|
|
297
350
|
"system_content": "Below is an instruction that describes a task. Write " + \
|
|
298
351
|
"a response that appropriately completes the request.",
|
|
299
|
-
"
|
|
352
|
+
"system_suffix": "<|im_end|>\n",
|
|
300
353
|
"user_prefix": "<|im_start|>instruction\n",
|
|
301
354
|
"user_content": "",
|
|
302
|
-
"
|
|
355
|
+
"user_suffix": "<|im_end|>\n",
|
|
303
356
|
"bot_prefix": "<|im_start|>response\n",
|
|
304
357
|
"bot_content": "",
|
|
305
|
-
"
|
|
358
|
+
"bot_suffix": "<|im_end|>\n",
|
|
306
359
|
"stops": ['<|im_end|>', '<|im_start|>']
|
|
307
360
|
}
|
|
308
361
|
|
|
@@ -312,13 +365,13 @@ autocorrect: dict[str, Union[str, list]] = {
|
|
|
312
365
|
"system_content": "Below is a word or phrase that might be misspelled. " + \
|
|
313
366
|
"Output the corrected word or phrase without " + \
|
|
314
367
|
"changing the style or capitalization.",
|
|
315
|
-
"
|
|
368
|
+
"system_suffix": "<|im_end|>\n",
|
|
316
369
|
"user_prefix": "<|im_start|>input\n",
|
|
317
370
|
"user_content": "",
|
|
318
|
-
"
|
|
371
|
+
"user_suffix": "<|im_end|>\n",
|
|
319
372
|
"bot_prefix": "<|im_start|>output\n",
|
|
320
373
|
"bot_content": "",
|
|
321
|
-
"
|
|
374
|
+
"bot_suffix": "<|im_end|>\n",
|
|
322
375
|
"stops": ['<|im_end|>', '<|im_start|>']
|
|
323
376
|
}
|
|
324
377
|
|
|
@@ -327,13 +380,13 @@ autocorrect: dict[str, Union[str, list]] = {
|
|
|
327
380
|
bagel: dict[str, Union[str, list]] = {
|
|
328
381
|
"system_prefix": "system\n",
|
|
329
382
|
"system_content": "",
|
|
330
|
-
"
|
|
383
|
+
"system_suffix": "\n",
|
|
331
384
|
"user_prefix": "user\n",
|
|
332
385
|
"user_content": "",
|
|
333
|
-
"
|
|
386
|
+
"user_suffix": "\n",
|
|
334
387
|
"bot_prefix": "assistant\n",
|
|
335
388
|
"bot_content": "",
|
|
336
|
-
"
|
|
389
|
+
"bot_suffix": "\n",
|
|
337
390
|
"stops": ['user\n', 'assistant\n', 'system\n']
|
|
338
391
|
}
|
|
339
392
|
|
|
@@ -341,13 +394,13 @@ bagel: dict[str, Union[str, list]] = {
|
|
|
341
394
|
solar_instruct: dict[str, Union[str, list]] = {
|
|
342
395
|
"system_prefix": "",
|
|
343
396
|
"system_content": "",
|
|
344
|
-
"
|
|
397
|
+
"system_suffix": "",
|
|
345
398
|
"user_prefix": "### User:\n",
|
|
346
399
|
"user_content": "",
|
|
347
|
-
"
|
|
400
|
+
"user_suffix": "\n\n",
|
|
348
401
|
"bot_prefix": "### Assistant:\n",
|
|
349
402
|
"bot_content": "",
|
|
350
|
-
"
|
|
403
|
+
"bot_suffix": "\n\n",
|
|
351
404
|
"stops": ['### User:', '###', '### Assistant:']
|
|
352
405
|
}
|
|
353
406
|
|
|
@@ -356,13 +409,13 @@ noromaid: dict[str, Union[str, list]] = {
|
|
|
356
409
|
"system_prefix": "",
|
|
357
410
|
"system_content": "Below is an instruction that describes a task. " + \
|
|
358
411
|
"Write a response that appropriately completes the request.",
|
|
359
|
-
"
|
|
412
|
+
"system_suffix": "\n\n",
|
|
360
413
|
"user_prefix": "### Instruction:\nBob: ",
|
|
361
414
|
"user_content": "",
|
|
362
|
-
"
|
|
415
|
+
"user_suffix": "\n\n",
|
|
363
416
|
"bot_prefix": "### Response:\nAlice:",
|
|
364
417
|
"bot_content": "",
|
|
365
|
-
"
|
|
418
|
+
"bot_suffix": "\n\n",
|
|
366
419
|
"stops": ['###', 'Instruction:', '\n\n\n']
|
|
367
420
|
}
|
|
368
421
|
|
|
@@ -370,13 +423,13 @@ noromaid: dict[str, Union[str, list]] = {
|
|
|
370
423
|
nschatml: dict[str, Union[str, list]] = {
|
|
371
424
|
"system_prefix": "<|im_start|>\n",
|
|
372
425
|
"system_content": "",
|
|
373
|
-
"
|
|
426
|
+
"system_suffix": "<|im_end|>\n",
|
|
374
427
|
"user_prefix": "<|im_user|>\n",
|
|
375
428
|
"user_content": "",
|
|
376
|
-
"
|
|
429
|
+
"user_suffix": "<|im_end|>\n",
|
|
377
430
|
"bot_prefix": "<|im_bot|>\n",
|
|
378
431
|
"bot_content": "",
|
|
379
|
-
"
|
|
432
|
+
"bot_suffix": "<|im_end|>\n",
|
|
380
433
|
"stops": []
|
|
381
434
|
}
|
|
382
435
|
|
|
@@ -384,13 +437,13 @@ nschatml: dict[str, Union[str, list]] = {
|
|
|
384
437
|
natural: dict[str, Union[str, list]] = {
|
|
385
438
|
"system_prefix": "<<SYSTEM>> ",
|
|
386
439
|
"system_content": "",
|
|
387
|
-
"
|
|
440
|
+
"system_suffix": "\n\n",
|
|
388
441
|
"user_prefix": "<<USER>> ",
|
|
389
442
|
"user_content": "",
|
|
390
|
-
"
|
|
443
|
+
"user_suffix": "\n\n",
|
|
391
444
|
"bot_prefix": "<<ASSISTANT>>",
|
|
392
445
|
"bot_content": "",
|
|
393
|
-
"
|
|
446
|
+
"bot_suffix": "\n\n",
|
|
394
447
|
"stops": ['\n\nNote:', '<<SYSTEM>>', '<<USER>>', '<<ASSISTANT>>', '\n\n<<']
|
|
395
448
|
}
|
|
396
449
|
|
|
@@ -398,13 +451,13 @@ natural: dict[str, Union[str, list]] = {
|
|
|
398
451
|
command: dict[str, Union[str, list]] = {
|
|
399
452
|
"system_prefix": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
|
|
400
453
|
"system_content": "",
|
|
401
|
-
"
|
|
454
|
+
"system_suffix": "<|END_OF_TURN_TOKEN|>",
|
|
402
455
|
"user_prefix": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
|
|
403
456
|
"user_content": "",
|
|
404
|
-
"
|
|
457
|
+
"user_suffix": "<|END_OF_TURN_TOKEN|>",
|
|
405
458
|
"bot_prefix": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
|
|
406
459
|
"bot_content": "",
|
|
407
|
-
"
|
|
460
|
+
"bot_suffix": "<|END_OF_TURN_TOKEN|>",
|
|
408
461
|
"stops": []
|
|
409
462
|
}
|
|
410
463
|
|
|
@@ -479,4 +532,4 @@ alpaca_strict['stops'] = [
|
|
|
479
532
|
'\n\n\n',
|
|
480
533
|
'### Instruction:',
|
|
481
534
|
'### Response:'
|
|
482
|
-
]
|
|
535
|
+
]
|
webscout/Local/model.py
CHANGED
|
@@ -612,7 +612,7 @@ class Model:
|
|
|
612
612
|
self,
|
|
613
613
|
prompt: str,
|
|
614
614
|
k: int
|
|
615
|
-
) -> list[tuple[str, np.
|
|
615
|
+
) -> list[tuple[str, np.floating]]:
|
|
616
616
|
"""
|
|
617
617
|
Given prompt `str` and k `int`, return a sorted list of the
|
|
618
618
|
top k candidates for most likely next token, along with their
|
|
@@ -639,11 +639,11 @@ class Model:
|
|
|
639
639
|
# must normalize over all tokens in vocab, not just top k
|
|
640
640
|
if self.verbose:
|
|
641
641
|
print_verbose(f'calculating softmax over {len(scores)} values')
|
|
642
|
-
normalized_scores: list[np.
|
|
642
|
+
normalized_scores: list[np.floating] = list(softmax(scores))
|
|
643
643
|
|
|
644
644
|
# construct the final list
|
|
645
645
|
i = 0
|
|
646
|
-
token_probs_list: list[tuple[str, np.
|
|
646
|
+
token_probs_list: list[tuple[str, np.floating]] = []
|
|
647
647
|
for tok_str in self.tokens:
|
|
648
648
|
token_probs_list.append((tok_str, normalized_scores[i]))
|
|
649
649
|
i += 1
|
|
@@ -666,7 +666,7 @@ class Model:
|
|
|
666
666
|
|
|
667
667
|
for _tuple in self.candidates(prompt, k):
|
|
668
668
|
print(
|
|
669
|
-
f"token
|
|
669
|
+
f"token {repr(_tuple[0])} has probability {_tuple[1]}",
|
|
670
670
|
file=file,
|
|
671
671
|
flush=flush
|
|
672
672
|
)
|