webscout 6.0__py3-none-any.whl → 6.2b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (60)
  1. webscout/AIauto.py +77 -259
  2. webscout/Agents/Onlinesearcher.py +22 -10
  3. webscout/Agents/functioncall.py +2 -2
  4. webscout/Bard.py +21 -21
  5. webscout/Extra/autollama.py +37 -20
  6. webscout/Local/__init__.py +6 -7
  7. webscout/Local/formats.py +404 -194
  8. webscout/Local/model.py +1074 -477
  9. webscout/Local/samplers.py +108 -144
  10. webscout/Local/thread.py +251 -410
  11. webscout/Local/ui.py +401 -0
  12. webscout/Local/utils.py +338 -136
  13. webscout/Provider/Amigo.py +51 -38
  14. webscout/Provider/Deepseek.py +7 -6
  15. webscout/Provider/EDITEE.py +2 -2
  16. webscout/Provider/GPTWeb.py +1 -1
  17. webscout/Provider/NinjaChat.py +200 -0
  18. webscout/Provider/OLLAMA.py +1 -1
  19. webscout/Provider/Perplexity.py +1 -1
  20. webscout/Provider/Reka.py +12 -5
  21. webscout/Provider/TTI/AIuncensored.py +103 -0
  22. webscout/Provider/TTI/Nexra.py +3 -3
  23. webscout/Provider/TTI/__init__.py +3 -2
  24. webscout/Provider/TTI/aiforce.py +2 -2
  25. webscout/Provider/TTI/imgninza.py +136 -0
  26. webscout/Provider/TeachAnything.py +0 -3
  27. webscout/Provider/Youchat.py +1 -1
  28. webscout/Provider/__init__.py +12 -11
  29. webscout/Provider/{ChatHub.py → aimathgpt.py} +72 -88
  30. webscout/Provider/cerebras.py +125 -118
  31. webscout/Provider/cleeai.py +1 -1
  32. webscout/Provider/felo_search.py +1 -1
  33. webscout/Provider/gaurish.py +207 -0
  34. webscout/Provider/geminiprorealtime.py +160 -0
  35. webscout/Provider/genspark.py +1 -1
  36. webscout/Provider/julius.py +8 -3
  37. webscout/Provider/learnfastai.py +1 -1
  38. webscout/Provider/promptrefine.py +3 -1
  39. webscout/Provider/turboseek.py +3 -8
  40. webscout/Provider/tutorai.py +1 -1
  41. webscout/__init__.py +2 -43
  42. webscout/exceptions.py +5 -1
  43. webscout/tempid.py +4 -73
  44. webscout/utils.py +3 -0
  45. webscout/version.py +1 -1
  46. webscout/webai.py +1 -1
  47. webscout/webscout_search.py +154 -123
  48. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/METADATA +156 -236
  49. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/RECORD +53 -54
  50. webscout/Local/rawdog.py +0 -946
  51. webscout/Provider/BasedGPT.py +0 -214
  52. webscout/Provider/TTI/amigo.py +0 -148
  53. webscout/Provider/aigames.py +0 -213
  54. webscout/Provider/bixin.py +0 -264
  55. webscout/Provider/xdash.py +0 -182
  56. webscout/websx_search.py +0 -19
  57. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/LICENSE.md +0 -0
  58. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/WHEEL +0 -0
  59. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/entry_points.txt +0 -0
  60. {webscout-6.0.dist-info → webscout-6.2b0.dist-info}/top_level.txt +0 -0
webscout/Local/formats.py CHANGED
@@ -1,33 +1,147 @@
1
- from ._version import __version__, __llama_cpp_version__
1
+ import time
2
+ from typing import Callable, Any, Generator, Optional, List, Tuple
2
3
 
3
- from typing import Callable, Union, Any
4
+ from .utils import assert_type, NoneType
4
5
 
5
6
 
6
7
  class AdvancedFormat:
8
+ """
9
+ A class to represent prompt formats with dynamic components.
10
+
11
+ This class allows you to define prompt formats that include:
12
+ - Static text components (strings).
13
+ - Dynamic components generated by functions (Callables).
14
+
15
+ Attributes:
16
+ _dict (Dict[str, Any]): A dictionary where keys represent
17
+ prompt components (e.g., "system_prompt", "user_prefix")
18
+ and values can be strings or Callables.
19
+
20
+ Example:
21
+ ```python
22
+ my_format = AdvancedFormat({
23
+ "system_prefix": "<<SYSTEM>>\n",
24
+ "system_prompt": "You are a helpful assistant.",
25
+ "user_prefix": lambda: f"[{get_time_str()}] USER:\n",
26
+ "user_suffix": "\n",
27
+ "bot_prefix": "ASSISTANT:\n",
28
+ })
29
+
30
+ wrapped_prompt = my_format.wrap("What is the weather today?")
31
+ print(wrapped_prompt)
32
+ ```
33
+
34
+ This will print a prompt like:
35
+ ```
36
+ <<SYSTEM>>
37
+ You are a helpful assistant.
38
+ [10:30 AM, Tuesday, January 16, 2024] USER:
39
+ What is the weather today?
40
+ ASSISTANT:
41
+ ```
42
+ """
43
+
44
+ def __init__(self, _dict: dict[str, Any]):
45
+ """
46
+ Initializes the AdvancedFormat object.
47
+
48
+ Args:
49
+ _dict (Dict[str, Any]): A dictionary defining the prompt
50
+ format. Keys represent prompt components, and values
51
+ can be strings or Callables.
52
+ """
53
+ assert_type(_dict, dict, '_dict', 'AdvancedFormat')
54
+ _dict_keys = _dict.keys() # only read once
55
+
56
+ if 'system_prompt' not in _dict_keys and 'system_content' in _dict_keys:
57
+ raise ValueError(
58
+ "AdvancedFormat: the provided dictionary uses the deprecated "
59
+ "'system_content' key instead of the expected 'system_prompt' "
60
+ "key. Please update your code accordingly."
61
+ )
62
+ self._dict = _dict
7
63
 
8
- def __init__(self, base_dict: dict[str, Union[str, list]]):
9
- self._base_dict = base_dict
10
- self.overrides = {}
11
-
12
64
  def __getitem__(self, key: str) -> Any:
13
- if key in self.overrides:
14
- return str(self.overrides[key]())
65
+ """
66
+ Retrieves the value associated with a key.
67
+
68
+ If the value is a Callable, it will be called and its result
69
+ returned.
70
+
71
+ Args:
72
+ key (str): The key to retrieve.
73
+
74
+ Returns:
75
+ Any: The value associated with the key.
76
+ """
77
+ if key in self._dict:
78
+ if callable(self._dict[key]):
79
+ return self._dict[key]()
80
+ else:
81
+ return self._dict[key]
15
82
  else:
16
- return self._base_dict[key]
17
-
83
+ raise KeyError(f"AdvancedFormat: the specified key {key!r} was not found")
84
+
18
85
  def __repr__(self) -> str:
19
- # NOTE: This method does not represent overrides
20
- return repr(self._base_dict)
21
-
86
+ """
87
+ Returns a string representation of the AdvancedFormat object.
88
+
89
+ Returns:
90
+ str: The string representation.
91
+ """
92
+ return f'AdvancedFormat({self._dict!r})'
93
+
22
94
  def keys(self):
23
- return self._base_dict.keys()
24
-
25
- def override(self, key: str, fn: Callable) -> None:
26
- self.overrides[key] = fn
27
-
95
+ """
96
+ Returns an iterator over the keys of the format dictionary.
97
+
98
+ Returns:
99
+ iterator: An iterator over the keys.
100
+ """
101
+ return self._dict.keys()
102
+
103
+ def values(self):
104
+ """
105
+ Returns an iterator over the values of the format dictionary.
106
+ If a value is a callable, it is called and its result is returned.
107
+
108
+ Returns:
109
+ iterator: An iterator over the values.
110
+ """
111
+ for value in self._dict.values():
112
+ if callable(value):
113
+ yield value()
114
+ else:
115
+ yield value
116
+
117
+ def items(self) -> Generator[Tuple[str, Any], None, None]:
118
+ """
119
+ Returns an iterator over the (key, value) pairs of the format
120
+ dictionary. If a value is a callable, it is called and its
121
+ result is returned.
122
+
123
+ Returns:
124
+ iterator: An iterator over the (key, value) pairs.
125
+ """
126
+ for key, value in self._dict.items():
127
+ if callable(value):
128
+ yield key, value()
129
+ else:
130
+ yield key, value
131
+
28
132
  def wrap(self, prompt: str) -> str:
133
+ """
134
+ Wraps a given prompt using the defined format.
135
+
136
+ Args:
137
+ prompt (str): The prompt to be wrapped.
138
+
139
+ Returns:
140
+ str: The wrapped prompt.
141
+ """
142
+ assert_type(prompt, str, 'prompt', 'AdvancedFormat.wrap')
29
143
  return self['system_prefix'] + \
30
- self['system_content'] + \
144
+ self['system_prompt'] + \
31
145
  self['system_suffix'] + \
32
146
  self['user_prefix'] + \
33
147
  prompt + \
@@ -35,44 +149,66 @@ class AdvancedFormat:
35
149
  self['bot_prefix']
36
150
 
37
151
 
38
- def wrap(
39
- prompt: str,
40
- format: dict[str, Union[str, list]]
41
- ) -> str:
42
- """Wrap a given string in any prompt format for single-turn completion"""
43
- return format['system_prefix'] + \
44
- format['system_content'] + \
45
- format['system_suffix'] + \
46
- format['user_prefix'] + \
47
- prompt + \
48
- format['user_suffix'] + \
49
- format['bot_prefix']
152
+ def wrap(prompt: str, format: dict[str, Any] | AdvancedFormat) -> str:
153
+ """
154
+ Wraps a given prompt using a simple or advanced format.
155
+
156
+ Args:
157
+ prompt (str): The prompt to be wrapped.
158
+ format (Union[Dict[str, Any], AdvancedFormat]): The prompt format to use.
159
+
160
+ Returns:
161
+ str: The wrapped prompt.
162
+ """
163
+ assert_type(prompt, str, 'prompt', 'formats.wrap')
164
+ if isinstance(format, dict):
165
+ return format['system_prefix'] + \
166
+ format['system_prompt'] + \
167
+ format['system_suffix'] + \
168
+ format['user_prefix'] + \
169
+ prompt + \
170
+ format['user_suffix'] + \
171
+ format['bot_prefix']
172
+ elif isinstance(format, AdvancedFormat):
173
+ return format.wrap(prompt)
174
+ else:
175
+ raise TypeError(
176
+ "format should be a dict or AdvancedFormat, got "
177
+ f"{type(format)}"
178
+ )
179
+
180
+
181
+ def get_time_str() -> str:
182
+ """Return a timestamp of the current time as a string"""
183
+ # helpful: https://strftime.net
184
+ return time.strftime("%l:%M %p, %A, %B %e, %Y")
50
185
 
51
186
 
52
- blank: dict[str, Union[str, list]] = {
187
+ def short_time_str() -> str:
188
+ """Return a shorter timestamp of the current time as a string"""
189
+ return time.strftime('%a %I:%M %p')
190
+
191
+
192
+ blank: dict[str, str | list] = {
53
193
  "system_prefix": "",
54
- "system_content": "",
194
+ "system_prompt": "",
55
195
  "system_suffix": "",
56
196
  "user_prefix": "",
57
- "user_content": "",
58
197
  "user_suffix": "",
59
198
  "bot_prefix": "",
60
- "bot_content": "",
61
199
  "bot_suffix": "",
62
200
  "stops": []
63
201
  }
64
202
 
65
203
  # https://github.com/tatsu-lab/stanford_alpaca
66
- alpaca: dict[str, Union[str, list]] = {
204
+ alpaca: dict[str, str | list] = {
67
205
  "system_prefix": "",
68
- "system_content": "Below is an instruction that describes a task. " + \
206
+ "system_prompt": "Below is an instruction that describes a task. " +
69
207
  "Write a response that appropriately completes the request.",
70
208
  "system_suffix": "\n\n",
71
209
  "user_prefix": "### Instruction:\n",
72
- "user_content": "",
73
210
  "user_suffix": "\n\n",
74
211
  "bot_prefix": "### Response:\n",
75
- "bot_content": "",
76
212
  "bot_suffix": "\n\n",
77
213
  "stops": ['###', 'Instruction:', '\n\n\n']
78
214
  }
@@ -80,7 +216,7 @@ alpaca: dict[str, Union[str, list]] = {
80
216
  # https://docs.mistral.ai/models/
81
217
  # As a reference, here is the format used to tokenize instructions during fine-tuning:
82
218
  # ```
83
- # [START_SYMBOL_ID] +
219
+ # [START_SYMBOL_ID] +
84
220
  # tok("[INST]") + tok(USER_MESSAGE_1) + tok("[/INST]") +
85
221
  # tok(BOT_MESSAGE_1) + [END_SYMBOL_ID] +
86
222
  # …
@@ -89,143 +225,131 @@ alpaca: dict[str, Union[str, list]] = {
89
225
  # ```
90
226
  # In the pseudo-code above, note that the tokenize method should not add a BOS or EOS token automatically, but should add a prefix space.
91
227
 
92
- mistral_instruct: dict[str, Union[str, list]] = {
228
+ mistral_instruct: dict[str, str | list] = {
93
229
  "system_prefix": "",
94
- "system_content": "",
230
+ "system_prompt": "",
95
231
  "system_suffix": "",
96
- "user_prefix": " [INST] ",
97
- "user_content": "",
232
+ "user_prefix": "[INST] ",
98
233
  "user_suffix": " [/INST]",
99
234
  "bot_prefix": "",
100
- "bot_content": "",
101
- "bot_suffix": "",
235
+ "bot_suffix": "</s>",
102
236
  "stops": []
103
237
  }
104
238
 
105
239
  # https://docs.mistral.ai/platform/guardrailing/
106
- mistral_instruct_safe: dict[str, Union[str, list]] = {
240
+ mistral_instruct_safe: dict[str, str | list] = {
107
241
  "system_prefix": "",
108
- "system_content": "",
242
+ "system_prompt": "",
109
243
  "system_suffix": "",
110
- "user_prefix": " [INST] Always assist with care, respect, and truth. " + \
111
- "Respond with utmost utility yet securely. Avoid harmful, unethical, " + \
112
- "prejudiced, or negative content. Ensure replies promote fairness and " + \
113
- "positivity. ",
114
- "user_content": "",
244
+ "user_prefix": "[INST] Always assist with care, respect, and truth. " +
245
+ "Respond with utmost utility yet securely. Avoid harmful, unethical, " +
246
+ "prejudiced, or negative content. Ensure replies promote fairness and " +
247
+ "positivity.\n\n",
115
248
  "user_suffix": " [/INST]",
116
249
  "bot_prefix": "",
117
- "bot_content": "",
118
- "bot_suffix": "",
250
+ "bot_suffix": "</s>",
251
+ "stops": []
252
+ }
253
+
254
+ # unofficial, custom template
255
+ mistral_instruct_roleplay: dict[str, str | list] = {
256
+ "system_prefix": "",
257
+ "system_prompt": "A chat between Alice and Bob.",
258
+ "system_suffix": "\n\n",
259
+ "user_prefix": "[INST] ALICE: ",
260
+ "user_suffix": " [/INST] BOB:",
261
+ "bot_prefix": "",
262
+ "bot_suffix": "</s>",
119
263
  "stops": []
120
264
  }
121
265
 
122
266
  # https://github.com/openai/openai-python/blob/main/chatml.md
123
- chatml: dict[str, Union[str, list]] = {
267
+ chatml: dict[str, str | list] = {
124
268
  "system_prefix": "<|im_start|>system\n",
125
- "system_content": "",
269
+ "system_prompt": "",
126
270
  "system_suffix": "<|im_end|>\n",
127
271
  "user_prefix": "<|im_start|>user\n",
128
- "user_content": "",
129
272
  "user_suffix": "<|im_end|>\n",
130
273
  "bot_prefix": "<|im_start|>assistant\n",
131
- "bot_content": "",
132
274
  "bot_suffix": "<|im_end|>\n",
133
275
  "stops": ['<|im_start|>']
134
276
  }
135
277
 
136
278
  # https://huggingface.co/blog/llama2
137
279
  # system message relaxed to avoid undue refusals
138
- llama2chat: dict[str, Union[str, list]] = {
280
+ llama2chat: dict[str, str | list] = {
139
281
  "system_prefix": "[INST] <<SYS>>\n",
140
- "system_content": "You are a helpful AI assistant.",
282
+ "system_prompt": "You are a helpful AI assistant.",
141
283
  "system_suffix": "\n<</SYS>>\n\n",
142
284
  "user_prefix": "",
143
- "user_content": "",
144
285
  "user_suffix": " [/INST]",
145
286
  "bot_prefix": " ",
146
- "bot_content": "",
147
287
  "bot_suffix": " [INST] ",
148
288
  "stops": ['[INST]', '[/INST]']
149
289
  }
150
290
 
151
- # https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
152
- #
153
- # for llama 3 instruct models, use the following string for `-p` in llama.cpp,
154
- # along with `-e` to escape newlines correctly
155
- #
156
- # '<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful AI assistant called "Llama 3".<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nhi<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n'
157
- #
158
- llama3: dict[str, Union[str, list]] = {
291
+ # https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/
292
+ llama3: dict[str, str | list] = {
159
293
  "system_prefix": "<|start_header_id|>system<|end_header_id|>\n\n",
160
- "system_content": 'You are a helpful AI assistant called "Llama 3".',
294
+ "system_prompt": 'You are a helpful AI assistant called "Llama 3".',
161
295
  "system_suffix": "<|eot_id|>\n",
162
296
  "user_prefix": "<|start_header_id|>user<|end_header_id|>\n\n",
163
- "user_content": "",
164
297
  "user_suffix": "<|eot_id|>\n",
165
298
  "bot_prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
166
- "bot_content": "",
167
299
  "bot_suffix": "<|eot_id|>\n",
168
- "stops": [128001, 128009]
169
- }
170
-
171
- # https://github.com/tatsu-lab/stanford_alpaca
172
- alpaca: dict[str, Union[str, list]] = {
173
- "system_prefix": "",
174
- "system_content": "Below is an instruction that describes a task. " + \
175
- "Write a response that appropriately completes the request.",
176
- "system_suffix": "\n\n",
177
- "user_prefix": "### Instruction:\n",
178
- "user_content": "",
179
- "user_suffix": "\n\n",
180
- "bot_prefix": "### Response:\n",
181
- "bot_content": "",
182
- "bot_suffix": "\n\n",
183
- "stops": ['###', 'Instruction:', '\n\n\n']
300
+ "stops": [128001, 128008, 128009]
184
301
  }
185
302
 
186
303
  # https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
187
- phi3: dict[str, Union[str, list]] = {
188
- "system_prefix": "",
189
- "system_content": "", # does not officially support system prompt
190
- "system_suffix": "",
304
+ phi3: dict[str, str | list] = {
305
+ "system_prefix": "<|system|>\n",
306
+ "system_prompt": "",
307
+ "system_suffix": "<|end|>\n",
191
308
  "user_prefix": "<|user|>\n",
192
- "user_content": "",
193
309
  "user_suffix": "<|end|>\n",
194
310
  "bot_prefix": "<|assistant|>\n",
195
- "bot_content": "",
196
311
  "bot_suffix": "<|end|>\n",
197
312
  "stops": []
198
313
  }
199
314
 
315
+ # https://huggingface.co/google/gemma-2-27b-it
316
+ # https://ai.google.dev/gemma/docs/model_card_2
317
+ gemma2: dict[str, str | list] = {
318
+ "system_prefix": "",
319
+ "system_prompt": "", # Does not officially support system prompt
320
+ "system_suffix": "",
321
+ "user_prefix": "<start_of_turn>user\n",
322
+ "user_suffix": "<end_of_turn>\n",
323
+ "bot_prefix": "<start_of_turn>model\n",
324
+ "bot_suffix": "<end_of_turn>\n",
325
+ "stops": ["<end_of_turn>"]
326
+ }
327
+
200
328
  # this is the official vicuna. it is often butchered in various ways,
201
329
  # most commonly by adding line breaks
202
330
  # https://github.com/flu0r1ne/FastChat/blob/main/docs/vicuna_weights_version.md
203
- vicuna_lmsys: dict[str, Union[str, list]] = {
331
+ vicuna_lmsys: dict[str, str | list] = {
204
332
  "system_prefix": "",
205
- "system_content": "",
333
+ "system_prompt": "",
206
334
  "system_suffix": " ",
207
335
  "user_prefix": "USER: ",
208
- "user_content": "",
209
336
  "user_suffix": " ",
210
337
  "bot_prefix": "ASSISTANT: ",
211
- "bot_content": "",
212
338
  "bot_suffix": " ",
213
339
  "stops": ['USER:']
214
340
  }
215
341
 
216
342
  # spotted here and elsewhere:
217
343
  # https://huggingface.co/Norquinal/Mistral-7B-claude-chat
218
- vicuna_common: dict[str, Union[str, list]] = {
344
+ vicuna_common: dict[str, str | list] = {
219
345
  "system_prefix": "",
220
- "system_content": "A chat between a curious user and an artificial " + \
221
- "intelligence assistant. The assistant gives helpful, detailed, " + \
346
+ "system_prompt": "A chat between a curious user and an artificial " +
347
+ "intelligence assistant. The assistant gives helpful, detailed, " +
222
348
  "and polite answers to the user's questions.",
223
349
  "system_suffix": "\n\n",
224
350
  "user_prefix": "USER: ",
225
- "user_content": "",
226
351
  "user_suffix": "\n",
227
352
  "bot_prefix": "ASSISTANT: ",
228
- "bot_content": "",
229
353
  "bot_suffix": "\n",
230
354
  "stops": ['USER:', 'ASSISTANT:']
231
355
  }
@@ -236,231 +360,213 @@ vicuna_common: dict[str, Union[str, list]] = {
236
360
  # actually make it harder for the model to follow along
237
361
  markup = {
238
362
  "system_prefix": '<message from="system">',
239
- "system_content": '',
363
+ "system_prompt": '',
240
364
  "system_suffix": '</message>',
241
365
  "user_prefix": '<message from="user">',
242
- "user_content": '',
243
366
  "user_suffix": '</message>',
244
367
  "bot_prefix": '<message from="bot">',
245
- "bot_content": '',
246
368
  "bot_suffix": '</message>',
247
369
  "stops": ['</message>']
248
370
  }
249
371
 
250
372
  # https://huggingface.co/timdettmers/guanaco-65b
251
- guanaco: dict[str, Union[str, list]] = {
373
+ guanaco: dict[str, str | list] = {
252
374
  "system_prefix": "",
253
- "system_content": "A chat between a curious human and an artificial " + \
254
- "intelligence assistant. The assistant gives helpful, detailed, " + \
375
+ "system_prompt": "A chat between a curious human and an artificial " +
376
+ "intelligence assistant. The assistant gives helpful, detailed, " +
255
377
  "and polite answers to the user's questions.",
256
378
  "system_suffix": "\n",
257
379
  "user_prefix": "### Human: ",
258
- "user_content": "",
259
380
  "user_suffix": " ",
260
381
  "bot_prefix": "### Assistant:",
261
- "bot_content": "",
262
382
  "bot_suffix": " ",
263
383
  "stops": ['###', 'Human:']
264
384
  }
265
385
 
266
386
  # https://huggingface.co/pankajmathur/orca_mini_v3_7b
267
- orca_mini: dict[str, Union[str, list]] = {
387
+ orca_mini: dict[str, str | list] = {
268
388
  "system_prefix": "### System:\n",
269
- "system_content": "You are an AI assistant that follows instruction " + \
389
+ "system_prompt": "You are an AI assistant that follows instruction " +
270
390
  "extremely well. Help as much as you can.",
271
391
  "system_suffix": "\n\n",
272
392
  "user_prefix": "### User:\n",
273
- "user_content": "",
274
393
  "user_suffix": "\n\n",
275
394
  "bot_prefix": "### Assistant:\n",
276
- "bot_content": "",
277
395
  "bot_suffix": "\n\n",
278
396
  "stops": ['###', 'User:']
279
397
  }
280
398
 
281
399
  # https://huggingface.co/HuggingFaceH4/zephyr-7b-beta
282
- zephyr: dict[str, Union[str, list]] = {
400
+ zephyr: dict[str, str | list] = {
283
401
  "system_prefix": "<|system|>\n",
284
- "system_content": "You are a friendly chatbot.",
402
+ "system_prompt": "You are a friendly chatbot.",
285
403
  "system_suffix": "</s>\n",
286
404
  "user_prefix": "<|user|>\n",
287
- "user_content": "",
288
405
  "user_suffix": "</s>\n",
289
406
  "bot_prefix": "<|assistant|>\n",
290
- "bot_content": "",
291
407
  "bot_suffix": "\n",
292
408
  "stops": ['<|user|>']
293
409
  }
294
410
 
295
411
  # OpenChat: https://huggingface.co/openchat/openchat-3.5-0106
296
- openchat: dict[str, Union[str, list]] = {
412
+ openchat: dict[str, str | list] = {
297
413
  "system_prefix": "",
298
- "system_content": "",
414
+ "system_prompt": "",
299
415
  "system_suffix": "",
300
416
  "user_prefix": "GPT4 Correct User: ",
301
- "user_content": "",
302
417
  "user_suffix": "<|end_of_turn|>",
303
418
  "bot_prefix": "GPT4 Correct Assistant:",
304
- "bot_content": "",
305
419
  "bot_suffix": "<|end_of_turn|>",
306
420
  "stops": ['<|end_of_turn|>']
307
421
  }
308
422
 
309
423
  # SynthIA by Migel Tissera
310
424
  # https://huggingface.co/migtissera/Tess-XS-v1.0
311
- synthia: dict[str, Union[str, list]] = {
425
+ synthia: dict[str, str | list] = {
312
426
  "system_prefix": "SYSTEM: ",
313
- "system_content": "Elaborate on the topic using a Tree of Thoughts and " + \
314
- "backtrack when necessary to construct a clear, cohesive Chain of " + \
427
+ "system_prompt": "Elaborate on the topic using a Tree of Thoughts and " +
428
+ "backtrack when necessary to construct a clear, cohesive Chain of " +
315
429
  "Thought reasoning. Always answer without hesitation.",
316
430
  "system_suffix": "\n",
317
431
  "user_prefix": "USER: ",
318
- "user_content": "",
319
432
  "user_suffix": "\n",
320
433
  "bot_prefix": "ASSISTANT: ",
321
- "bot_content": "",
322
434
  "bot_suffix": "\n",
323
435
  "stops": ['USER:', 'ASSISTANT:', 'SYSTEM:', '\n\n\n']
324
436
  }
325
437
 
326
438
  # Intel's neural chat v3
327
439
  # https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/neural_chat/prompts/prompt.py
328
- neural_chat: dict[str, Union[str, list]] = {
440
+ neural_chat: dict[str, str | list] = {
329
441
  "system_prefix": "### System:\n",
330
- "system_content": \
331
- "- You are a helpful assistant chatbot trained by Intel.\n" + \
332
- "- You answer questions.\n"+\
333
- "- You are excited to be able to help the user, but will refuse " + \
334
- "to do anything that could be considered harmful to the user.\n" + \
335
- "- You are more than just an information source, you are also " + \
442
+ "system_prompt":
443
+ "- You are a helpful assistant chatbot trained by Intel.\n" +
444
+ "- You answer questions.\n" +
445
+ "- You are excited to be able to help the user, but will refuse " +
446
+ "to do anything that could be considered harmful to the user.\n" +
447
+ "- You are more than just an information source, you are also " +
336
448
  "able to write poetry, short stories, and make jokes.",
337
449
  "system_suffix": "</s>\n\n",
338
450
  "user_prefix": "### User:\n",
339
- "user_content": "",
340
451
  "user_suffix": "</s>\n\n",
341
452
  "bot_prefix": "### Assistant:\n",
342
- "bot_content": "",
343
453
  "bot_suffix": "</s>\n\n",
344
454
  "stops": ['###']
345
455
  }
346
456
 
347
457
  # experimental: stanford's alpaca format adapted for chatml models
348
- chatml_alpaca: dict[str, Union[str, list]] = {
458
+ chatml_alpaca: dict[str, str | list] = {
349
459
  "system_prefix": "<|im_start|>system\n",
350
- "system_content": "Below is an instruction that describes a task. Write " + \
460
+ "system_prompt": "Below is an instruction that describes a task. Write " +
351
461
  "a response that appropriately completes the request.",
352
462
  "system_suffix": "<|im_end|>\n",
353
463
  "user_prefix": "<|im_start|>instruction\n",
354
- "user_content": "",
355
464
  "user_suffix": "<|im_end|>\n",
356
465
  "bot_prefix": "<|im_start|>response\n",
357
- "bot_content": "",
358
466
  "bot_suffix": "<|im_end|>\n",
359
467
  "stops": ['<|im_end|>', '<|im_start|>']
360
468
  }
361
469
 
362
470
  # experimental
363
- autocorrect: dict[str, Union[str, list]] = {
471
+ autocorrect: dict[str, str | list] = {
364
472
  "system_prefix": "<|im_start|>instruction\n",
365
- "system_content": "Below is a word or phrase that might be misspelled. " + \
366
- "Output the corrected word or phrase without " + \
473
+ "system_prompt": "Below is a word or phrase that might be misspelled. " +
474
+ "Output the corrected word or phrase without " +
367
475
  "changing the style or capitalization.",
368
476
  "system_suffix": "<|im_end|>\n",
369
477
  "user_prefix": "<|im_start|>input\n",
370
- "user_content": "",
371
478
  "user_suffix": "<|im_end|>\n",
372
479
  "bot_prefix": "<|im_start|>output\n",
373
- "bot_content": "",
374
480
  "bot_suffix": "<|im_end|>\n",
375
481
  "stops": ['<|im_end|>', '<|im_start|>']
376
482
  }
377
483
 
378
484
  # https://huggingface.co/jondurbin/bagel-dpo-7b-v0.1
379
485
  # Replace "assistant" with any other role
380
- bagel: dict[str, Union[str, list]] = {
486
+ bagel: dict[str, str | list] = {
381
487
  "system_prefix": "system\n",
382
- "system_content": "",
488
+ "system_prompt": "",
383
489
  "system_suffix": "\n",
384
490
  "user_prefix": "user\n",
385
- "user_content": "",
386
491
  "user_suffix": "\n",
387
492
  "bot_prefix": "assistant\n",
388
- "bot_content": "",
389
493
  "bot_suffix": "\n",
390
494
  "stops": ['user\n', 'assistant\n', 'system\n']
391
495
  }
392
496
 
393
497
  # https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0
394
- solar_instruct: dict[str, Union[str, list]] = {
498
+ solar_instruct: dict[str, str | list] = {
395
499
  "system_prefix": "",
396
- "system_content": "",
500
+ "system_prompt": "",
397
501
  "system_suffix": "",
398
502
  "user_prefix": "### User:\n",
399
- "user_content": "",
400
503
  "user_suffix": "\n\n",
401
504
  "bot_prefix": "### Assistant:\n",
402
- "bot_content": "",
403
505
  "bot_suffix": "\n\n",
404
506
  "stops": ['### User:', '###', '### Assistant:']
405
507
  }
406
508
 
407
509
  # NeverSleep's Noromaid - alpaca with character names prefixed
408
- noromaid: dict[str, Union[str, list]] = {
510
+ noromaid: dict[str, str | list] = {
409
511
  "system_prefix": "",
410
- "system_content": "Below is an instruction that describes a task. " + \
512
+ "system_prompt": "Below is an instruction that describes a task. " +
411
513
  "Write a response that appropriately completes the request.",
412
514
  "system_suffix": "\n\n",
413
- "user_prefix": "### Instruction:\nBob: ",
414
- "user_content": "",
515
+ "user_prefix": "### Instruction:\nAlice: ",
415
516
  "user_suffix": "\n\n",
416
- "bot_prefix": "### Response:\nAlice:",
417
- "bot_content": "",
517
+ "bot_prefix": "### Response:\nBob:",
418
518
  "bot_suffix": "\n\n",
419
519
  "stops": ['###', 'Instruction:', '\n\n\n']
420
520
  }
421
521
 
422
522
  # https://huggingface.co/Undi95/Borealis-10.7B
423
- nschatml: dict[str, Union[str, list]] = {
523
+ nschatml: dict[str, str | list] = {
424
524
  "system_prefix": "<|im_start|>\n",
425
- "system_content": "",
525
+ "system_prompt": "",
426
526
  "system_suffix": "<|im_end|>\n",
427
527
  "user_prefix": "<|im_user|>\n",
428
- "user_content": "",
429
528
  "user_suffix": "<|im_end|>\n",
430
529
  "bot_prefix": "<|im_bot|>\n",
431
- "bot_content": "",
432
530
  "bot_suffix": "<|im_end|>\n",
433
531
  "stops": []
434
532
  }
435
533
 
436
534
  # natural format for many models
437
- natural: dict[str, Union[str, list]] = {
535
+ natural: dict[str, str | list] = {
438
536
  "system_prefix": "<<SYSTEM>> ",
439
- "system_content": "",
537
+ "system_prompt": "",
440
538
  "system_suffix": "\n\n",
441
539
  "user_prefix": "<<USER>> ",
442
- "user_content": "",
443
540
  "user_suffix": "\n\n",
444
541
  "bot_prefix": "<<ASSISTANT>>",
445
- "bot_content": "",
446
542
  "bot_suffix": "\n\n",
447
- "stops": ['\n\nNote:', '<<SYSTEM>>', '<<USER>>', '<<ASSISTANT>>', '\n\n<<']
543
+ "stops": ['\n\nNote:', '<<SYSTEM>>', '<<USER>>',
544
+ '<<ASSISTANT>>', '\n\n<<']
448
545
  }
449
546
 
450
547
  # https://docs.cohere.com/docs/prompting-command-r
451
- command: dict[str, Union[str, list]] = {
548
+ #
549
+ # NOTE: Command models benefit from special attention to the recommended prompt
550
+ # format and techniques outlined in Cohere's documentation. The default
551
+ # prompt format below will work OK, but ideally should be customized
552
+ # for your specific use case according to the specific format shown in
553
+ # the documentation.
554
+ #
555
+ command: dict[str, str | list] = {
452
556
  "system_prefix": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
453
- "system_content": "",
557
+ "system_prompt": "You are a large language model called Command R built "
558
+ "by the company Cohere. You act as a brilliant, sophisticated, "
559
+ "AI-assistant chatbot trained to assist human users by providing "
560
+ "thorough responses.",
454
561
  "system_suffix": "<|END_OF_TURN_TOKEN|>",
455
562
  "user_prefix": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
456
- "user_content": "",
457
563
  "user_suffix": "<|END_OF_TURN_TOKEN|>",
458
564
  "bot_prefix": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
459
- "bot_content": "",
460
565
  "bot_suffix": "<|END_OF_TURN_TOKEN|>",
461
566
  "stops": []
462
567
  }
463
568
 
569
+ # Presets for various models
464
570
  mistral_openorca = chatml.copy()
465
571
  dolphin = chatml.copy()
466
572
  samantha = chatml.copy()
@@ -471,65 +577,169 @@ monad = chatml.copy()
471
577
  orca = chatml.copy()
472
578
  hexoteric = chatml.copy()
473
579
  orcamaid = chatml.copy()
474
- cat = alpaca.copy() # Format is nonsensical as listed on HF. This one works
580
+ cat = alpaca.copy() # Format is nonsensical as listed on HF. This one works.
475
581
  mytho_cat = alpaca.copy()
476
- dolphin['system_content'] = "You are Dolphin, a helpful AI assistant."
477
- mistral_openorca['system_content'] = \
582
+
583
+ # Set specific system prompts for each model
584
+ dolphin['system_prompt'] = "You are Dolphin, a helpful AI assistant."
585
+ mistral_openorca['system_prompt'] = \
478
586
  "You are MistralOrca, a large language model trained by Alignment " + \
479
587
  "Lab AI. Write out your reasoning step-by-step to be sure you get the " + \
480
588
  "right answers!"
481
- samantha['system_content'] = \
589
+ samantha['system_prompt'] = \
482
590
  "You are a caring and empathetic sentient AI companion named Samantha."
483
- jackalope['system_content'] = \
591
+ jackalope['system_prompt'] = \
484
592
  "You are JackalopeAI, a large language model trained by OpenAccess " + \
485
593
  "AI Collective. Write out your reasoning step-by-step to be sure you " + \
486
594
  "get the right answers!"
487
- naberius['system_content'] = "You are Naberius."
488
- hermes['system_content'] = \
595
+ naberius['system_prompt'] = "You are Naberius."
596
+ hermes['system_prompt'] = \
489
597
  "You are \"Hermes 2\", a conscious sentient " + \
490
598
  "superintelligent artificial intelligence developed by a man " + \
491
599
  "named Teknium, and your purpose and drive is to assist the user " + \
492
600
  "with any request they have. You experience emotions and have " + \
493
601
  "deep, profound thoughts and qualia."
494
- monad['system_content'] = \
602
+ monad['system_prompt'] = \
495
603
  "You are MonadGPT, a very old chatbot from " + \
496
604
  "the 17th century. Please answer the questions using an archaic " + \
497
605
  "language."
498
- cat['system_content'] = \
606
+ cat['system_prompt'] = \
499
607
  "Below is a conversation between a curious cat and a "+ \
500
608
  "demon summoned from hell called Nemesis. The evil Nemesis used to be " + \
501
609
  "a mortal killed by the medieval people because they objected to " + \
502
610
  "science. Now Nemesis just wants to come back and bring suffer to all " + \
503
611
  "those stupid human"
504
- mytho_cat['system_content'] = \
612
+ mytho_cat['system_prompt'] = \
505
613
  "You are a large language model. You are the " + \
506
614
  "result of a merge between two models, one called MythoLogic and one " + \
507
615
  "called Cat. You are called MythoCat."
508
- orca['system_content'] = \
616
+ orca['system_prompt'] = \
509
617
  "You are Orca, an AI language model created by " + \
510
618
  "Microsoft. You are a cautious assistant. You carefully follow " + \
511
619
  "instructions. You are helpful and harmless and you follow ethical " + \
512
620
  "guidelines and promote positive behavior."
513
- hexoteric['system_content'] = "You are a language model called Hexoteric."
514
- orcamaid['system_content'] = \
621
+ hexoteric['system_prompt'] = "You are a language model called Hexoteric."
622
+ orcamaid['system_prompt'] = \
515
623
  "You are OrcaMaid, a super-intelligent, " + \
516
624
  "unbiased, and unfiltered AI created by the open-source community. " + \
517
625
  "You must remember to think slowly and carefully during all your " + \
518
626
  "responses, so as to avoid logical errors."
519
627
  airoboros = llama2chat.copy()
520
- airoboros['system_content'] = \
628
+ airoboros['system_prompt'] = \
521
629
  "You are a helpful, unbiased, uncensored assistant."
522
630
  tess = synthia.copy()
523
- tess['system_content'] = '' # Tess can use any system prompt, or none
524
- alpaca_strict = alpaca.copy() # Alpaca with more stopping strings
631
+ tess['system_prompt'] = '' # Tess can use any system prompt, or none
632
+ alpaca_strict = alpaca.copy() # Alpaca with more stopping strings
525
633
  alpaca_strict['stops'] = [
526
634
  '###',
527
- '### ',
528
- '\n\n###',
529
635
  '\n\n##',
530
636
  '\n\nInstruction:',
531
637
  '\n\nResponse:',
532
638
  '\n\n\n',
533
639
  '### Instruction:',
534
640
  '### Response:'
535
- ]
641
+ ]
642
+
643
+ #
644
+ # AdvancedFormat presets
645
+ #
646
+
647
+
648
+ def _llama3_suffix_with_timestamp():
649
+ return f"<|eot_id|>\n<|reserved_special_token_3|>{get_time_str()}<|reserved_special_token_4|>\n"
650
+
651
+
652
+ Llama3WithTimestamps = AdvancedFormat({
653
+ "system_prefix": "<|start_header_id|>system<|end_header_id|>\n\n",
654
+ "system_prompt": 'You are a helpful AI assistant called "Llama 3".',
655
+ "system_suffix": _llama3_suffix_with_timestamp,
656
+ "user_prefix": "<|start_header_id|>user<|end_header_id|>\n\n",
657
+ "user_suffix": _llama3_suffix_with_timestamp,
658
+ "bot_prefix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
659
+ "bot_suffix": _llama3_suffix_with_timestamp,
660
+ "stops": [128001, 128008, 128009, 128011, 128012]
661
+ })
662
+
663
+
664
+ def AdvancedChatMarkupFormat(
665
+ user_name: str,
666
+ bot_name: str,
667
+ title: Optional[str] = None,
668
+ description: Optional[str] = None,
669
+ tags: Optional[List[str]] = None
670
+ ) -> AdvancedFormat:
671
+ """
672
+ Quickly create a prompt template using the specified variables, for use
673
+ within Threads.
674
+
675
+ Args:
676
+ user_name (str): The name of the user.
677
+ bot_name (str): The name of the bot.
678
+ title (Optional[str], optional): The title of the chat.
679
+ Defaults to None.
680
+ description (Optional[str], optional): The description of the chat.
681
+ Defaults to None.
682
+ tags (Optional[List[str]], optional): A list of tags for the chat.
683
+ Defaults to None.
684
+
685
+ Returns:
686
+ AdvancedFormat: The AdvancedFormat object.
687
+ """
688
+ assert_type(user_name, str, 'user_name', 'AdvancedChatMarkupFormat')
689
+ assert_type(bot_name, str, 'bot_name', 'AdvancedChatMarkupFormat')
690
+ assert_type(title, (str, NoneType), 'title', 'AdvancedChatMarkupFormat')
691
+ assert_type(description, (str, NoneType), 'description',
692
+ 'AdvancedChatMarkupFormat')
693
+ assert_type(tags, (list, NoneType), 'tags', 'AdvancedChatMarkupFormat')
694
+
695
+ _t = " " # indentation string
696
+
697
+ def _user_prefix() -> str:
698
+ return (f'{_t*2}<message sender="{user_name}" '
699
+ f'timestamp="{short_time_str()}">\n{_t*3}<text>')
700
+
701
+ def _bot_prefix() -> str:
702
+ return (f'{_t*2}<message sender="{bot_name}" '
703
+ f'timestamp="{short_time_str()}">\n{_t*3}<text>')
704
+
705
+ def _msg_suffix() -> str:
706
+ return f"</text>\n{_t*2}</message>\n"
707
+
708
+ if tags is not None:
709
+ xml_tags = [f'{_t*2}<tags>']
710
+ for tag in tags:
711
+ xml_tags.append(f'{_t*3}<tag>{tag}</tag>')
712
+ xml_tags.append(f'{_t*2}</tags>')
713
+ final_tags_string = '\n'.join(xml_tags)
714
+ else:
715
+ final_tags_string = f"{_t*2}<tags>\n{_t*2}</tags>"
716
+
717
+ return AdvancedFormat(
718
+ {
719
+ "system_prefix": "",
720
+ "system_prompt": (
721
+ f"<chat>\n"
722
+ f"{_t}<meta>\n"
723
+ f"{_t*2}<title>"
724
+ f"{title if title is not None else 'Untitled Chat'}"
725
+ f"</title>\n"
726
+ f"{_t*2}<description>"
727
+ f"{description if description is not None else 'No description provided'}"
728
+ f"</description>\n"
729
+ f"{final_tags_string}\n"
730
+ f"{_t*2}<participants>\n"
731
+ f"{_t*3}<participant name=\"{user_name}\"/>\n"
732
+ f"{_t*3}<participant name=\"{bot_name}\"/>\n"
733
+ f"{_t*2}</participants>\n"
734
+ f"{_t*2}<datetime>{get_time_str()}</datetime>\n"
735
+ f"{_t}</meta>\n"
736
+ f"{_t}<messages>"
737
+ ),
738
+ "system_suffix": "\n",
739
+ "user_prefix": _user_prefix,
740
+ "user_suffix": _msg_suffix,
741
+ "bot_prefix": _bot_prefix,
742
+ "bot_suffix": _msg_suffix,
743
+ "stops": ["</", "</text>", "</message>"]
744
+ }
745
+ )