webscout 8.1-py3-none-any.whl → 8.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

Files changed (51)
  1. inferno/__init__.py +6 -0
  2. inferno/__main__.py +9 -0
  3. inferno/cli.py +6 -0
  4. webscout/Local/__init__.py +6 -0
  5. webscout/Local/__main__.py +9 -0
  6. webscout/Local/api.py +576 -0
  7. webscout/Local/cli.py +338 -0
  8. webscout/Local/config.py +75 -0
  9. webscout/Local/llm.py +188 -0
  10. webscout/Local/model_manager.py +205 -0
  11. webscout/Local/server.py +187 -0
  12. webscout/Local/utils.py +93 -0
  13. webscout/Provider/AISEARCH/Perplexity.py +359 -0
  14. webscout/Provider/AISEARCH/__init__.py +2 -1
  15. webscout/Provider/AISEARCH/scira_search.py +8 -4
  16. webscout/Provider/ExaChat.py +18 -8
  17. webscout/Provider/GithubChat.py +5 -1
  18. webscout/Provider/Glider.py +4 -2
  19. webscout/Provider/OPENAI/__init__.py +8 -1
  20. webscout/Provider/OPENAI/chatgpt.py +549 -0
  21. webscout/Provider/OPENAI/exachat.py +20 -8
  22. webscout/Provider/OPENAI/glider.py +3 -1
  23. webscout/Provider/OPENAI/llmchatco.py +3 -1
  24. webscout/Provider/OPENAI/opkfc.py +488 -0
  25. webscout/Provider/OPENAI/scirachat.py +11 -7
  26. webscout/Provider/OPENAI/standardinput.py +425 -0
  27. webscout/Provider/OPENAI/textpollinations.py +285 -0
  28. webscout/Provider/OPENAI/toolbaz.py +405 -0
  29. webscout/Provider/OPENAI/uncovrAI.py +455 -0
  30. webscout/Provider/OPENAI/writecream.py +158 -0
  31. webscout/Provider/StandardInput.py +278 -0
  32. webscout/Provider/TextPollinationsAI.py +27 -28
  33. webscout/Provider/Writecream.py +211 -0
  34. webscout/Provider/WritingMate.py +197 -0
  35. webscout/Provider/Youchat.py +30 -26
  36. webscout/Provider/__init__.py +10 -2
  37. webscout/Provider/koala.py +2 -2
  38. webscout/Provider/llmchatco.py +5 -0
  39. webscout/Provider/scira_chat.py +5 -2
  40. webscout/Provider/scnet.py +187 -0
  41. webscout/Provider/toolbaz.py +320 -0
  42. webscout/Provider/uncovr.py +3 -3
  43. webscout/conversation.py +32 -32
  44. webscout/version.py +1 -1
  45. {webscout-8.1.dist-info → webscout-8.2.dist-info}/METADATA +54 -3
  46. {webscout-8.1.dist-info → webscout-8.2.dist-info}/RECORD +50 -25
  47. webscout-8.2.dist-info/entry_points.txt +5 -0
  48. {webscout-8.1.dist-info → webscout-8.2.dist-info}/top_level.txt +1 -0
  49. webscout-8.1.dist-info/entry_points.txt +0 -3
  50. {webscout-8.1.dist-info → webscout-8.2.dist-info}/LICENSE.md +0 -0
  51. {webscout-8.1.dist-info → webscout-8.2.dist-info}/WHEEL +0 -0
webscout/Local/cli.py ADDED
@@ -0,0 +1,338 @@
+ """
+ Command-line interface for webscout.Local
+ """
+
+ import typer
+ from rich.console import Console
+ from rich.table import Table
+ from rich.prompt import Prompt
+ from typing import Optional
+
+ from .model_manager import ModelManager
+ from .llm import LLMInterface
+ from .server import start_server
+
+ app: typer.Typer = typer.Typer(help="webscout.Local - A llama-cpp-python based LLM serving tool")
+ console: Console = Console()
+
+ model_manager: ModelManager = ModelManager()
+
+ @app.command("serve")
+ def run_model(
+     model_string: str = typer.Argument(..., help="Model to run (format: 'name', 'repo_id' or 'repo_id:filename')"),
+     host: Optional[str] = typer.Option(None, help="Host to bind the server to"),
+     port: Optional[int] = typer.Option(None, help="Port to bind the server to"),
+ ) -> None:
+     """
+     Start a model server (downloads if needed).
+     """
+     # First check if this is a filename that already exists
+     model_path = model_manager.get_model_path(model_string)
+     if model_path:
+         # This is a filename that exists, find the model name
+         for model_info in model_manager.list_models():
+             if model_info.get("filename") == model_string or model_info.get("path") == model_path:
+                 model_name = model_info.get("name")
+                 break
+         else:
+             # Fallback to using the string as model name
+             model_name = model_string
+     else:
+         # Parse the model string to see if it's a repo_id:filename format
+         repo_id, _ = model_manager.parse_model_string(model_string)
+         model_name = repo_id.split("/")[-1] if "/" in repo_id else repo_id
+
+     # Check if model exists, if not try to download it
+     if not model_manager.get_model_path(model_name):
+         console.print(f"[yellow]Model {model_name} not found locally. Attempting to download...[/yellow]")
+         try:
+             # We don't need to use the parsed values directly as download_model handles this
+             _ = model_manager.parse_model_string(model_string) # Just to validate the format
+             # Download the model
+             model_name, _ = model_manager.download_model(model_string)
+             console.print(f"[bold green]Model {model_name} downloaded successfully[/bold green]")
+         except Exception as e:
+             console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")
+             return
+
+     # Try to load the model to verify it works
+     try:
+         llm = LLMInterface(model_name)
+         llm.load_model(verbose=False)
+         console.print(f"[bold green]Model {model_name} loaded successfully[/bold green]")
+     except Exception as e:
+         console.print(f"[bold red]Error loading model: {str(e)}[/bold red]")
+         return
+
+     # Start the server
+     console.print(f"[bold blue]Starting webscout.Local server with model {model_name}...[/bold blue]")
+     start_server(host=host, port=port)
+
+ @app.command("pull")
+ def pull_model(
+     model_string: str = typer.Argument(..., help="Model to download (format: 'repo_id' or 'repo_id:filename')"),
+ ) -> None:
+     """
+     Download a model from Hugging Face without running it.
+     """
+     try:
+         model_name, model_path = model_manager.download_model(model_string)
+         console.print(f"[bold green]Model {model_name} downloaded successfully to {model_path}[/bold green]")
+     except Exception as e:
+         console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")
+
+ @app.command("list")
+ def list_models() -> None:
+     """
+     List downloaded models.
+     """
+     models = model_manager.list_models()
+
+     if not models:
+         console.print("[yellow]No models found. Use 'webscout.Local pull' to download a model.[/yellow]")
+         return
+
+     table = Table(title="Downloaded Models")
+     table.add_column("Name", style="cyan")
+     table.add_column("Repository", style="green")
+     table.add_column("Filename", style="blue")
+
+     for model in models:
+         table.add_row(
+             model["name"],
+             model.get("repo_id", "Unknown"),
+             model.get("filename", "Unknown"),
+         )
+
+     console.print(table)
+
+ @app.command(name="remove", help="Remove a downloaded model")
+ def remove_model(
+     model_string: str = typer.Argument(..., help="Name or filename of the model to remove"),
+     force: bool = typer.Option(False, "--force", "-f", help="Force removal without confirmation"),
+ ) -> None:
+     """
+     Remove a downloaded model.
+     """
+     # First check if this is a model name
+     model_info = model_manager.get_model_info(model_string)
+
+     # If not found by name, check if it's a filename
+     if not model_info:
+         for info in model_manager.list_models():
+             if info.get("filename") == model_string:
+                 model_info = info
+                 model_string = info["name"]
+                 break
+
+     if not model_info:
+         console.print(f"[yellow]Model {model_string} not found.[/yellow]")
+         return
+
+     if not force:
+         confirm = Prompt.ask(
+             f"Are you sure you want to remove model {model_string}?",
+             choices=["y", "n"],
+             default="n",
+         )
+
+         if confirm.lower() != "y":
+             console.print("[yellow]Operation cancelled.[/yellow]")
+             return
+
+     if model_manager.remove_model(model_string):
+         console.print(f"[bold green]Model {model_string} removed successfully[/bold green]")
+     else:
+         console.print(f"[bold red]Error removing model {model_string}[/bold red]")
+
+ @app.command("run")
+ def chat(
+     model_string: str = typer.Argument(..., help="Name or filename of the model to chat with"),
+ ) -> None:
+     """
+     Interactive chat with a model.
+     """
+     # First check if this is a filename that already exists
+     model_path = model_manager.get_model_path(model_string)
+     if model_path:
+         # This is a filename that exists, find the model name
+         for model_info in model_manager.list_models():
+             if model_info.get("filename") == model_string or model_info.get("path") == model_path:
+                 model_name = model_info.get("name")
+                 break
+         else:
+             # Fallback to using the string as model name
+             model_name = model_string
+     else:
+         # Use the string as model name
+         model_name = model_string
+
+     # Check if model exists, if not try to download it
+     if not model_manager.get_model_path(model_name):
+         console.print(f"[yellow]Model {model_name} not found locally. Attempting to download...[/yellow]")
+         try:
+             # Parse the model string to see if it's a repo_id:filename format
+             # We don't need to use the parsed values directly as download_model handles this
+             _ = model_manager.parse_model_string(model_string) # Just to validate the format
+             # Download the model
+             model_name, _ = model_manager.download_model(model_string)
+             console.print(f"[bold green]Model {model_name} downloaded successfully[/bold green]")
+         except Exception as e:
+             console.print(f"[bold red]Error downloading model: {str(e)}[/bold red]")
+             return
+
+     # Load the model
+     try:
+         llm = LLMInterface(model_name)
+         llm.load_model(verbose=False)
+     except Exception as e:
+         console.print(f"[bold red]Error loading model: {str(e)}[/bold red]")
+         return
+
+     console.print(f"[bold green]Chat with {model_name}. Type '/help' for available commands or '/bye' to exit.[bold green]")
+
+     # Chat history
+     messages = []
+     system_prompt = None
+
+     # Initialize with empty system prompt
+     messages.append({"role": "system", "content": ""})
+
+     # Define help text
+     help_text = """
+     Available commands:
+     /help or /? - Show this help message
+     /bye - Exit the chat
+     /set system <prompt> - Set the system prompt
+     /set context <size> - Set context window size (default: 4096)
+     /clear or /cls - Clear the terminal screen
+     /reset - Reset all settings
+     """
+
+     while True:
+         # Get user input
+         user_input = input("\n> ")
+
+         # Handle commands
+         if user_input.startswith("/"):
+             cmd_parts = user_input.split(maxsplit=2)
+             cmd = cmd_parts[0].lower()
+
+             if cmd == "/bye" or user_input.lower() in ["exit", "quit"]:
+                 console.print("[yellow]Goodbye![/yellow]")
+                 break
+
+             elif cmd == "/help" or cmd == "/?":
+                 console.print(help_text)
+                 continue
+
+             elif cmd == "/clear" or cmd == "/cls":
+                 # Do not clear history, just clear the terminal screen
+                 import os
+                 os.system('cls' if os.name == 'nt' else 'clear')
+                 console.print(f"[bold green]Chat with {model_name}. Type '/help' for available commands or '/bye' to exit.[/bold green]")
+                 console.print("[yellow]Screen cleared. Chat history preserved.[/yellow]")
+                 continue
+
+             elif cmd == "/reset":
+                 messages = [{"role": "system", "content": ""}]
+                 system_prompt = None
+                 console.print("[yellow]All settings reset.[/yellow]")
+                 continue
+
+             elif cmd == "/set" and len(cmd_parts) >= 2:
+                 if len(cmd_parts) < 3:
+                     console.print("[red]Error: Missing value for setting[/red]")
+                     continue
+
+                 setting = cmd_parts[1].lower()
+                 value = cmd_parts[2]
+
+                 if setting == "system":
+                     # Remove quotes if present
+                     if value.startswith('"') and value.endswith('"'):
+                         value = value[1:-1]
+
+                     system_prompt = value
+                     # Update system message
+                     if messages and messages[0].get("role") == "system":
+                         messages[0]["content"] = system_prompt
+                     else:
+                         # Clear messages and add system prompt
+                         messages = [{"role": "system", "content": system_prompt}]
+
+                     # Print confirmation that it's been applied
+                     console.print(f"[yellow]System prompt set to:[/yellow]")
+                     console.print(f"[cyan]\"{system_prompt}\"[/cyan]")
+                     console.print(f"[green]System prompt applied. Next responses will follow this instruction.[/green]")
+
+                     # Force a test message to ensure the system prompt is applied
+                     test_messages = messages.copy()
+                     test_messages.append({"role": "user", "content": "Say 'System prompt active'."})
+
+                     # Test if the system prompt is working
+                     console.print("[dim]Testing system prompt...[/dim]")
+                     response = llm.create_chat_completion(
+                         messages=test_messages,
+                         stream=False,
+                         max_tokens=20
+                     )
+                     console.print("[dim]System prompt test complete.[/dim]")
+                 elif setting == "context":
+                     try:
+                         context_size = int(value)
+                         # Reload the model with new context size
+                         console.print(f"[yellow]Reloading model with context size: {context_size}...[/yellow]")
+                         llm.load_model(n_ctx=context_size, verbose=False)
+                         console.print(f"[green]Context size set to: {context_size}[/green]")
+                     except ValueError:
+                         console.print(f"[red]Invalid context size: {value}. Must be an integer.[/red]")
+                 else:
+                     console.print(f"[red]Unknown setting: {setting}[/red]")
+                 continue
+             else:
+                 console.print(f"[red]Unknown command: {cmd}[/red]")
+                 continue
+
+         # Add user message to history
+         messages.append({"role": "user", "content": user_input})
+
+         # Generate response
+         console.print("\n") # Add extra spacing between user input and response
+
+         # Use a buffer to collect the response
+         response_buffer = ""
+
+         def print_token(token):
+             nonlocal response_buffer
+             response_buffer += token
+             console.print(token, end="", highlight=False)
+
+         llm.stream_chat_completion(
+             messages=messages,
+             callback=print_token,
+         )
+
+         # Get the full response to add to history
+         response = llm.create_chat_completion(
+             messages=messages,
+             stream=False,
+         )
+
+         assistant_message = response["choices"][0]["message"]["content"]
+         messages.append({"role": "assistant", "content": assistant_message})
+
+         # Add extra spacing after the response
+         console.print("")
+
+ @app.command("version")
+ def version() -> None:
+     """
+     Show version information.
+     """
+     from webscout.Local import __version__
+     console.print(f"[bold]webscout.Local[/bold] version [cyan]{__version__}[/cyan]")
+     console.print("A llama-cpp-python based LLM serving tool")
+
+ if __name__ == "__main__":
+     app()
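
The new serve, pull, list, remove, and run commands are all registered on the module-level Typer app, so the added CLI surface can be exercised in-process. Below is a minimal sketch (not part of the diff) that invokes the list command through Typer's test runner; it assumes the 8.2 wheel and its llama-cpp-python dependency are installed, and uses no names beyond those defined in cli.py above.

# Hedged example, not from the package: drive the new CLI without a shell.
from typer.testing import CliRunner
from webscout.Local.cli import app

runner = CliRunner()
result = runner.invoke(app, ["list"])   # equivalent to running the `list` command
print(result.output)                    # table of downloaded models, or a hint to use `pull`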
webscout/Local/config.py ADDED
@@ -0,0 +1,75 @@
+ """
+ Configuration management for webscout
+ """
+
+ import os
+ import json
+ from pathlib import Path
+ from typing import Dict, Any, Optional, List
+
+ # Default configuration
+ default_config: Dict[str, Any] = {
+     "models_dir": "~/.webscout/models",
+     "api_host": "127.0.0.1",
+     "api_port": 8000,
+     "default_context_length": 4096,
+     "default_gpu_layers": -1, # -1 means use all available GPU layers
+ }
+
+ class Config:
+     """
+     Configuration manager for webscout.
+     Handles loading, saving, and accessing configuration values.
+     """
+     config_dir: Path
+     config_file: Path
+     models_dir: Path
+     config: Dict[str, Any]
+
+     def __init__(self) -> None:
+         self.config_dir = Path(os.path.expanduser("~/.webscout"))
+         self.config_file = self.config_dir / "config.json"
+         self.models_dir = Path(os.path.expanduser(default_config["models_dir"]))
+         self._ensure_dirs()
+         self._load_config()
+
+     def _ensure_dirs(self) -> None:
+         """Ensure configuration and models directories exist."""
+         self.config_dir.mkdir(exist_ok=True, parents=True)
+         self.models_dir.mkdir(exist_ok=True, parents=True)
+
+     def _load_config(self) -> None:
+         """Load configuration from file or create default."""
+         if not self.config_file.exists():
+             self._save_config(default_config)
+             self.config = default_config.copy()
+         else:
+             with open(self.config_file, "r") as f:
+                 self.config = json.load(f)
+
+     def _save_config(self, config: Dict[str, Any]) -> None:
+         """Save configuration to file."""
+         with open(self.config_file, "w") as f:
+             json.dump(config, f, indent=2)
+
+     def get(self, key: str, default: Any = None) -> Any:
+         """Get configuration value by key."""
+         return self.config.get(key, default)
+
+     def set(self, key: str, value: Any) -> None:
+         """Set configuration value by key."""
+         self.config[key] = value
+         self._save_config(self.config)
+
+     def get_model_path(self, model_name: str) -> Path:
+         """Get the path to a model directory by model name."""
+         return self.models_dir / model_name
+
+     def list_models(self) -> List[str]:
+         """List all downloaded model names."""
+         if not self.models_dir.exists():
+             return []
+         return [d.name for d in self.models_dir.iterdir() if d.is_dir() and ":" not in d.name]
+
+ # Global configuration instance
+ config: Config = Config()
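
The module-level config instance above is what llm.py reads its defaults from. A minimal usage sketch (an illustration, not part of the diff; "my-model" is a placeholder name) assuming the wheel is installed:

# Hedged example: reading and persisting settings via the new Config module.
from webscout.Local.config import config  # importing creates ~/.webscout and config.json if missing

print(config.get("api_port", 8000))          # default 8000 unless overridden
config.set("default_context_length", 2048)   # written back to ~/.webscout/config.json
print(config.get_model_path("my-model"))     # ~/.webscout/models/my-model (placeholder model name)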
webscout/Local/llm.py ADDED
@@ -0,0 +1,188 @@
+ """
+ LLM interface for webscout.Local using llama-cpp-python
+ """
+
+ from typing import Dict, Any, List, Optional, Union, Generator, Callable
+
+ from llama_cpp import Llama
+ from rich.console import Console
+
+ from .config import config
+ from .model_manager import ModelManager
+
+ console = Console()
+
+ class LLMInterface:
+     """
+     Interface for LLM models using llama-cpp-python.
+     Provides methods for loading models and generating completions or chat responses.
+     """
+     model_name: str
+     model_manager: ModelManager
+     model_path: Optional[str]
+     llm: Optional[Llama]
+
+     def __init__(self, model_name: str) -> None:
+         """
+         Initialize the LLM interface.
+         Args:
+             model_name (str): Name of the model to load.
+         Raises:
+             ValueError: If the model is not found locally.
+         """
+         self.model_name = model_name
+         self.model_manager = ModelManager()
+         self.model_path = self.model_manager.get_model_path(model_name)
+         if not self.model_path:
+             raise ValueError(f"Model {model_name} not found. Please download it first.")
+         self.llm = None
+
+     def load_model(self, n_gpu_layers: Optional[int] = None, n_ctx: Optional[int] = None, verbose: bool = False) -> None:
+         """
+         Load the model into memory.
+         Args:
+             n_gpu_layers (Optional[int]): Number of layers to offload to GPU (-1 for all).
+             n_ctx (Optional[int]): Context size.
+             verbose (bool): Whether to show verbose output.
+         Raises:
+             ValueError: If model loading fails.
+         """
+         if n_gpu_layers is None:
+             n_gpu_layers = config.get("default_gpu_layers", -1)
+         if n_ctx is None:
+             n_ctx = config.get("default_context_length", 4096)
+         console.print(f"[bold blue]Loading model {self.model_name}...[/bold blue]")
+         try:
+             self.llm = Llama(
+                 model_path=self.model_path,
+                 n_gpu_layers=n_gpu_layers,
+                 n_ctx=n_ctx,
+                 verbose=verbose
+             )
+             console.print(f"[bold green]Model {self.model_name} loaded successfully[/bold green]")
+         except Exception as e:
+             raise ValueError(f"Failed to load model from file: {self.model_path}\n{str(e)}")
+
+     def create_completion(
+         self,
+         prompt: str,
+         max_tokens: int = 256,
+         temperature: float = 0.7,
+         top_p: float = 0.95,
+         stream: bool = False,
+         stop: Optional[List[str]] = None,
+     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
+         """
+         Create a completion for the given prompt.
+         Args:
+             prompt (str): The prompt to complete.
+             max_tokens (int): Maximum number of tokens to generate.
+             temperature (float): Sampling temperature.
+             top_p (float): Top-p sampling.
+             stream (bool): Whether to stream the response.
+             stop (Optional[List[str]]): List of strings to stop generation when encountered.
+         Returns:
+             Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]: Completion result or generator for streaming.
+         """
+         if self.llm is None:
+             self.load_model()
+         if stream:
+             return self.llm.create_completion(
+                 prompt=prompt,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=True,
+                 stop=stop or [],
+             )
+         else:
+             return self.llm.create_completion(
+                 prompt=prompt,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=False,
+                 stop=stop or [],
+             )
+
+     def create_chat_completion(
+         self,
+         messages: List[Dict[str, str]],
+         max_tokens: int = 256,
+         temperature: float = 0.7,
+         top_p: float = 0.95,
+         stream: bool = False,
+         stop: Optional[List[str]] = None,
+     ) -> Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]:
+         """
+         Create a chat completion for the given messages.
+         Args:
+             messages (List[Dict[str, str]]): List of chat messages.
+             max_tokens (int): Maximum number of tokens to generate.
+             temperature (float): Sampling temperature.
+             top_p (float): Top-p sampling.
+             stream (bool): Whether to stream the response.
+             stop (Optional[List[str]]): List of strings to stop generation when encountered.
+         Returns:
+             Union[Dict[str, Any], Generator[Dict[str, Any], None, None]]: Chat completion result or generator for streaming.
+         """
+         if self.llm is None:
+             self.load_model()
+         processed_messages: List[Dict[str, str]] = messages.copy()
+         system_messages = [m for m in processed_messages if m.get("role") == "system"]
+         non_system_messages = [m for m in processed_messages if m.get("role") != "system"]
+         if system_messages:
+             processed_messages = [system_messages[0]] + non_system_messages
+         else:
+             processed_messages = non_system_messages
+         if stream:
+             return self.llm.create_chat_completion(
+                 messages=processed_messages,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=True,
+                 stop=stop or [],
+             )
+         else:
+             return self.llm.create_chat_completion(
+                 messages=processed_messages,
+                 max_tokens=max_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 stream=False,
+                 stop=stop or [],
+             )
+
+     def stream_chat_completion(
+         self,
+         messages: List[Dict[str, str]],
+         callback: Callable[[str], None],
+         max_tokens: int = 256,
+         temperature: float = 0.7,
+         top_p: float = 0.95,
+         stop: Optional[List[str]] = None,
+     ) -> None:
+         """
+         Stream a chat completion with a callback for each token.
+         Args:
+             messages (List[Dict[str, str]]): List of chat messages.
+             callback (Callable[[str], None]): Function to call with each token.
+             max_tokens (int): Maximum number of tokens to generate.
+             temperature (float): Sampling temperature.
+             top_p (float): Top-p sampling.
+             stop (Optional[List[str]]): List of strings to stop generation when encountered.
+         """
+         stream = self.create_chat_completion(
+             messages=messages,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             stream=True,
+             stop=stop,
+         )
+         for chunk in stream:
+             if "choices" in chunk and len(chunk["choices"]) > 0:
+                 if "delta" in chunk["choices"][0] and "content" in chunk["choices"][0]["delta"]:
+                     content = chunk["choices"][0]["delta"]["content"]
+                     callback(content)
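
Tying the new modules together, LLMInterface resolves a model through ModelManager, loads it with llama-cpp-python, and returns OpenAI-style completion dicts, which is how cli.py consumes it above. A minimal usage sketch (illustrative only; the model name is a placeholder and must already have been pulled):

# Hedged example: non-streaming chat completion against a locally downloaded model.
from webscout.Local.llm import LLMInterface

llm = LLMInterface("my-model")             # raises ValueError if the model was never pulled
llm.load_model(n_ctx=2048, verbose=False)  # otherwise defaults come from webscout.Local.config
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=64,
    stream=False,
)
print(result["choices"][0]["message"]["content"])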