stratifyai-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +5 -0
- cli/stratifyai_cli.py +1753 -0
- stratifyai/__init__.py +113 -0
- stratifyai/api_key_helper.py +372 -0
- stratifyai/caching.py +279 -0
- stratifyai/chat/__init__.py +54 -0
- stratifyai/chat/builder.py +366 -0
- stratifyai/chat/stratifyai_anthropic.py +194 -0
- stratifyai/chat/stratifyai_bedrock.py +200 -0
- stratifyai/chat/stratifyai_deepseek.py +194 -0
- stratifyai/chat/stratifyai_google.py +194 -0
- stratifyai/chat/stratifyai_grok.py +194 -0
- stratifyai/chat/stratifyai_groq.py +195 -0
- stratifyai/chat/stratifyai_ollama.py +201 -0
- stratifyai/chat/stratifyai_openai.py +209 -0
- stratifyai/chat/stratifyai_openrouter.py +201 -0
- stratifyai/chunking.py +158 -0
- stratifyai/client.py +292 -0
- stratifyai/config.py +1273 -0
- stratifyai/cost_tracker.py +257 -0
- stratifyai/embeddings.py +245 -0
- stratifyai/exceptions.py +91 -0
- stratifyai/models.py +59 -0
- stratifyai/providers/__init__.py +5 -0
- stratifyai/providers/anthropic.py +330 -0
- stratifyai/providers/base.py +183 -0
- stratifyai/providers/bedrock.py +634 -0
- stratifyai/providers/deepseek.py +39 -0
- stratifyai/providers/google.py +39 -0
- stratifyai/providers/grok.py +39 -0
- stratifyai/providers/groq.py +39 -0
- stratifyai/providers/ollama.py +43 -0
- stratifyai/providers/openai.py +344 -0
- stratifyai/providers/openai_compatible.py +372 -0
- stratifyai/providers/openrouter.py +39 -0
- stratifyai/py.typed +2 -0
- stratifyai/rag.py +381 -0
- stratifyai/retry.py +185 -0
- stratifyai/router.py +643 -0
- stratifyai/summarization.py +179 -0
- stratifyai/utils/__init__.py +11 -0
- stratifyai/utils/bedrock_validator.py +136 -0
- stratifyai/utils/code_extractor.py +327 -0
- stratifyai/utils/csv_extractor.py +197 -0
- stratifyai/utils/file_analyzer.py +192 -0
- stratifyai/utils/json_extractor.py +219 -0
- stratifyai/utils/log_extractor.py +267 -0
- stratifyai/utils/model_selector.py +324 -0
- stratifyai/utils/provider_validator.py +442 -0
- stratifyai/utils/token_counter.py +186 -0
- stratifyai/vectordb.py +344 -0
- stratifyai-0.1.0.dist-info/METADATA +263 -0
- stratifyai-0.1.0.dist-info/RECORD +57 -0
- stratifyai-0.1.0.dist-info/WHEEL +5 -0
- stratifyai-0.1.0.dist-info/entry_points.txt +2 -0
- stratifyai-0.1.0.dist-info/licenses/LICENSE +21 -0
- stratifyai-0.1.0.dist-info/top_level.txt +2 -0
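
The three chat modules added under stratifyai/chat/ in this wheel (stratifyai_ollama.py, stratifyai_openai.py, stratifyai_openrouter.py, shown in full below) expose the same module-level surface: async chat and chat_stream, a synchronous chat_sync wrapper, and with_* builder helpers. A minimal sketch of that surface, based only on the docstrings and signatures in the hunks that follow; model names are illustrative, OPENAI_API_KEY must be set, and a local Ollama server must be running:

    # Sketch only: mirrors the usage documented in the modules below.
    import asyncio

    from stratifyai.chat import ollama, openai


    async def main() -> None:
        # Hosted model via OpenAI (model is always required).
        reply = await openai.chat("Hello!", model="gpt-4.1-mini")
        print(reply.content)

        # Local model via Ollama (no API key; Ollama must be running).
        local = await ollama.chat("Hello!", model="llama3.2")
        print(local.content)


    if __name__ == "__main__":
        asyncio.run(main())
        # The synchronous wrapper avoids asyncio entirely:
        print(openai.chat_sync("Hello!", model="gpt-4.1-mini").content)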

stratifyai/chat/stratifyai_ollama.py
@@ -0,0 +1,201 @@
"""Ollama chat interface for StratifyAI.

Provides convenient functions for Ollama local model chat completions.
Ollama runs models locally - no API key required.
Model must be specified for each request.

Requires: Ollama running locally (default: http://localhost:11434)

Usage:
    # Model is always required
    from stratifyai.chat import ollama
    response = await ollama.chat("Hello!", model="llama3.2")

    # Builder pattern (model required)
    client = (
        ollama
        .with_model("mistral")
        .with_system("You are a helpful assistant")
        .with_developer("Use markdown")
    )
    response = await client.chat("Hello!")
"""

import asyncio
from typing import AsyncIterator, Optional, Union

from stratifyai import LLMClient
from stratifyai.models import ChatResponse, Message
from stratifyai.chat.builder import ChatBuilder, create_module_builder

# Default configuration (no default model - must be specified)
DEFAULT_TEMPERATURE = 0.7
DEFAULT_MAX_TOKENS = None

# Module-level client (lazy initialization)
_client: Optional[LLMClient] = None


def _get_client() -> LLMClient:
    """Get or create the module-level client."""
    global _client
    if _client is None:
        _client = LLMClient(provider="ollama")
    return _client


# Module-level builder for chaining
_builder = create_module_builder(
    provider="ollama",
    default_temperature=DEFAULT_TEMPERATURE,
    default_max_tokens=DEFAULT_MAX_TOKENS,
    client_factory=_get_client,
)


# Builder pattern methods (delegate to _builder)
def with_model(model: str) -> ChatBuilder:
    """Set the model to use. Returns a new ChatBuilder for chaining."""
    return _builder.with_model(model)


def with_system(prompt: str) -> ChatBuilder:
    """Set the system prompt. Returns a new ChatBuilder for chaining."""
    return _builder.with_system(prompt)


def with_developer(instructions: str) -> ChatBuilder:
    """Set developer instructions. Returns a new ChatBuilder for chaining."""
    return _builder.with_developer(instructions)


def with_temperature(temperature: float) -> ChatBuilder:
    """Set the temperature. Returns a new ChatBuilder for chaining."""
    return _builder.with_temperature(temperature)


def with_max_tokens(max_tokens: int) -> ChatBuilder:
    """Set max tokens. Returns a new ChatBuilder for chaining."""
    return _builder.with_max_tokens(max_tokens)


def with_options(**kwargs) -> ChatBuilder:
    """Set additional options. Returns a new ChatBuilder for chaining."""
    return _builder.with_options(**kwargs)


async def chat(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    stream: bool = False,
    **kwargs,
) -> Union[ChatResponse, AsyncIterator[ChatResponse]]:
    """
    Send a chat completion request to Ollama (local).

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "llama3.2", "mistral", "codellama"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        stream: Whether to stream the response. Default: False
        **kwargs: Additional parameters passed to the API.

    Returns:
        ChatResponse object, or AsyncIterator[ChatResponse] if streaming.

    Raises:
        ProviderAPIError: If Ollama is not running or model not found.

    Example:
        >>> from stratifyai.chat import ollama
        >>> response = await ollama.chat("What is Python?", model="llama3.2")
        >>> print(response.content)

        # Use a different model (must be pulled first)
        >>> response = await ollama.chat("Explain AI", model="mistral")
    """
    client = _get_client()

    # Build messages list
    if isinstance(prompt, str):
        messages = []
        if system:
            messages.append(Message(role="system", content=system))
        messages.append(Message(role="user", content=prompt))
    else:
        messages = prompt

    return await client.chat(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=stream,
        **kwargs,
    )


async def chat_stream(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    **kwargs,
) -> AsyncIterator[ChatResponse]:
    """
    Send a streaming chat completion request to Ollama (local).

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "llama3.2", "mistral"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        **kwargs: Additional parameters passed to the API.

    Yields:
        ChatResponse chunks.

    Example:
        >>> from stratifyai.chat import ollama
        >>> async for chunk in ollama.chat_stream("Tell me a story", model="llama3.2"):
        ...     print(chunk.content, end="", flush=True)
    """
    return await chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        **kwargs,
    )


def chat_sync(
    prompt,
    *,
    model: str,
    system=None,
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    **kwargs,
):
    """Synchronous wrapper for chat()."""
    return asyncio.run(chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False,
        **kwargs,
    ))
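
One consumption detail for the module above: per its return annotation, chat_stream is a coroutine that resolves to the AsyncIterator of chunks (it awaits chat(..., stream=True) and returns the result), so a caller awaits the call and then iterates. A small sketch, assuming a local Ollama server and an illustrative model name:

    # Sketch: consuming the Ollama streaming interface defined above.
    import asyncio

    from stratifyai.chat import ollama


    async def stream_demo() -> None:
        # Await the coroutine to obtain the AsyncIterator, then iterate it.
        stream = await ollama.chat_stream("Tell me a story", model="llama3.2")
        async for chunk in stream:
            print(chunk.content, end="", flush=True)
        print()


    asyncio.run(stream_demo())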

stratifyai/chat/stratifyai_openai.py
@@ -0,0 +1,209 @@
"""OpenAI chat interface for StratifyAI.

Provides convenient functions for OpenAI chat completions.
Model must be specified for each request.

Environment Variable: OPENAI_API_KEY

Usage:
    # Model is always required
    from stratifyai.chat import openai
    response = await openai.chat("Hello!", model="gpt-4.1-mini")

    # Builder pattern (model required)
    client = (
        openai
        .with_model("gpt-4.1")
        .with_system("You are a helpful assistant")
        .with_developer("Use markdown")
    )
    response = await client.chat("Hello!")
"""

import asyncio
from typing import AsyncIterator, Optional, Union

from stratifyai import LLMClient
from stratifyai.models import ChatResponse, Message
from stratifyai.chat.builder import ChatBuilder, create_module_builder

# Default configuration (no default model - must be specified)
DEFAULT_TEMPERATURE = 0.7
DEFAULT_MAX_TOKENS = None

# Module-level client (lazy initialization)
_client: Optional[LLMClient] = None


def _get_client() -> LLMClient:
    """Get or create the module-level client."""
    global _client
    if _client is None:
        _client = LLMClient(provider="openai")
    return _client


# Module-level builder for chaining
_builder = create_module_builder(
    provider="openai",
    default_temperature=DEFAULT_TEMPERATURE,
    default_max_tokens=DEFAULT_MAX_TOKENS,
    client_factory=_get_client,
)


# Builder pattern methods (delegate to _builder)
def with_model(model: str) -> ChatBuilder:
    """Set the model to use. Returns a new ChatBuilder for chaining."""
    return _builder.with_model(model)


def with_system(prompt: str) -> ChatBuilder:
    """Set the system prompt. Returns a new ChatBuilder for chaining."""
    return _builder.with_system(prompt)


def with_developer(instructions: str) -> ChatBuilder:
    """Set developer instructions. Returns a new ChatBuilder for chaining."""
    return _builder.with_developer(instructions)


def with_temperature(temperature: float) -> ChatBuilder:
    """Set the temperature. Returns a new ChatBuilder for chaining."""
    return _builder.with_temperature(temperature)


def with_max_tokens(max_tokens: int) -> ChatBuilder:
    """Set max tokens. Returns a new ChatBuilder for chaining."""
    return _builder.with_max_tokens(max_tokens)


def with_options(**kwargs) -> ChatBuilder:
    """Set additional options. Returns a new ChatBuilder for chaining."""
    return _builder.with_options(**kwargs)


async def chat(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    stream: bool = False,
    **kwargs,
) -> Union[ChatResponse, AsyncIterator[ChatResponse]]:
    """
    Send an async chat completion request to OpenAI.

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "gpt-4.1-mini", "gpt-4.1", "gpt-4o"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        stream: Whether to stream the response. Default: False
        **kwargs: Additional parameters passed to the API.

    Returns:
        ChatResponse object, or AsyncIterator[ChatResponse] if streaming.

    Example:
        >>> from stratifyai.chat import openai
        >>> response = await openai.chat("What is Python?", model="gpt-4.1-mini")
        >>> print(response.content)
    """
    client = _get_client()

    # Build messages list
    if isinstance(prompt, str):
        messages = []
        if system:
            messages.append(Message(role="system", content=system))
        messages.append(Message(role="user", content=prompt))
    else:
        messages = prompt

    return await client.chat(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=stream,
        **kwargs,
    )


async def chat_stream(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    **kwargs,
) -> AsyncIterator[ChatResponse]:
    """
    Send an async streaming chat completion request to OpenAI.

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "gpt-4.1-mini", "gpt-4.1"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        **kwargs: Additional parameters passed to the API.

    Yields:
        ChatResponse chunks.

    Example:
        >>> from stratifyai.chat import openai
        >>> async for chunk in openai.chat_stream("Tell me a story", model="gpt-4.1-mini"):
        ...     print(chunk.content, end="", flush=True)
    """
    return await chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        **kwargs,
    )


def chat_sync(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    **kwargs,
) -> ChatResponse:
    """Synchronous wrapper for chat(). Model is required."""
    return asyncio.run(chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False,
        **kwargs,
    ))


if __name__ == "__main__":
    # Demo usage when run directly
    print("OpenAI Chat Module")
    print("\nSending test prompt...\n")

    response = chat_sync("Hello! Please respond with a brief greeting.", model="gpt-4.1-mini")

    print(f"Response: {response.content}")
    print(f"\nModel: {response.model}")
    print(f"Tokens: {response.usage.total_tokens} (prompt: {response.usage.prompt_tokens}, completion: {response.usage.completion_tokens})")
    print(f"Cost: ${response.usage.cost_usd:.6f}")
    if response.latency_ms:
        print(f"Latency: {response.latency_ms:.0f}ms")
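
Besides a plain string, chat() in the module above accepts a prepared list of Message objects, in which case the separate system keyword is ignored. A short sketch using only the Message roles that appear in this code ("system" and "user"); the model name is illustrative:

    # Sketch: supplying an explicit message list instead of a prompt string.
    import asyncio

    from stratifyai.chat import openai
    from stratifyai.models import Message


    async def messages_demo() -> None:
        messages = [
            Message(role="system", content="You are a terse assistant."),
            Message(role="user", content="Summarize what a Python wheel is."),
        ]
        # With a list prompt, chat() passes it through unchanged and
        # the separate `system` argument is not applied.
        response = await openai.chat(messages, model="gpt-4.1-mini")
        print(response.content)


    asyncio.run(messages_demo())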

stratifyai/chat/stratifyai_openrouter.py
@@ -0,0 +1,201 @@
"""OpenRouter chat interface for StratifyAI.

Provides convenient functions for OpenRouter chat completions.
OpenRouter provides unified access to models from multiple providers.
Model must be specified for each request.

Environment Variable: OPENROUTER_API_KEY

Usage:
    # Model is always required
    from stratifyai.chat import openrouter
    response = await openrouter.chat("Hello!", model="meta-llama/llama-3.3-70b-instruct:free")

    # Builder pattern (model required)
    client = (
        openrouter
        .with_model("anthropic/claude-3-5-sonnet")
        .with_system("You are a helpful assistant")
        .with_developer("Use markdown")
    )
    response = await client.chat("Hello!")
"""

import asyncio
from typing import AsyncIterator, Optional, Union

from stratifyai import LLMClient
from stratifyai.models import ChatResponse, Message
from stratifyai.chat.builder import ChatBuilder, create_module_builder

# Default configuration (no default model - must be specified)
DEFAULT_TEMPERATURE = 0.7
DEFAULT_MAX_TOKENS = None

# Module-level client (lazy initialization)
_client: Optional[LLMClient] = None


def _get_client() -> LLMClient:
    """Get or create the module-level client."""
    global _client
    if _client is None:
        _client = LLMClient(provider="openrouter")
    return _client


# Module-level builder for chaining
_builder = create_module_builder(
    provider="openrouter",
    default_temperature=DEFAULT_TEMPERATURE,
    default_max_tokens=DEFAULT_MAX_TOKENS,
    client_factory=_get_client,
)


# Builder pattern methods (delegate to _builder)
def with_model(model: str) -> ChatBuilder:
    """Set the model to use. Returns a new ChatBuilder for chaining."""
    return _builder.with_model(model)


def with_system(prompt: str) -> ChatBuilder:
    """Set the system prompt. Returns a new ChatBuilder for chaining."""
    return _builder.with_system(prompt)


def with_developer(instructions: str) -> ChatBuilder:
    """Set developer instructions. Returns a new ChatBuilder for chaining."""
    return _builder.with_developer(instructions)


def with_temperature(temperature: float) -> ChatBuilder:
    """Set the temperature. Returns a new ChatBuilder for chaining."""
    return _builder.with_temperature(temperature)


def with_max_tokens(max_tokens: int) -> ChatBuilder:
    """Set max tokens. Returns a new ChatBuilder for chaining."""
    return _builder.with_max_tokens(max_tokens)


def with_options(**kwargs) -> ChatBuilder:
    """Set additional options. Returns a new ChatBuilder for chaining."""
    return _builder.with_options(**kwargs)


async def chat(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    stream: bool = False,
    **kwargs,
) -> Union[ChatResponse, AsyncIterator[ChatResponse]]:
    """
    Send a chat completion request to OpenRouter.

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "anthropic/claude-3-5-sonnet", "openai/gpt-4"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        stream: Whether to stream the response. Default: False
        **kwargs: Additional parameters passed to the API.

    Returns:
        ChatResponse object, or AsyncIterator[ChatResponse] if streaming.

    Example:
        >>> from stratifyai.chat import openrouter
        >>> response = await openrouter.chat("What is Python?", model="meta-llama/llama-3.3-70b-instruct:free")
        >>> print(response.content)

        # Use a different model
        >>> response = await openrouter.chat(
        ...     "Explain AI",
        ...     model="anthropic/claude-3-5-sonnet"
        ... )
    """
    client = _get_client()

    # Build messages list
    if isinstance(prompt, str):
        messages = []
        if system:
            messages.append(Message(role="system", content=system))
        messages.append(Message(role="user", content=prompt))
    else:
        messages = prompt

    return await client.chat(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=stream,
        **kwargs,
    )


async def chat_stream(
    prompt: Union[str, list[Message]],
    *,
    model: str,
    system: Optional[str] = None,
    temperature: float = DEFAULT_TEMPERATURE,
    max_tokens: Optional[int] = DEFAULT_MAX_TOKENS,
    **kwargs,
) -> AsyncIterator[ChatResponse]:
    """
    Send a streaming chat completion request to OpenRouter.

    Args:
        prompt: User message string or list of Message objects.
        model: Model name (required). E.g., "anthropic/claude-3-5-sonnet"
        system: Optional system prompt (ignored if prompt is list of Messages).
        temperature: Sampling temperature (0.0-2.0). Default: 0.7
        max_tokens: Maximum tokens to generate. Default: None (model default)
        **kwargs: Additional parameters passed to the API.

    Yields:
        ChatResponse chunks.

    Example:
        >>> from stratifyai.chat import openrouter
        >>> async for chunk in openrouter.chat_stream("Tell me a story", model="anthropic/claude-3-5-sonnet"):
        ...     print(chunk.content, end="", flush=True)
    """
    return await chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
        **kwargs,
    )


def chat_sync(
    prompt,
    *,
    model: str,
    system=None,
    temperature=DEFAULT_TEMPERATURE,
    max_tokens=DEFAULT_MAX_TOKENS,
    **kwargs,
):
    """Synchronous wrapper for chat()."""
    return asyncio.run(chat(
        prompt,
        model=model,
        system=system,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False,
        **kwargs,
    ))
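
The Ollama module's docstring names ProviderAPIError as the failure raised when a provider is unreachable or a model is missing. A defensive sketch around the OpenRouter synchronous wrapper; the import path stratifyai.exceptions is an assumption based on the exceptions.py module listed in this wheel, and the model name is illustrative:

    # Sketch: guarding a call against provider failures.
    # Assumption: ProviderAPIError is defined in stratifyai.exceptions
    # (the exceptions module listed in this wheel).
    from stratifyai.chat import openrouter
    from stratifyai.exceptions import ProviderAPIError

    try:
        response = openrouter.chat_sync(
            "Explain AI in one sentence.",
            model="meta-llama/llama-3.3-70b-instruct:free",
        )
        print(response.content)
    except ProviderAPIError as exc:
        # Covers unreachable providers, rejected requests, and unknown models.
        print(f"OpenRouter request failed: {exc}")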