voicerun_completions 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,165 @@
1
+ from typing import Any, Optional, Union, AsyncIterable
2
+ from primfunctions.completions.request import (
3
+ CompletionsProvider,
4
+ ChatCompletionRequest,
5
+ StreamOptions,
6
+ ToolChoice,
7
+ ToolDefinition,
8
+ normalize_tools,
9
+ )
10
+ from primfunctions.completions.messages import ConversationHistory, normalize_messages
11
+ from primfunctions.completions.streaming import ChatCompletionChunk
12
+ from primfunctions.completions.response import ChatCompletionResponse
13
+
14
+ from .providers.base import CompletionClient
15
+ from .providers.openai.openai_client import OpenAiCompletionClient
16
+ from .providers.anthropic.anthropic_client import AnthropicCompletionClient
17
+ from .providers.google.google_client import GoogleCompletionClient
18
+
19
+
20
async def generate_chat_completion(
    provider: Union[str, CompletionsProvider],
    api_key: str,
    model: str,
    messages: Union[ConversationHistory, list[dict]],
    *,
    tools: Optional[list[Union[ToolDefinition, dict[str, Any]]]] = None,
    tool_choice: Optional[ToolChoice] = None,
    temperature: Optional[float] = None,
    timeout: Optional[float] = None,
    max_tokens: Optional[int] = None,
) -> ChatCompletionResponse:
    """
    Generate a (non-streaming) chat completion.

    Args:
        provider: LLM provider ("openai", "anthropic", or "google")
        api_key: API key for the provider
        model: Model identifier (provider-specific)
        messages: Conversation history or list of message dicts
        tools: Optional list of tool/function definitions
        tool_choice: Tool choice strategy ("none", "auto", "required", or tool name)
        temperature: Sampling temperature (0.0-2.0)
        timeout: Request timeout in seconds
        max_tokens: Maximum tokens to generate

    Returns:
        ChatCompletionResponse with the complete response

    Raises:
        ValueError: If ``provider`` is not a recognized CompletionsProvider.
    """

    # Normalize string input to enum
    if isinstance(provider, str):
        try:
            provider = CompletionsProvider(provider.lower())
        except ValueError:
            # Suppress the internal enum ValueError; our message carries the context.
            raise ValueError(f"Invalid provider: {provider}.") from None

    # Normalize messages to proper message objects
    normalized_messages = normalize_messages(messages)

    # Normalize tools to proper tool objects
    normalized_tools = normalize_tools(tools) if tools else None

    # Dispatch to the provider-specific client implementation
    client: CompletionClient
    if provider == CompletionsProvider.OPENAI:
        client = OpenAiCompletionClient()
    elif provider == CompletionsProvider.ANTHROPIC:
        client = AnthropicCompletionClient()
    elif provider == CompletionsProvider.GOOGLE:
        client = GoogleCompletionClient()
    else:
        raise ValueError(f"Invalid provider: {provider}")

    request = ChatCompletionRequest(
        provider=provider,
        api_key=api_key,
        model=model,
        messages=normalized_messages,
        tools=normalized_tools,
        tool_choice=tool_choice,
        temperature=temperature,
        timeout=timeout,
        max_tokens=max_tokens,
        streaming=False,
    )

    return await client.generate_chat_completion(request=request)
87
+
88
+
89
async def generate_chat_completion_stream(
    provider: Union[str, CompletionsProvider],
    api_key: str,
    model: str,
    messages: Union[ConversationHistory, list[dict]],
    *,
    tools: Optional[list[Union[ToolDefinition, dict[str, Any]]]] = None,
    tool_choice: Optional[ToolChoice] = None,
    temperature: Optional[float] = None,
    timeout: Optional[float] = None,
    max_tokens: Optional[int] = None,
    stream_options: Optional[Union[StreamOptions, dict[str, Any]]] = None,
) -> AsyncIterable[ChatCompletionChunk]:
    """
    Generate a streaming chat completion.

    Args:
        provider: LLM provider ("openai", "anthropic", or "google")
        api_key: API key for the provider
        model: Model identifier (provider-specific)
        messages: Conversation history or list of message dicts
        tools: Optional list of tool/function definitions
        tool_choice: Tool choice strategy ("none", "auto", "required", or tool name)
        temperature: Sampling temperature (0.0-2.0)
        timeout: Request timeout in seconds
        max_tokens: Maximum tokens to generate
        stream_options: Streaming configuration (StreamOptions or equivalent dict),
            e.g. sentence-level buffering options consumed by the stream processor

    Returns:
        AsyncIterable of ChatCompletionChunk objects (typed chunks)

    Raises:
        ValueError: If ``provider`` is not recognized, or is Google
            (streaming is currently unsupported for Google).
    """

    # Normalize string input to enum
    if isinstance(provider, str):
        try:
            provider = CompletionsProvider(provider.lower())
        except ValueError:
            # Suppress the internal enum ValueError; our message carries the context.
            raise ValueError(f"Invalid provider: {provider}.") from None

    # Normalize messages to proper message objects
    normalized_messages = normalize_messages(messages)

    # Normalize tools to proper tool objects
    normalized_tools = normalize_tools(tools) if tools else None

    # Normalize stream options if provided as dict.
    # NOTE: no truthiness guard here — an empty dict must still be converted
    # to a StreamOptions instance rather than passed through as a raw dict.
    if isinstance(stream_options, dict):
        stream_options = StreamOptions.deserialize(stream_options)

    # Dispatch to the provider-specific client implementation
    client: CompletionClient
    if provider == CompletionsProvider.OPENAI:
        client = OpenAiCompletionClient()
    elif provider == CompletionsProvider.ANTHROPIC:
        client = AnthropicCompletionClient()
    elif provider == CompletionsProvider.GOOGLE:
        # TODO: enable once google fixes their SDK
        raise ValueError("Google streaming currently unsupported")
        # client = GoogleCompletionClient()
    else:
        raise ValueError(f"Invalid provider: {provider}")

    request = ChatCompletionRequest(
        provider=provider,
        api_key=api_key,
        model=model,
        messages=normalized_messages,
        tools=normalized_tools,
        tool_choice=tool_choice,
        temperature=temperature,
        timeout=timeout,
        max_tokens=max_tokens,
        streaming=True,
        stream_options=stream_options,
    )

    return client.generate_chat_completion_stream(request=request)
165
+
@@ -0,0 +1,192 @@
1
+ from typing import Any, List, Optional
2
+ from anthropic import AsyncAnthropic, AsyncStream
3
+ from anthropic.types import (
4
+ Message as AnthropicMessage,
5
+ MessageParam as AnthropicMessageParam,
6
+ TextBlockParam as AnthropicTextBlockParam,
7
+ ToolParam as AnthropicToolDefinition,
8
+ ToolChoiceParam as AnthropicToolChoice,
9
+ TextBlock as AnthropicTextBlock,
10
+ ToolUseBlock as AnthropicToolCall,
11
+ RawMessageStreamEvent as AnthropicStreamEvent,
12
+ )
13
+ from primfunctions.completions.messages import AssistantMessage, ToolCall, FunctionCall
14
+ from primfunctions.completions.response import ChatCompletionResponse
15
+ from primfunctions.completions.request import ChatCompletionRequest, StreamOptions
16
+
17
+ from ..base import CompletionClient
18
+ from .utils import denormalize_conversation_history, denormalize_tools, denormalize_tool_choice
19
+ from .streaming import AnthropicStreamProcessor
20
+
21
+
22
+ class AnthropicCompletionClient(CompletionClient):
23
+
24
+ def _denormalize_request(
25
+ self,
26
+ request: ChatCompletionRequest,
27
+ ) -> dict[str, Any]:
28
+ """Convert ChatCompletionRequest to kwargs for _get_completion."""
29
+
30
+ messages, system_prompt = denormalize_conversation_history(request.messages)
31
+
32
+ kwargs = {
33
+ "api_key": request.api_key,
34
+ "model": request.model,
35
+ "messages": messages,
36
+ "tools": denormalize_tools(request.tools),
37
+ "tool_choice": denormalize_tool_choice(request.tool_choice),
38
+ "temperature": request.temperature if request.temperature else None,
39
+ }
40
+
41
+ # Add system prompt if present
42
+ if system_prompt:
43
+ kwargs["system"] = system_prompt
44
+
45
+ # Include or default required max_tokens
46
+ kwargs["max_tokens"] = request.max_tokens if request.max_tokens else 4000
47
+
48
+ # Include timeout if provided
49
+ if request.timeout is not None:
50
+ kwargs["timeout"] = request.timeout
51
+
52
+ return kwargs
53
+
54
+
55
+ def _normalize_response(
56
+ self,
57
+ response: AnthropicMessage,
58
+ ) -> ChatCompletionResponse:
59
+ """Convert Anthropic Message to normalized ChatCompletionResponse."""
60
+
61
+ # Extract text content and tool calls from content blocks
62
+ text_parts: list[str] = []
63
+ tool_calls: list[ToolCall] = []
64
+ tool_call_index: int = 0
65
+
66
+ for block in response.content:
67
+ match block:
68
+ case AnthropicTextBlock():
69
+ text_parts.append(block.text)
70
+ case AnthropicToolCall():
71
+ tool_calls.append(ToolCall(
72
+ id=block.id,
73
+ type="function",
74
+ function=FunctionCall(
75
+ name=block.name,
76
+ arguments=block.input
77
+ ),
78
+ index=tool_call_index,
79
+ ))
80
+ tool_call_index += 1
81
+
82
+ # Combine text content
83
+ content = "".join(text_parts) if text_parts else None
84
+
85
+ # Create normalized message
86
+ normalized_message = AssistantMessage(
87
+ content=content,
88
+ tool_calls=tool_calls if tool_calls else None,
89
+ )
90
+
91
+ return ChatCompletionResponse(
92
+ message=normalized_message,
93
+ finish_reason=response.stop_reason,
94
+ usage=response.usage.model_dump() if response.usage else None
95
+ )
96
+
97
+
98
+ async def _get_completion(
99
+ self,
100
+ api_key: str,
101
+ model: str,
102
+ messages: list[AnthropicMessageParam],
103
+ max_tokens: int,
104
+ tools: Optional[list[AnthropicToolDefinition]] = None,
105
+ tool_choice: Optional[AnthropicToolChoice] = None,
106
+ temperature: Optional[float] = None,
107
+ system: Optional[List[AnthropicTextBlockParam]] = None,
108
+ timeout: Optional[float] = None,
109
+ ) -> AnthropicMessage:
110
+ """TODO.
111
+
112
+ [Client](https://github.com/anthropics/anthropic-sdk-python)
113
+ """
114
+ async with AsyncAnthropic(api_key=api_key) as client:
115
+ # Build kwargs dict with required values
116
+ kwargs = {
117
+ "model": model,
118
+ "messages": messages,
119
+ "max_tokens": max_tokens,
120
+ "stream": False,
121
+ # Disable thinking
122
+ "thinking": {
123
+ "type": "disabled"
124
+ },
125
+ }
126
+
127
+ # Only add optional parameters if they're provided
128
+ if tools is not None:
129
+ kwargs["tools"] = tools
130
+ if tool_choice is not None:
131
+ kwargs["tool_choice"] = tool_choice
132
+ if temperature is not None:
133
+ kwargs["temperature"] = temperature
134
+ if system is not None:
135
+ kwargs["system"] = system
136
+ if timeout is not None:
137
+ kwargs["timeout"] = timeout
138
+
139
+ return await client.messages.create(**kwargs)
140
+
141
+
142
+ def _get_stream_processor(
143
+ self,
144
+ stream_options: Optional[StreamOptions] = None,
145
+ ) -> AnthropicStreamProcessor:
146
+ """Get anthropic-specific StreamProcessor."""
147
+ return AnthropicStreamProcessor(stream_options=stream_options)
148
+
149
+
150
+ async def _get_completion_stream(
151
+ self,
152
+ api_key: str,
153
+ model: str,
154
+ messages: list[AnthropicMessageParam],
155
+ max_tokens: int,
156
+ tools: Optional[list[AnthropicToolDefinition]] = None,
157
+ tool_choice: Optional[AnthropicToolChoice] = None,
158
+ temperature: Optional[float] = None,
159
+ system: Optional[List[AnthropicTextBlockParam]] = None,
160
+ timeout: Optional[float] = None,
161
+ ) -> AsyncStream[AnthropicStreamEvent]:
162
+ """Stream chat response events from Anthropic.
163
+
164
+ [Client](https://github.com/anthropics/anthropic-sdk-python)
165
+ """
166
+ client = AsyncAnthropic(api_key=api_key)
167
+
168
+ # Build kwargs dict with required values
169
+ kwargs = {
170
+ "model": model,
171
+ "messages": messages,
172
+ "max_tokens": max_tokens,
173
+ "stream": True,
174
+ # Disable thinking
175
+ "thinking": {
176
+ "type": "disabled"
177
+ },
178
+ }
179
+
180
+ # Only add optional parameters if they're provided
181
+ if tools is not None:
182
+ kwargs["tools"] = tools
183
+ if tool_choice is not None:
184
+ kwargs["tool_choice"] = tool_choice
185
+ if temperature is not None:
186
+ kwargs["temperature"] = temperature
187
+ if system is not None:
188
+ kwargs["system"] = system
189
+ if timeout is not None:
190
+ kwargs["timeout"] = timeout
191
+
192
+ return await client.messages.create(**kwargs)
@@ -0,0 +1,197 @@
1
+ from typing import Any, AsyncIterable, Dict, List, Optional
2
+ from anthropic.types import (
3
+ RawMessageStreamEvent as AnthropicStreamEvent,
4
+ RawMessageStartEvent as AnthropicMessageStartEvent,
5
+ RawContentBlockStartEvent as AnthropicStartEvent,
6
+ RawContentBlockDeltaEvent as AnthropicContentDeltaEvent,
7
+ RawMessageDeltaEvent as AnthropicMessageDeltaEvent,
8
+ TextDelta as AnthropicTextDelta,
9
+ InputJSONDelta as AnthropicJsonDelta,
10
+ ToolUseBlock as AnthropicToolCall,
11
+ TextBlock as AnthropicTextBlock,
12
+ )
13
+ from primfunctions.completions.messages import ToolCall, AssistantMessage
14
+ from primfunctions.completions.response import ChatCompletionResponse
15
+ from primfunctions.completions.streaming import (
16
+ ChatCompletionChunk,
17
+ AssistantMessageDeltaChunk,
18
+ AssistantMessageSentenceChunk,
19
+ FinishReasonChunk,
20
+ ToolCallChunk,
21
+ UsageChunk,
22
+ FinalResponseChunk,
23
+ )
24
+ from primfunctions.utils.streaming import update_sentence_buffer, clean_text_for_speech
25
+ from primfunctions.completions.request import StreamOptions
26
+
27
+ from ..base import StreamProcessor, PartialToolCall
28
+
29
+
30
+ class AnthropicStreamProcessor(StreamProcessor):
31
+ """Processes Anthropic message stream events yielding normalized chunks."""
32
+
33
+ def __init__(
34
+ self,
35
+ stream_options: Optional[StreamOptions] = None,
36
+ ):
37
+ self.stream_sentences: bool = False
38
+ self.clean_sentences: bool = True
39
+ self.min_sentence_length: int = 6
40
+ self.punctuation_marks: Optional[list[str]] = None
41
+ self.punctuation_language: Optional[str] = None
42
+
43
+ # Override stream options defaults
44
+ if stream_options:
45
+ self.stream_sentences = stream_options.stream_sentences
46
+ self.clean_sentences = stream_options.clean_sentences
47
+ self.min_sentence_length = stream_options.min_sentence_length
48
+ self.punctuation_marks = stream_options.punctuation_marks
49
+ self.punctuation_language = stream_options.punctuation_language
50
+
51
+ self.active_call: PartialToolCall = None
52
+ self.active_call_index = 0
53
+ self.content: str = ""
54
+ self.tool_calls: List[ToolCall] = []
55
+ self.finish_reason: str = ""
56
+ self.usage: Dict[str, Any] = {}
57
+ self.sentence_buffer = ""
58
+
59
+
60
+ def _process_text_partial(self, text: str) -> List[ChatCompletionChunk]:
61
+ """Process text partial."""
62
+ chunks: List[ChatCompletionChunk] = []
63
+
64
+ if not text:
65
+ return chunks
66
+
67
+ if self.stream_sentences:
68
+ # Append delta to sentence buffer
69
+ sentence_buffer, complete_sentence = update_sentence_buffer(
70
+ content=text,
71
+ sentence_buffer=self.sentence_buffer,
72
+ punctuation_marks=self.punctuation_marks,
73
+ clean_text=self.clean_sentences,
74
+ min_sentence_length=self.min_sentence_length,
75
+ )
76
+ self.sentence_buffer = sentence_buffer
77
+
78
+ if complete_sentence:
79
+ chunks.append(AssistantMessageSentenceChunk(
80
+ sentence=complete_sentence
81
+ ))
82
+ else:
83
+ # Otherwise stream content delta directly
84
+ chunks.append(AssistantMessageDeltaChunk(
85
+ content=text
86
+ ))
87
+
88
+ # Add content delta to accumulated response
89
+ self.content += text
90
+
91
+ return chunks
92
+
93
+
94
+ def _process_event(
95
+ self,
96
+ event: AnthropicStreamEvent,
97
+ ) -> List[ChatCompletionChunk]:
98
+ """Convert Anthropic streaming event to individual typed chunks."""
99
+ chunks = []
100
+
101
+ match event.type:
102
+ case "message_start":
103
+ msg_start_event: AnthropicMessageStartEvent = event
104
+
105
+ # Capture usage from message start
106
+ self.usage.update(msg_start_event.message.usage.model_dump())
107
+
108
+ case "content_block_start":
109
+ start_event: AnthropicStartEvent = event
110
+
111
+ # For tool_use blocks, initialize the partial tool call
112
+ if start_event.content_block.type == "tool_use":
113
+ tool_block: AnthropicToolCall = start_event.content_block
114
+ self.active_call = PartialToolCall(
115
+ id=tool_block.id,
116
+ type="function",
117
+ function_name=tool_block.name,
118
+ arguments_buffer="",
119
+ index=self.active_call_index,
120
+ )
121
+ elif start_event.content_block.type == "text":
122
+ text_block: AnthropicTextBlock = start_event.content_block
123
+ chunks.extend(self._process_text_partial(text_block.text))
124
+
125
+ case "content_block_delta":
126
+ delta_event: AnthropicContentDeltaEvent = event
127
+
128
+ if hasattr(delta_event.delta, 'type'):
129
+ # Handle text content delta
130
+ if delta_event.delta.type == "text_delta":
131
+ text_delta: AnthropicTextDelta = delta_event.delta
132
+ chunks.extend(self._process_text_partial(text_delta.text))
133
+
134
+ # Handle JSON content delta
135
+ if delta_event.delta.type == "input_json_delta" and self.active_call:
136
+ json_delta: AnthropicJsonDelta = delta_event.delta
137
+ self.active_call.arguments_buffer += json_delta.partial_json
138
+
139
+ case "content_block_stop":
140
+ # Only special handling for tool call
141
+ if self.active_call:
142
+ tool_call = self.active_call.to_tool_call()
143
+ chunks.append(ToolCallChunk(tool_call=tool_call))
144
+ self.tool_calls.append(tool_call)
145
+ self.active_call = None
146
+ self.active_call_index += 1
147
+
148
+ case "message_delta":
149
+ msg_delta_event: AnthropicMessageDeltaEvent = event
150
+
151
+ # Handle usage information if present
152
+ if msg_delta_event.usage:
153
+ self.usage.update(msg_delta_event.usage.model_dump())
154
+
155
+ # Handle finish reason from delta
156
+ if msg_delta_event.delta.stop_reason:
157
+ self.finish_reason = msg_delta_event.delta.stop_reason
158
+
159
+ case "message_stop":
160
+ # End of message - no special handling
161
+ pass
162
+
163
+ return chunks
164
+
165
+ async def process_stream(
166
+ self,
167
+ stream: AsyncIterable[AnthropicStreamEvent],
168
+ ) -> AsyncIterable[ChatCompletionChunk]:
169
+ """Process Anthropic event stream and yield normalized chunks."""
170
+ async for event in stream:
171
+ for chunk in self._process_event(event):
172
+ yield chunk
173
+
174
+ # Handle remaining sentence buffer if streaming sentences
175
+ if self.stream_sentences and self.sentence_buffer:
176
+ complete_sentence = clean_text_for_speech(self.sentence_buffer) if self.clean_sentences else self.sentence_buffer
177
+ yield AssistantMessageSentenceChunk(
178
+ sentence=complete_sentence
179
+ )
180
+
181
+ # Yield the finish chunk (or default)
182
+ yield FinishReasonChunk(finish_reason=self.finish_reason or "stop")
183
+
184
+ # Yield the usage chunk
185
+ yield UsageChunk(usage=self.usage)
186
+
187
+ # Yield aggregated chat completion response as final chunk
188
+ yield FinalResponseChunk(
189
+ response=ChatCompletionResponse(
190
+ message=AssistantMessage(
191
+ content=self.content or None,
192
+ tool_calls=self.tool_calls or None,
193
+ ),
194
+ usage=self.usage,
195
+ finish_reason=self.finish_reason,
196
+ )
197
+ )