voicerun_completions 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voicerun_completions/__init__.py +0 -0
- voicerun_completions/client.py +165 -0
- voicerun_completions/providers/anthropic/anthropic_client.py +192 -0
- voicerun_completions/providers/anthropic/streaming.py +197 -0
- voicerun_completions/providers/anthropic/utils.py +193 -0
- voicerun_completions/providers/base.py +145 -0
- voicerun_completions/providers/google/google_client.py +165 -0
- voicerun_completions/providers/google/streaming.py +177 -0
- voicerun_completions/providers/google/utils.py +142 -0
- voicerun_completions/providers/openai/openai_client.py +159 -0
- voicerun_completions/providers/openai/streaming.py +182 -0
- voicerun_completions/providers/openai/utils.py +135 -0
- voicerun_completions-0.1.2.dist-info/METADATA +46 -0
- voicerun_completions-0.1.2.dist-info/RECORD +16 -0
- voicerun_completions-0.1.2.dist-info/WHEEL +5 -0
- voicerun_completions-0.1.2.dist-info/top_level.txt +1 -0
voicerun_completions/__init__.py
File without changes
voicerun_completions/client.py
@@ -0,0 +1,165 @@
+from typing import Any, Optional, Union, AsyncIterable
+from primfunctions.completions.request import (
+    CompletionsProvider,
+    ChatCompletionRequest,
+    StreamOptions,
+    ToolChoice,
+    ToolDefinition,
+    normalize_tools,
+)
+from primfunctions.completions.messages import ConversationHistory, normalize_messages
+from primfunctions.completions.streaming import ChatCompletionChunk
+from primfunctions.completions.response import ChatCompletionResponse
+
+from .providers.base import CompletionClient
+from .providers.openai.openai_client import OpenAiCompletionClient
+from .providers.anthropic.anthropic_client import AnthropicCompletionClient
+from .providers.google.google_client import GoogleCompletionClient
+
+
+async def generate_chat_completion(
+    provider: Union[str, CompletionsProvider],
+    api_key: str,
+    model: str,
+    messages: Union[ConversationHistory, list[dict]],
+    *,
+    tools: Optional[list[Union[ToolDefinition, dict[str, Any]]]] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    temperature: Optional[float] = None,
+    timeout: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+) -> ChatCompletionResponse:
+    """
+    Generate a chat completion.
+
+    Args:
+        provider: LLM provider ("openai", "anthropic", or "google")
+        api_key: API key for the provider
+        model: Model identifier (provider-specific)
+        messages: Conversation history or list of message dicts
+        tools: Optional list of tool/function definitions
+        tool_choice: Tool choice strategy ("none", "auto", "required", or tool name)
+        temperature: Sampling temperature (0.0-2.0)
+        timeout: Request timeout in seconds
+        max_tokens: Maximum tokens to generate
+
+    Returns:
+        ChatCompletionResponse with the complete response
+    """
+
+    # Normalize string input to enum
+    if isinstance(provider, str):
+        try:
+            provider = CompletionsProvider(provider.lower())
+        except ValueError:
+            raise ValueError(f"Invalid provider: {provider}")
+
+    # Normalize messages to proper message objects
+    normalized_messages = normalize_messages(messages)
+
+    # Normalize tools to proper tool objects
+    normalized_tools = normalize_tools(tools) if tools else None
+
+    client: CompletionClient
+    if provider == CompletionsProvider.OPENAI:
+        client = OpenAiCompletionClient()
+    elif provider == CompletionsProvider.ANTHROPIC:
+        client = AnthropicCompletionClient()
+    elif provider == CompletionsProvider.GOOGLE:
+        client = GoogleCompletionClient()
+    else:
+        raise ValueError(f"Invalid provider: {provider}")
+
+    request = ChatCompletionRequest(
+        provider=provider,
+        api_key=api_key,
+        model=model,
+        messages=normalized_messages,
+        tools=normalized_tools,
+        tool_choice=tool_choice,
+        temperature=temperature,
+        timeout=timeout,
+        max_tokens=max_tokens,
+        streaming=False,
+    )
+
+    return await client.generate_chat_completion(request=request)
+
+
+async def generate_chat_completion_stream(
+    provider: Union[str, CompletionsProvider],
+    api_key: str,
+    model: str,
+    messages: Union[ConversationHistory, list[dict]],
+    *,
+    tools: Optional[list[Union[ToolDefinition, dict[str, Any]]]] = None,
+    tool_choice: Optional[ToolChoice] = None,
+    temperature: Optional[float] = None,
+    timeout: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    stream_options: Optional[Union[StreamOptions, dict[str, Any]]] = None,
+) -> AsyncIterable[ChatCompletionChunk]:
+    """
+    Generate a streaming chat completion.
+
+    Args:
+        provider: LLM provider ("openai", "anthropic", or "google")
+        api_key: API key for the provider
+        model: Model identifier (provider-specific)
+        messages: Conversation history or list of message dicts
+        tools: Optional list of tool/function definitions
+        tool_choice: Tool choice strategy ("none", "auto", "required", or tool name)
+        temperature: Sampling temperature (0.0-2.0)
+        timeout: Request timeout in seconds
+        max_tokens: Maximum tokens to generate
+        stream_options: Streaming behavior options (e.g. sentence buffering and
+            cleaning), as a StreamOptions object or an equivalent dict
+
+    Returns:
+        AsyncIterable of ChatCompletionChunk objects (typed chunks)
+    """
+
+    # Normalize string input to enum
+    if isinstance(provider, str):
+        try:
+            provider = CompletionsProvider(provider.lower())
+        except ValueError:
+            raise ValueError(f"Invalid provider: {provider}")
+
+    # Normalize messages to proper message objects
+    normalized_messages = normalize_messages(messages)
+
+    # Normalize tools to proper tool objects
+    normalized_tools = normalize_tools(tools) if tools else None
+
+    # Normalize stream options if provided as dict
+    if isinstance(stream_options, dict):
+        stream_options = StreamOptions.deserialize(stream_options)
+
+    client: CompletionClient
+    if provider == CompletionsProvider.OPENAI:
+        client = OpenAiCompletionClient()
+    elif provider == CompletionsProvider.ANTHROPIC:
+        client = AnthropicCompletionClient()
+    elif provider == CompletionsProvider.GOOGLE:
+        # TODO: enable once Google fixes their SDK
+        raise ValueError("Google streaming is currently unsupported")
+        # client = GoogleCompletionClient()
+    else:
+        raise ValueError(f"Invalid provider: {provider}")
+
+    request = ChatCompletionRequest(
+        provider=provider,
+        api_key=api_key,
+        model=model,
+        messages=normalized_messages,
+        tools=normalized_tools,
+        tool_choice=tool_choice,
+        temperature=temperature,
+        timeout=timeout,
+        max_tokens=max_tokens,
+        streaming=True,
+        stream_options=stream_options,
+    )
+
+    return client.generate_chat_completion_stream(request=request)
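For orientation, a minimal usage sketch of the non-streaming entry point above. The call shape follows the signature in client.py; the API key and model id are placeholders, and `response.message.content` comes from the AssistantMessage built in _normalize_response later in this diff.

    import asyncio

    from voicerun_completions.client import generate_chat_completion

    async def main() -> None:
        response = await generate_chat_completion(
            provider="anthropic",                # "openai", "anthropic", or "google"
            api_key="<your-api-key>",            # placeholder
            model="<provider-specific-model>",   # placeholder
            messages=[{"role": "user", "content": "Hello!"}],
            temperature=0.7,
            max_tokens=256,
        )
        # Normalized assistant message, as constructed in _normalize_response
        print(response.message.content)

    asyncio.run(main())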
voicerun_completions/providers/anthropic/anthropic_client.py
@@ -0,0 +1,192 @@
+from typing import Any, List, Optional
+from anthropic import AsyncAnthropic, AsyncStream
+from anthropic.types import (
+    Message as AnthropicMessage,
+    MessageParam as AnthropicMessageParam,
+    TextBlockParam as AnthropicTextBlockParam,
+    ToolParam as AnthropicToolDefinition,
+    ToolChoiceParam as AnthropicToolChoice,
+    TextBlock as AnthropicTextBlock,
+    ToolUseBlock as AnthropicToolCall,
+    RawMessageStreamEvent as AnthropicStreamEvent,
+)
+from primfunctions.completions.messages import AssistantMessage, ToolCall, FunctionCall
+from primfunctions.completions.response import ChatCompletionResponse
+from primfunctions.completions.request import ChatCompletionRequest, StreamOptions
+
+from ..base import CompletionClient
+from .utils import denormalize_conversation_history, denormalize_tools, denormalize_tool_choice
+from .streaming import AnthropicStreamProcessor
+
+
+class AnthropicCompletionClient(CompletionClient):
+
+    def _denormalize_request(
+        self,
+        request: ChatCompletionRequest,
+    ) -> dict[str, Any]:
+        """Convert ChatCompletionRequest to kwargs for _get_completion."""
+
+        messages, system_prompt = denormalize_conversation_history(request.messages)
+
+        kwargs = {
+            "api_key": request.api_key,
+            "model": request.model,
+            "messages": messages,
+            "tools": denormalize_tools(request.tools),
+            "tool_choice": denormalize_tool_choice(request.tool_choice),
+            # Pass temperature through unchanged (0.0 is a valid value)
+            "temperature": request.temperature,
+        }
+
+        # Add system prompt if present
+        if system_prompt:
+            kwargs["system"] = system_prompt
+
+        # Include or default the required max_tokens
+        kwargs["max_tokens"] = request.max_tokens if request.max_tokens else 4000
+
+        # Include timeout if provided
+        if request.timeout is not None:
+            kwargs["timeout"] = request.timeout
+
+        return kwargs
+
+
+    def _normalize_response(
+        self,
+        response: AnthropicMessage,
+    ) -> ChatCompletionResponse:
+        """Convert an Anthropic Message to a normalized ChatCompletionResponse."""
+
+        # Extract text content and tool calls from content blocks
+        text_parts: list[str] = []
+        tool_calls: list[ToolCall] = []
+        tool_call_index: int = 0
+
+        for block in response.content:
+            match block:
+                case AnthropicTextBlock():
+                    text_parts.append(block.text)
+                case AnthropicToolCall():
+                    tool_calls.append(ToolCall(
+                        id=block.id,
+                        type="function",
+                        function=FunctionCall(
+                            name=block.name,
+                            arguments=block.input,
+                        ),
+                        index=tool_call_index,
+                    ))
+                    tool_call_index += 1
+
+        # Combine text content
+        content = "".join(text_parts) if text_parts else None
+
+        # Create normalized message
+        normalized_message = AssistantMessage(
+            content=content,
+            tool_calls=tool_calls if tool_calls else None,
+        )
+
+        return ChatCompletionResponse(
+            message=normalized_message,
+            finish_reason=response.stop_reason,
+            usage=response.usage.model_dump() if response.usage else None,
+        )
+
+
+    async def _get_completion(
+        self,
+        api_key: str,
+        model: str,
+        messages: list[AnthropicMessageParam],
+        max_tokens: int,
+        tools: Optional[list[AnthropicToolDefinition]] = None,
+        tool_choice: Optional[AnthropicToolChoice] = None,
+        temperature: Optional[float] = None,
+        system: Optional[List[AnthropicTextBlockParam]] = None,
+        timeout: Optional[float] = None,
+    ) -> AnthropicMessage:
+        """Fetch a complete (non-streaming) chat response from Anthropic.
+
+        [Client](https://github.com/anthropics/anthropic-sdk-python)
+        """
+        async with AsyncAnthropic(api_key=api_key) as client:
+            # Build kwargs dict with required values
+            kwargs = {
+                "model": model,
+                "messages": messages,
+                "max_tokens": max_tokens,
+                "stream": False,
+                # Disable thinking
+                "thinking": {
+                    "type": "disabled"
+                },
+            }
+
+            # Only add optional parameters if they're provided
+            if tools is not None:
+                kwargs["tools"] = tools
+            if tool_choice is not None:
+                kwargs["tool_choice"] = tool_choice
+            if temperature is not None:
+                kwargs["temperature"] = temperature
+            if system is not None:
+                kwargs["system"] = system
+            if timeout is not None:
+                kwargs["timeout"] = timeout
+
+            return await client.messages.create(**kwargs)
+
+
+    def _get_stream_processor(
+        self,
+        stream_options: Optional[StreamOptions] = None,
+    ) -> AnthropicStreamProcessor:
+        """Get the Anthropic-specific StreamProcessor."""
+        return AnthropicStreamProcessor(stream_options=stream_options)
+
+
+    async def _get_completion_stream(
+        self,
+        api_key: str,
+        model: str,
+        messages: list[AnthropicMessageParam],
+        max_tokens: int,
+        tools: Optional[list[AnthropicToolDefinition]] = None,
+        tool_choice: Optional[AnthropicToolChoice] = None,
+        temperature: Optional[float] = None,
+        system: Optional[List[AnthropicTextBlockParam]] = None,
+        timeout: Optional[float] = None,
+    ) -> AsyncStream[AnthropicStreamEvent]:
+        """Stream chat response events from Anthropic.
+
+        [Client](https://github.com/anthropics/anthropic-sdk-python)
+        """
+        # Not used as a context manager here: the returned stream must stay
+        # usable after this method returns.
+        client = AsyncAnthropic(api_key=api_key)
+
+        # Build kwargs dict with required values
+        kwargs = {
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "stream": True,
+            # Disable thinking
+            "thinking": {
+                "type": "disabled"
+            },
+        }
+
+        # Only add optional parameters if they're provided
+        if tools is not None:
+            kwargs["tools"] = tools
+        if tool_choice is not None:
+            kwargs["tool_choice"] = tool_choice
+        if temperature is not None:
+            kwargs["temperature"] = temperature
+        if system is not None:
+            kwargs["system"] = system
+        if timeout is not None:
+            kwargs["timeout"] = timeout
+
+        return await client.messages.create(**kwargs)
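providers/base.py (145 lines) is listed in this diff but not reproduced in this section, so the template that ties these hooks together is not visible here. Judging only from the hook names used above, a plausible orchestration might look roughly like the following hypothetical sketch (not the actual base class):

    # Hypothetical sketch of the CompletionClient template flow; the real
    # providers/base.py is not shown in this section of the diff.
    class CompletionClient:
        async def generate_chat_completion(self, request):
            # Denormalize the provider-agnostic request, call the provider SDK,
            # then normalize the raw response into a ChatCompletionResponse.
            kwargs = self._denormalize_request(request)
            raw_response = await self._get_completion(**kwargs)
            return self._normalize_response(raw_response)

        async def generate_chat_completion_stream(self, request):
            # Same denormalization, but the raw event stream is piped through
            # a provider-specific StreamProcessor yielding normalized chunks.
            kwargs = self._denormalize_request(request)
            processor = self._get_stream_processor(stream_options=request.stream_options)
            raw_stream = await self._get_completion_stream(**kwargs)
            async for chunk in processor.process_stream(raw_stream):
                yield chunk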
voicerun_completions/providers/anthropic/streaming.py
@@ -0,0 +1,197 @@
+from typing import Any, AsyncIterable, Dict, List, Optional
+from anthropic.types import (
+    RawMessageStreamEvent as AnthropicStreamEvent,
+    RawMessageStartEvent as AnthropicMessageStartEvent,
+    RawContentBlockStartEvent as AnthropicStartEvent,
+    RawContentBlockDeltaEvent as AnthropicContentDeltaEvent,
+    RawMessageDeltaEvent as AnthropicMessageDeltaEvent,
+    TextDelta as AnthropicTextDelta,
+    InputJSONDelta as AnthropicJsonDelta,
+    ToolUseBlock as AnthropicToolCall,
+    TextBlock as AnthropicTextBlock,
+)
+from primfunctions.completions.messages import ToolCall, AssistantMessage
+from primfunctions.completions.response import ChatCompletionResponse
+from primfunctions.completions.streaming import (
+    ChatCompletionChunk,
+    AssistantMessageDeltaChunk,
+    AssistantMessageSentenceChunk,
+    FinishReasonChunk,
+    ToolCallChunk,
+    UsageChunk,
+    FinalResponseChunk,
+)
+from primfunctions.utils.streaming import update_sentence_buffer, clean_text_for_speech
+from primfunctions.completions.request import StreamOptions
+
+from ..base import StreamProcessor, PartialToolCall
+
+
+class AnthropicStreamProcessor(StreamProcessor):
+    """Processes Anthropic message stream events, yielding normalized chunks."""
+
+    def __init__(
+        self,
+        stream_options: Optional[StreamOptions] = None,
+    ):
+        self.stream_sentences: bool = False
+        self.clean_sentences: bool = True
+        self.min_sentence_length: int = 6
+        self.punctuation_marks: Optional[list[str]] = None
+        self.punctuation_language: Optional[str] = None
+
+        # Override stream option defaults
+        if stream_options:
+            self.stream_sentences = stream_options.stream_sentences
+            self.clean_sentences = stream_options.clean_sentences
+            self.min_sentence_length = stream_options.min_sentence_length
+            self.punctuation_marks = stream_options.punctuation_marks
+            self.punctuation_language = stream_options.punctuation_language
+
+        self.active_call: Optional[PartialToolCall] = None
+        self.active_call_index = 0
+        self.content: str = ""
+        self.tool_calls: List[ToolCall] = []
+        self.finish_reason: str = ""
+        self.usage: Dict[str, Any] = {}
+        self.sentence_buffer = ""
+
+
+    def _process_text_partial(self, text: str) -> List[ChatCompletionChunk]:
+        """Process a partial text delta into zero or more chunks."""
+        chunks: List[ChatCompletionChunk] = []
+
+        if not text:
+            return chunks
+
+        if self.stream_sentences:
+            # Append delta to sentence buffer
+            sentence_buffer, complete_sentence = update_sentence_buffer(
+                content=text,
+                sentence_buffer=self.sentence_buffer,
+                punctuation_marks=self.punctuation_marks,
+                clean_text=self.clean_sentences,
+                min_sentence_length=self.min_sentence_length,
+            )
+            self.sentence_buffer = sentence_buffer
+
+            if complete_sentence:
+                chunks.append(AssistantMessageSentenceChunk(
+                    sentence=complete_sentence
+                ))
+        else:
+            # Otherwise stream the content delta directly
+            chunks.append(AssistantMessageDeltaChunk(
+                content=text
+            ))
+
+        # Add content delta to the accumulated response
+        self.content += text
+
+        return chunks
+
+
+    def _process_event(
+        self,
+        event: AnthropicStreamEvent,
+    ) -> List[ChatCompletionChunk]:
+        """Convert an Anthropic streaming event into individual typed chunks."""
+        chunks: List[ChatCompletionChunk] = []
+
+        match event.type:
+            case "message_start":
+                msg_start_event: AnthropicMessageStartEvent = event
+
+                # Capture usage from message start
+                self.usage.update(msg_start_event.message.usage.model_dump())
+
+            case "content_block_start":
+                start_event: AnthropicStartEvent = event
+
+                # For tool_use blocks, initialize the partial tool call
+                if start_event.content_block.type == "tool_use":
+                    tool_block: AnthropicToolCall = start_event.content_block
+                    self.active_call = PartialToolCall(
+                        id=tool_block.id,
+                        type="function",
+                        function_name=tool_block.name,
+                        arguments_buffer="",
+                        index=self.active_call_index,
+                    )
+                elif start_event.content_block.type == "text":
+                    text_block: AnthropicTextBlock = start_event.content_block
+                    chunks.extend(self._process_text_partial(text_block.text))
+
+            case "content_block_delta":
+                delta_event: AnthropicContentDeltaEvent = event
+
+                if hasattr(delta_event.delta, 'type'):
+                    # Handle text content delta
+                    if delta_event.delta.type == "text_delta":
+                        text_delta: AnthropicTextDelta = delta_event.delta
+                        chunks.extend(self._process_text_partial(text_delta.text))
+
+                    # Handle JSON (tool call arguments) delta
+                    if delta_event.delta.type == "input_json_delta" and self.active_call:
+                        json_delta: AnthropicJsonDelta = delta_event.delta
+                        self.active_call.arguments_buffer += json_delta.partial_json
+
+            case "content_block_stop":
+                # Special handling only for tool calls
+                if self.active_call:
+                    tool_call = self.active_call.to_tool_call()
+                    chunks.append(ToolCallChunk(tool_call=tool_call))
+                    self.tool_calls.append(tool_call)
+                    self.active_call = None
+                    self.active_call_index += 1
+
+            case "message_delta":
+                msg_delta_event: AnthropicMessageDeltaEvent = event
+
+                # Handle usage information if present
+                if msg_delta_event.usage:
+                    self.usage.update(msg_delta_event.usage.model_dump())
+
+                # Handle finish reason from the delta
+                if msg_delta_event.delta.stop_reason:
+                    self.finish_reason = msg_delta_event.delta.stop_reason
+
+            case "message_stop":
+                # End of message - no special handling
+                pass
+
+        return chunks
+
+    async def process_stream(
+        self,
+        stream: AsyncIterable[AnthropicStreamEvent],
+    ) -> AsyncIterable[ChatCompletionChunk]:
+        """Process an Anthropic event stream and yield normalized chunks."""
+        async for event in stream:
+            for chunk in self._process_event(event):
+                yield chunk
+
+        # Flush any remaining sentence buffer if streaming sentences
+        if self.stream_sentences and self.sentence_buffer:
+            complete_sentence = clean_text_for_speech(self.sentence_buffer) if self.clean_sentences else self.sentence_buffer
+            yield AssistantMessageSentenceChunk(
+                sentence=complete_sentence
+            )
+
+        # Yield the finish chunk (or default)
+        yield FinishReasonChunk(finish_reason=self.finish_reason or "stop")
+
+        # Yield the usage chunk
+        yield UsageChunk(usage=self.usage)
+
+        # Yield the aggregated chat completion response as the final chunk
+        yield FinalResponseChunk(
+            response=ChatCompletionResponse(
+                message=AssistantMessage(
+                    content=self.content or None,
+                    tool_calls=self.tool_calls or None,
+                ),
+                usage=self.usage,
+                finish_reason=self.finish_reason,
+            )
+        )
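To close the loop, a sketch of consuming the streaming pipeline end to end. The chunk types and their fields (sentence, tool_call, response) match the constructors used in streaming.py above; the credentials and model id are placeholders, and the exact dict shape accepted by StreamOptions.deserialize is an assumption mirroring the StreamOptions attributes read in __init__.

    import asyncio

    from voicerun_completions.client import generate_chat_completion_stream
    from primfunctions.completions.streaming import (
        AssistantMessageSentenceChunk,
        ToolCallChunk,
        FinalResponseChunk,
    )

    async def main() -> None:
        stream = await generate_chat_completion_stream(
            provider="anthropic",
            api_key="<your-api-key>",             # placeholder
            model="<provider-specific-model>",    # placeholder
            messages=[{"role": "user", "content": "Tell me a short story."}],
            # Assumed deserializable shape, mirroring StreamOptions attributes
            stream_options={"stream_sentences": True, "clean_sentences": True},
        )
        async for chunk in stream:
            if isinstance(chunk, AssistantMessageSentenceChunk):
                print("sentence:", chunk.sentence)    # speech-ready sentence
            elif isinstance(chunk, ToolCallChunk):
                print("tool call:", chunk.tool_call)  # completed tool call
            elif isinstance(chunk, FinalResponseChunk):
                print("finish:", chunk.response.finish_reason)

    asyncio.run(main())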