spaik-sdk 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. spaik_sdk/__init__.py +21 -0
  2. spaik_sdk/agent/__init__.py +0 -0
  3. spaik_sdk/agent/base_agent.py +249 -0
  4. spaik_sdk/attachments/__init__.py +22 -0
  5. spaik_sdk/attachments/builder.py +61 -0
  6. spaik_sdk/attachments/file_storage_provider.py +27 -0
  7. spaik_sdk/attachments/mime_types.py +118 -0
  8. spaik_sdk/attachments/models.py +63 -0
  9. spaik_sdk/attachments/provider_support.py +53 -0
  10. spaik_sdk/attachments/storage/__init__.py +0 -0
  11. spaik_sdk/attachments/storage/base_file_storage.py +32 -0
  12. spaik_sdk/attachments/storage/impl/__init__.py +0 -0
  13. spaik_sdk/attachments/storage/impl/local_file_storage.py +101 -0
  14. spaik_sdk/audio/__init__.py +12 -0
  15. spaik_sdk/audio/options.py +53 -0
  16. spaik_sdk/audio/providers/__init__.py +1 -0
  17. spaik_sdk/audio/providers/google_tts.py +77 -0
  18. spaik_sdk/audio/providers/openai_stt.py +71 -0
  19. spaik_sdk/audio/providers/openai_tts.py +111 -0
  20. spaik_sdk/audio/stt.py +61 -0
  21. spaik_sdk/audio/tts.py +124 -0
  22. spaik_sdk/config/credentials_provider.py +10 -0
  23. spaik_sdk/config/env.py +59 -0
  24. spaik_sdk/config/env_credentials_provider.py +7 -0
  25. spaik_sdk/config/get_credentials_provider.py +14 -0
  26. spaik_sdk/image_gen/__init__.py +9 -0
  27. spaik_sdk/image_gen/image_generator.py +83 -0
  28. spaik_sdk/image_gen/options.py +24 -0
  29. spaik_sdk/image_gen/providers/__init__.py +0 -0
  30. spaik_sdk/image_gen/providers/google.py +75 -0
  31. spaik_sdk/image_gen/providers/openai.py +60 -0
  32. spaik_sdk/llm/__init__.py +0 -0
  33. spaik_sdk/llm/cancellation_handle.py +10 -0
  34. spaik_sdk/llm/consumption/__init__.py +0 -0
  35. spaik_sdk/llm/consumption/consumption_estimate.py +26 -0
  36. spaik_sdk/llm/consumption/consumption_estimate_builder.py +113 -0
  37. spaik_sdk/llm/consumption/consumption_extractor.py +59 -0
  38. spaik_sdk/llm/consumption/token_usage.py +31 -0
  39. spaik_sdk/llm/converters.py +146 -0
  40. spaik_sdk/llm/cost/__init__.py +1 -0
  41. spaik_sdk/llm/cost/builtin_cost_provider.py +83 -0
  42. spaik_sdk/llm/cost/cost_estimate.py +8 -0
  43. spaik_sdk/llm/cost/cost_provider.py +28 -0
  44. spaik_sdk/llm/extract_error_message.py +37 -0
  45. spaik_sdk/llm/langchain_loop_manager.py +270 -0
  46. spaik_sdk/llm/langchain_service.py +196 -0
  47. spaik_sdk/llm/message_handler.py +188 -0
  48. spaik_sdk/llm/streaming/__init__.py +1 -0
  49. spaik_sdk/llm/streaming/block_manager.py +152 -0
  50. spaik_sdk/llm/streaming/models.py +42 -0
  51. spaik_sdk/llm/streaming/streaming_content_handler.py +157 -0
  52. spaik_sdk/llm/streaming/streaming_event_handler.py +215 -0
  53. spaik_sdk/llm/streaming/streaming_state_manager.py +58 -0
  54. spaik_sdk/models/__init__.py +0 -0
  55. spaik_sdk/models/factories/__init__.py +0 -0
  56. spaik_sdk/models/factories/anthropic_factory.py +33 -0
  57. spaik_sdk/models/factories/base_model_factory.py +71 -0
  58. spaik_sdk/models/factories/google_factory.py +30 -0
  59. spaik_sdk/models/factories/ollama_factory.py +41 -0
  60. spaik_sdk/models/factories/openai_factory.py +50 -0
  61. spaik_sdk/models/llm_config.py +46 -0
  62. spaik_sdk/models/llm_families.py +7 -0
  63. spaik_sdk/models/llm_model.py +17 -0
  64. spaik_sdk/models/llm_wrapper.py +25 -0
  65. spaik_sdk/models/model_registry.py +156 -0
  66. spaik_sdk/models/providers/__init__.py +0 -0
  67. spaik_sdk/models/providers/anthropic_provider.py +29 -0
  68. spaik_sdk/models/providers/azure_provider.py +31 -0
  69. spaik_sdk/models/providers/base_provider.py +62 -0
  70. spaik_sdk/models/providers/google_provider.py +26 -0
  71. spaik_sdk/models/providers/ollama_provider.py +26 -0
  72. spaik_sdk/models/providers/openai_provider.py +26 -0
  73. spaik_sdk/models/providers/provider_type.py +90 -0
  74. spaik_sdk/orchestration/__init__.py +24 -0
  75. spaik_sdk/orchestration/base_orchestrator.py +238 -0
  76. spaik_sdk/orchestration/checkpoint.py +80 -0
  77. spaik_sdk/orchestration/models.py +103 -0
  78. spaik_sdk/prompt/__init__.py +0 -0
  79. spaik_sdk/prompt/get_prompt_loader.py +13 -0
  80. spaik_sdk/prompt/local_prompt_loader.py +21 -0
  81. spaik_sdk/prompt/prompt_loader.py +48 -0
  82. spaik_sdk/prompt/prompt_loader_mode.py +14 -0
  83. spaik_sdk/py.typed +1 -0
  84. spaik_sdk/recording/__init__.py +1 -0
  85. spaik_sdk/recording/base_playback.py +90 -0
  86. spaik_sdk/recording/base_recorder.py +50 -0
  87. spaik_sdk/recording/conditional_recorder.py +38 -0
  88. spaik_sdk/recording/impl/__init__.py +1 -0
  89. spaik_sdk/recording/impl/local_playback.py +76 -0
  90. spaik_sdk/recording/impl/local_recorder.py +85 -0
  91. spaik_sdk/recording/langchain_serializer.py +88 -0
  92. spaik_sdk/server/__init__.py +1 -0
  93. spaik_sdk/server/api/routers/__init__.py +0 -0
  94. spaik_sdk/server/api/routers/api_builder.py +149 -0
  95. spaik_sdk/server/api/routers/audio_router_factory.py +201 -0
  96. spaik_sdk/server/api/routers/file_router_factory.py +111 -0
  97. spaik_sdk/server/api/routers/thread_router_factory.py +284 -0
  98. spaik_sdk/server/api/streaming/__init__.py +0 -0
  99. spaik_sdk/server/api/streaming/format_sse_event.py +41 -0
  100. spaik_sdk/server/api/streaming/negotiate_streaming_response.py +8 -0
  101. spaik_sdk/server/api/streaming/streaming_negotiator.py +10 -0
  102. spaik_sdk/server/authorization/__init__.py +0 -0
  103. spaik_sdk/server/authorization/base_authorizer.py +64 -0
  104. spaik_sdk/server/authorization/base_user.py +13 -0
  105. spaik_sdk/server/authorization/dummy_authorizer.py +17 -0
  106. spaik_sdk/server/job_processor/__init__.py +0 -0
  107. spaik_sdk/server/job_processor/base_job_processor.py +8 -0
  108. spaik_sdk/server/job_processor/thread_job_processor.py +32 -0
  109. spaik_sdk/server/pubsub/__init__.py +1 -0
  110. spaik_sdk/server/pubsub/cancellation_publisher.py +7 -0
  111. spaik_sdk/server/pubsub/cancellation_subscriber.py +38 -0
  112. spaik_sdk/server/pubsub/event_publisher.py +13 -0
  113. spaik_sdk/server/pubsub/impl/__init__.py +1 -0
  114. spaik_sdk/server/pubsub/impl/local_cancellation_pubsub.py +48 -0
  115. spaik_sdk/server/pubsub/impl/signalr_publisher.py +36 -0
  116. spaik_sdk/server/queue/__init__.py +1 -0
  117. spaik_sdk/server/queue/agent_job_queue.py +27 -0
  118. spaik_sdk/server/queue/impl/__init__.py +1 -0
  119. spaik_sdk/server/queue/impl/azure_queue.py +24 -0
  120. spaik_sdk/server/response/__init__.py +0 -0
  121. spaik_sdk/server/response/agent_response_generator.py +39 -0
  122. spaik_sdk/server/response/response_generator.py +13 -0
  123. spaik_sdk/server/response/simple_agent_response_generator.py +14 -0
  124. spaik_sdk/server/services/__init__.py +0 -0
  125. spaik_sdk/server/services/thread_converters.py +113 -0
  126. spaik_sdk/server/services/thread_models.py +90 -0
  127. spaik_sdk/server/services/thread_service.py +91 -0
  128. spaik_sdk/server/storage/__init__.py +1 -0
  129. spaik_sdk/server/storage/base_thread_repository.py +51 -0
  130. spaik_sdk/server/storage/impl/__init__.py +0 -0
  131. spaik_sdk/server/storage/impl/in_memory_thread_repository.py +100 -0
  132. spaik_sdk/server/storage/impl/local_file_thread_repository.py +217 -0
  133. spaik_sdk/server/storage/thread_filter.py +166 -0
  134. spaik_sdk/server/storage/thread_metadata.py +53 -0
  135. spaik_sdk/thread/__init__.py +0 -0
  136. spaik_sdk/thread/adapters/__init__.py +0 -0
  137. spaik_sdk/thread/adapters/cli/__init__.py +0 -0
  138. spaik_sdk/thread/adapters/cli/block_display.py +92 -0
  139. spaik_sdk/thread/adapters/cli/display_manager.py +84 -0
  140. spaik_sdk/thread/adapters/cli/live_cli.py +235 -0
  141. spaik_sdk/thread/adapters/event_adapter.py +28 -0
  142. spaik_sdk/thread/adapters/streaming_block_adapter.py +57 -0
  143. spaik_sdk/thread/adapters/sync_adapter.py +76 -0
  144. spaik_sdk/thread/models.py +224 -0
  145. spaik_sdk/thread/thread_container.py +468 -0
  146. spaik_sdk/tools/__init__.py +0 -0
  147. spaik_sdk/tools/impl/__init__.py +0 -0
  148. spaik_sdk/tools/impl/mcp_tool_provider.py +93 -0
  149. spaik_sdk/tools/impl/search_tool_provider.py +18 -0
  150. spaik_sdk/tools/tool_provider.py +131 -0
  151. spaik_sdk/tracing/__init__.py +13 -0
  152. spaik_sdk/tracing/agent_trace.py +72 -0
  153. spaik_sdk/tracing/get_trace_sink.py +15 -0
  154. spaik_sdk/tracing/local_trace_sink.py +23 -0
  155. spaik_sdk/tracing/trace_sink.py +19 -0
  156. spaik_sdk/tracing/trace_sink_mode.py +14 -0
  157. spaik_sdk/utils/__init__.py +0 -0
  158. spaik_sdk/utils/init_logger.py +24 -0
  159. spaik_sdk-0.6.2.dist-info/METADATA +379 -0
  160. spaik_sdk-0.6.2.dist-info/RECORD +161 -0
  161. spaik_sdk-0.6.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,101 @@
1
+ import json
2
+ import uuid
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from spaik_sdk.attachments.models import FileMetadata
7
+ from spaik_sdk.attachments.storage.base_file_storage import BaseFileStorage
8
+
9
+
10
class LocalFileStorage(BaseFileStorage):
    """Store file content and JSON metadata in two directories on local disk.

    Content is written to ``<data_dir>/content/<file_id>`` and metadata to
    ``<data_dir>/metadata/<file_id>.json``. Ids created by :meth:`store` come
    from ``uuid.uuid4()``; ids passed to the lookup/delete methods are
    validated first so they can never resolve outside those directories.
    """

    def __init__(self, data_dir: str = "data/files"):
        """Create the content and metadata directories under *data_dir* if missing."""
        self.data_dir = Path(data_dir)
        self.files_dir = self.data_dir / "content"
        self.metadata_dir = self.data_dir / "metadata"

        self.files_dir.mkdir(parents=True, exist_ok=True)
        self.metadata_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _is_valid_file_id(file_id: str) -> bool:
        """Return True when *file_id* is a single plain path component.

        Empty ids, ``.``/``..``, ids containing a NUL byte and ids containing a
        directory separator are rejected.
        """
        if not isinstance(file_id, str) or not file_id or "\x00" in file_id:
            return False
        return file_id not in (".", "..") and Path(file_id).name == file_id

    def _file_path(self, file_id: str) -> Path:
        """Return the content path for *file_id*."""
        return self.files_dir / file_id

    def _metadata_path(self, file_id: str) -> Path:
        """Return the metadata JSON path for *file_id*."""
        return self.metadata_dir / f"{file_id}.json"

    def _save_metadata(self, metadata: FileMetadata) -> None:
        """Serialize *metadata* as JSON next to the other metadata files."""
        with open(self._metadata_path(metadata.file_id), "w", encoding="utf-8") as f:
            json.dump(metadata.to_dict(), f)

    def _load_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Load metadata for *file_id*.

        Returns None when the id is invalid, the metadata file is missing, the
        JSON cannot be decoded, or ``FileMetadata.from_dict`` raises KeyError.
        """
        if not self._is_valid_file_id(file_id):
            return None
        try:
            # Open directly instead of checking exists() first: avoids a race
            # with a concurrent delete between the check and the open.
            with open(self._metadata_path(file_id), "r", encoding="utf-8") as f:
                return FileMetadata.from_dict(json.load(f))
        except (FileNotFoundError, json.JSONDecodeError, KeyError):
            return None

    async def store(
        self,
        data: bytes,
        mime_type: str,
        owner_id: str,
        filename: Optional[str] = None,
    ) -> FileMetadata:
        """Write *data* under a new UUID and persist its metadata.

        Returns:
            The FileMetadata describing the stored file.
        """
        file_id = str(uuid.uuid4())

        with open(self._file_path(file_id), "wb") as f:
            f.write(data)

        metadata = FileMetadata(
            file_id=file_id,
            mime_type=mime_type,
            owner_id=owner_id,
            size_bytes=len(data),
            filename=filename,
        )
        self._save_metadata(metadata)

        return metadata

    async def retrieve(self, file_id: str) -> tuple[bytes, FileMetadata]:
        """Return ``(content, metadata)`` for *file_id*.

        Raises:
            FileNotFoundError: If the metadata or the content is missing, or
                the id is not a valid file id.
        """
        metadata = await self.get_metadata(file_id)
        if metadata is None:
            raise FileNotFoundError(f"File not found: {file_id}")

        try:
            with open(self._file_path(file_id), "rb") as f:
                data = f.read()
        except FileNotFoundError:
            raise FileNotFoundError(f"File content not found: {file_id}") from None

        return data, metadata

    async def get_metadata(self, file_id: str) -> Optional[FileMetadata]:
        """Return the stored metadata for *file_id*, or None if unavailable."""
        return self._load_metadata(file_id)

    async def delete(self, file_id: str) -> bool:
        """Remove content and metadata for *file_id*.

        Returns:
            True if at least one of the two files was removed, False otherwise
            (including when the id is invalid).
        """
        if not self._is_valid_file_id(file_id):
            return False

        deleted = False
        for path in (self._file_path(file_id), self._metadata_path(file_id)):
            try:
                path.unlink()
            except FileNotFoundError:
                continue
            deleted = True

        return deleted

    async def exists(self, file_id: str) -> bool:
        """Return True only when both content and metadata exist for *file_id*."""
        if not self._is_valid_file_id(file_id):
            return False
        return self._file_path(file_id).exists() and self._metadata_path(file_id).exists()

    def clear_all(self) -> None:
        """Delete every stored content file and every ``*.json`` metadata file."""
        for file_path in self.files_dir.glob("*"):
            # Skip stray subdirectories; unlink() would raise on them.
            if file_path.is_file():
                file_path.unlink()
        for metadata_path in self.metadata_dir.glob("*.json"):
            if metadata_path.is_file():
                metadata_path.unlink()
@@ -0,0 +1,12 @@
1
+ from spaik_sdk.audio.options import AudioFormat, STTOptions, TTSOptions, TTSVoice
2
+ from spaik_sdk.audio.stt import SpeechToText
3
+ from spaik_sdk.audio.tts import TextToSpeech
4
+
5
+ __all__ = [
6
+ "TextToSpeech",
7
+ "SpeechToText",
8
+ "TTSOptions",
9
+ "STTOptions",
10
+ "TTSVoice",
11
+ "AudioFormat",
12
+ ]
@@ -0,0 +1,53 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import Enum
3
+ from typing import Any
4
+
5
+
6
class AudioFormat(Enum):
    """Audio container/codec identifiers selectable as TTS output format."""

    # Each value is the lowercase format name.
    MP3 = "mp3"
    OPUS = "opus"
    AAC = "aac"
    FLAC = "flac"
    WAV = "wav"
    PCM = "pcm"
13
+
14
+
15
class TTSVoice(Enum):
    """Named voices for the supported TTS providers.

    Values are the voice names as strings: lowercase for the OpenAI group,
    capitalized for the Gemini group.
    """

    # --- OpenAI ---
    ALLOY = "alloy"
    ECHO = "echo"
    FABLE = "fable"
    ONYX = "onyx"
    NOVA = "nova"
    SHIMMER = "shimmer"

    # --- Gemini (subset) ---
    ZEPHYR = "Zephyr"
    PUCK = "Puck"
    CHARON = "Charon"
    KORE = "Kore"
    FENRIR = "Fenrir"
    AOEDE = "Aoede"
33
+
34
+
35
@dataclass
class TTSOptions:
    """Options for text-to-speech synthesis.

    ``voice`` is stored as a plain string. An Enum member (for example a
    ``TTSVoice``) is also accepted and is replaced by its ``.value`` on
    construction, so code that places ``voice`` into a JSON body always sees
    a string.
    """

    voice: str = "alloy"
    speed: float = 1.0
    output_format: AudioFormat = AudioFormat.MP3
    language: str | None = None
    # Extra provider-specific settings supplied by the caller.
    vendor: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Normalize an Enum ``voice`` to its string value."""
        if isinstance(self.voice, Enum):
            self.voice = self.voice.value
44
+
45
+
46
+ @dataclass
47
+ class STTOptions:
48
+ """Options for speech-to-text transcription."""
49
+
50
+ language: str | None = None
51
+ prompt: str | None = None
52
+ temperature: float = 0.0
53
+ vendor: dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1 @@
1
+ # Audio providers
@@ -0,0 +1,77 @@
1
+ import base64
2
+
3
+ import httpx
4
+
5
+ from spaik_sdk.audio.options import TTSOptions
6
+
7
+ GOOGLE_GENERATIVE_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models"
8
+
9
+
10
async def synthesize(
    text: str,
    model: str,
    api_key: str,
    options: TTSOptions,
    endpoint: str | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Synthesize speech using Google's Gemini TTS API.

    Args:
        text: The text to convert to speech
        model: The model to use (e.g., "gemini-2.5-flash-tts", "gemini-2.5-pro-tts")
        api_key: Google API key
        options: TTS options. Only ``voice`` and ``vendor`` are read here;
            ``vendor`` entries are merged into ``generationConfig`` and may
            override the defaults.
        endpoint: Optional custom endpoint (base URL the model path is appended to)
        headers: Optional additional headers

    Returns:
        Audio bytes decoded from the first base64 ``inlineData`` part of the
        first candidate.
        NOTE(review): the container/encoding of these bytes is decided by the
        API, not by this function - confirm before treating them as WAV.

    Raises:
        ValueError: On a non-200 response, or when the response carries no
            candidates or no inline audio data.
    """
    base_url = endpoint or GOOGLE_GENERATIVE_ENDPOINT
    url = f"{base_url}/{model}:generateContent"

    request_headers = {
        "Content-Type": "application/json",
    }
    if headers:
        request_headers.update(headers)

    # Build the speech config
    speech_config: dict = {
        "voiceConfig": {
            "prebuiltVoiceConfig": {
                "voiceName": options.voice,
            }
        }
    }

    generation_config: dict = {
        "responseModalities": ["AUDIO"],
        "speechConfig": speech_config,
    }
    generation_config.update(options.vendor)

    payload = {
        "contents": [{"parts": [{"text": text}]}],
        "generationConfig": generation_config,
    }

    async with httpx.AsyncClient(timeout=120.0) as client:
        # Pass the key via ``params`` so it is percent-encoded instead of being
        # spliced raw into the URL string.
        # NOTE(review): the key still travels in the query string and can end
        # up in request logs; consider a header-based key if the API allows it.
        response = await client.post(
            url,
            params={"key": api_key},
            headers=request_headers,
            json=payload,
        )
        if response.status_code != 200:
            raise ValueError(f"Google TTS API error {response.status_code}: {response.text}")
        data = response.json()

    candidates = data.get("candidates", [])
    if not candidates:
        raise ValueError("No audio generated by Gemini TTS API")

    parts = candidates[0].get("content", {}).get("parts", [])
    for part in parts:
        # Tolerate an inlineData entry without "data" rather than failing
        # with a bare KeyError; fall through to the ValueError below.
        audio_b64 = (part.get("inlineData") or {}).get("data")
        if audio_b64:
            return base64.b64decode(audio_b64)

    raise ValueError("No audio data found in Gemini TTS API response")
@@ -0,0 +1,71 @@
1
+ import httpx
2
+
3
+ from spaik_sdk.audio.options import STTOptions
4
+
5
+ OPENAI_STT_ENDPOINT = "https://api.openai.com/v1/audio/transcriptions"
6
+
7
+
8
# Filename suffix -> multipart content type; checked in order, case-insensitively.
_AUDIO_CONTENT_TYPES: tuple[tuple[str, str], ...] = (
    (".mp3", "audio/mpeg"),
    (".wav", "audio/wav"),
    (".m4a", "audio/mp4"),
    (".ogg", "audio/ogg"),
)


def _content_type_for(filename: str) -> str:
    """Return the content type for *filename*, defaulting to ``audio/webm``."""
    lowered = filename.lower()
    for suffix, content_type in _AUDIO_CONTENT_TYPES:
        if lowered.endswith(suffix):
            return content_type
    return "audio/webm"


async def transcribe(
    audio_bytes: bytes,
    model: str,
    api_key: str,
    options: STTOptions,
    filename: str = "audio.webm",
    endpoint: str | None = None,
    headers: dict[str, str] | None = None,
) -> str:
    """
    Transcribe audio using OpenAI's transcription API.

    Args:
        audio_bytes: The audio data to transcribe
        model: The model to use (e.g., "whisper-1", "gpt-4o-transcribe")
        api_key: OpenAI API key, sent as a Bearer token
        options: STT options; ``language`` and ``prompt`` are sent when set,
            ``temperature`` only when greater than 0
        filename: Filename hint; its suffix (matched case-insensitively)
            selects the upload content type
        endpoint: Optional custom endpoint
        headers: Optional additional headers

    Returns:
        Transcribed text with surrounding whitespace stripped

    Raises:
        ValueError: If the API responds with a non-200 status.
    """
    url = endpoint or OPENAI_STT_ENDPOINT

    request_headers = {
        "Authorization": f"Bearer {api_key}",
    }
    if headers:
        request_headers.update(headers)

    # Build multipart form data
    files = {
        "file": (filename, audio_bytes, _content_type_for(filename)),
    }
    data: dict[str, str] = {
        "model": model,
        "response_format": "text",
    }

    if options.language:
        data["language"] = options.language
    if options.prompt:
        data["prompt"] = options.prompt
    if options.temperature > 0:
        data["temperature"] = str(options.temperature)

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(url, headers=request_headers, files=files, data=data)
        if response.status_code != 200:
            raise ValueError(f"OpenAI STT API error {response.status_code}: {response.text}")
        return response.text.strip()
@@ -0,0 +1,111 @@
1
+ from collections.abc import AsyncIterator
2
+
3
+ import httpx
4
+
5
+ from spaik_sdk.audio.options import AudioFormat, TTSOptions
6
+
7
+ OPENAI_TTS_ENDPOINT = "https://api.openai.com/v1/audio/speech"
8
+
9
+
10
def _get_format_map() -> dict[AudioFormat, str]:
    """Return a new mapping from each AudioFormat to its ``response_format`` string."""
    pairs = (
        (AudioFormat.MP3, "mp3"),
        (AudioFormat.OPUS, "opus"),
        (AudioFormat.AAC, "aac"),
        (AudioFormat.FLAC, "flac"),
        (AudioFormat.WAV, "wav"),
        (AudioFormat.PCM, "pcm"),
    )
    return dict(pairs)
19
+
20
+
21
def _build_payload(text: str, model: str, options: TTSOptions) -> dict:
    """Assemble the JSON body for a speech request.

    Unknown output formats fall back to ``"mp3"``. Entries in
    ``options.vendor`` are applied last and override the standard keys.
    """
    response_format = _get_format_map().get(options.output_format, "mp3")
    base: dict = {
        "model": model,
        "input": text,
        "voice": options.voice,
        "response_format": response_format,
        "speed": options.speed,
    }
    return {**base, **options.vendor}
32
+
33
+
34
+ def _build_headers(api_key: str, extra_headers: dict[str, str] | None = None) -> dict[str, str]:
35
+ headers = {
36
+ "Authorization": f"Bearer {api_key}",
37
+ "Content-Type": "application/json",
38
+ }
39
+ if extra_headers:
40
+ headers.update(extra_headers)
41
+ return headers
42
+
43
+
44
async def synthesize(
    text: str,
    model: str,
    api_key: str,
    options: TTSOptions,
    endpoint: str | None = None,
    headers: dict[str, str] | None = None,
) -> bytes:
    """
    Request a complete audio clip from OpenAI's TTS API in one call.

    Args:
        text: The text to convert to speech
        model: The model to use (e.g., "tts-1", "tts-1-hd", "gpt-4o-mini-tts")
        api_key: OpenAI API key
        options: TTS options
        endpoint: Optional custom endpoint
        headers: Optional additional headers

    Returns:
        The raw response body (audio bytes in the requested format)

    Raises:
        ValueError: If the API responds with a non-200 status.
    """
    target = endpoint or OPENAI_TTS_ENDPOINT
    http_headers = _build_headers(api_key, headers)
    body = _build_payload(text, model, options)

    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(target, headers=http_headers, json=body)
        if response.status_code == 200:
            return response.content
        raise ValueError(f"OpenAI TTS API error {response.status_code}: {response.text}")
75
+
76
+
77
async def synthesize_stream(
    text: str,
    model: str,
    api_key: str,
    options: TTSOptions,
    endpoint: str | None = None,
    headers: dict[str, str] | None = None,
) -> AsyncIterator[bytes]:
    """
    Stream synthesized speech using OpenAI's TTS API.

    Yields audio chunks as they arrive, allowing playback to start immediately.

    Args:
        text: The text to convert to speech
        model: The model to use (e.g., "tts-1", "tts-1-hd", "gpt-4o-mini-tts")
        api_key: OpenAI API key
        options: TTS options
        endpoint: Optional custom endpoint
        headers: Optional additional headers

    Yields:
        Audio bytes chunks (read from the response 4096 bytes at a time)

    Raises:
        ValueError: If the API responds with a non-200 status.
    """
    url = endpoint or OPENAI_TTS_ENDPOINT
    request_headers = _build_headers(api_key, headers)
    payload = _build_payload(text, model, options)

    async with httpx.AsyncClient(timeout=120.0) as client:
        async with client.stream("POST", url, headers=request_headers, json=payload) as response:
            if response.status_code != 200:
                content = await response.aread()
                # An error body is not guaranteed to be valid UTF-8; never let a
                # decode failure mask the actual API error.
                detail = content.decode(errors="replace")
                raise ValueError(f"OpenAI TTS API error {response.status_code}: {detail}")
            async for chunk in response.aiter_bytes(chunk_size=4096):
                yield chunk
spaik_sdk/audio/stt.py ADDED
@@ -0,0 +1,61 @@
1
+ from spaik_sdk.audio.options import STTOptions
2
+ from spaik_sdk.audio.providers import openai_stt
3
+ from spaik_sdk.config.env import env_config
4
+ from spaik_sdk.config.get_credentials_provider import credentials_provider
5
+
6
+
7
class SpeechToText:
    """
    Speech-to-text transcriber backed by the OpenAI STT provider module.

    Note: Only OpenAI is supported for STT as Gemini doesn't have
    a dedicated speech-to-text API endpoint.
    """

    def __init__(
        self,
        model: str | None = None,
        endpoint: str | None = None,
        headers: dict[str, str] | None = None,
    ):
        """
        Set up the transcriber.

        Args:
            model: STT model name. When falsy, the STT_MODEL env var is used,
                falling back to whisper-1.
            endpoint: Optional custom API endpoint.
            headers: Optional additional HTTP headers.
        """
        if not model:
            model = env_config.get_key("STT_MODEL", "whisper-1", required=False)
        self.model = model
        self.endpoint = endpoint
        self.headers = headers

    async def transcribe(
        self,
        audio_bytes: bytes,
        options: STTOptions | None = None,
        filename: str = "audio.webm",
    ) -> str:
        """
        Transcribe audio to text.

        Args:
            audio_bytes: The audio data to transcribe.
            options: STT options (language, prompt hint, etc.); defaults to
                a fresh ``STTOptions()``.
            filename: Filename hint for audio format detection.

        Returns:
            Transcribed text string.
        """
        effective_options = options if options else STTOptions()
        openai_key = credentials_provider.get_provider_key("openai")

        transcript = await openai_stt.transcribe(
            audio_bytes=audio_bytes,
            model=self.model,
            api_key=openai_key,
            options=effective_options,
            filename=filename,
            endpoint=self.endpoint,
            headers=self.headers,
        )
        return transcript
spaik_sdk/audio/tts.py ADDED
@@ -0,0 +1,124 @@
1
+ from collections.abc import AsyncIterator
2
+
3
+ from spaik_sdk.audio.options import TTSOptions
4
+ from spaik_sdk.audio.providers import google_tts, openai_tts
5
+ from spaik_sdk.config.env import env_config
6
+ from spaik_sdk.config.get_credentials_provider import credentials_provider
7
+
8
+
9
class TextToSpeech:
    """
    Text-to-speech front end that dispatches to a provider module.

    The provider is chosen from the model name prefix: ``tts-`` and ``gpt-``
    go to OpenAI (tts-1, tts-1-hd, gpt-4o-mini-tts), ``gemini`` goes to
    Google Gemini (gemini-2.5-flash-tts, gemini-2.5-pro-tts).
    """

    def __init__(
        self,
        model: str | None = None,
        endpoint: str | None = None,
        headers: dict[str, str] | None = None,
    ):
        """
        Set up the synthesizer.

        Args:
            model: TTS model name. When falsy, the TTS_MODEL env var is used,
                falling back to tts-1.
            endpoint: Optional custom API endpoint.
            headers: Optional additional HTTP headers.
        """
        if not model:
            model = env_config.get_key("TTS_MODEL", "tts-1", required=False)
        self.model = model
        self.endpoint = endpoint
        self.headers = headers

    def _get_provider(self) -> str:
        """Map the model name (case-insensitively) to "openai" or "google"."""
        name = self.model.lower()
        if name.startswith(("tts-", "gpt-")):
            return "openai"
        if name.startswith("gemini"):
            return "google"
        raise ValueError(f"Unknown TTS model provider for: {self.model}")

    async def synthesize(
        self,
        text: str,
        options: TTSOptions | None = None,
    ) -> bytes:
        """
        Synthesize speech from text in a single request.

        Args:
            text: The text to convert to speech.
            options: TTS options (voice, speed, format, etc.); defaults to
                a fresh ``TTSOptions()``.

        Returns:
            Audio bytes as returned by the selected provider.
        """
        opts = options or TTSOptions()
        provider = self._get_provider()

        if provider not in ("openai", "google"):
            raise ValueError(f"Unsupported TTS provider: {provider}")

        # Both provider modules expose the same synthesize() signature.
        backend = openai_tts if provider == "openai" else google_tts
        api_key = credentials_provider.get_provider_key(provider)
        return await backend.synthesize(
            text=text,
            model=self.model,
            api_key=api_key,
            options=opts,
            endpoint=self.endpoint,
            headers=self.headers,
        )

    async def synthesize_stream(
        self,
        text: str,
        options: TTSOptions | None = None,
    ) -> AsyncIterator[bytes]:
        """
        Stream synthesized speech from text.

        OpenAI models yield chunks as they arrive. Google models yield one
        chunk containing the full clip.

        Args:
            text: The text to convert to speech.
            options: TTS options (voice, speed, format, etc.)

        Yields:
            Audio bytes chunks.
        """
        opts = options or TTSOptions()
        provider = self._get_provider()

        if provider == "google":
            # Google doesn't support streaming, fall back to full synthesis
            yield await self.synthesize(text, options)
            return

        if provider != "openai":
            raise ValueError(f"Unsupported TTS provider: {provider}")

        api_key = credentials_provider.get_provider_key("openai")
        stream = openai_tts.synthesize_stream(
            text=text,
            model=self.model,
            api_key=api_key,
            options=opts,
            endpoint=self.endpoint,
            headers=self.headers,
        )
        async for chunk in stream:
            yield chunk
@@ -0,0 +1,10 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class CredentialsProvider(ABC):
    """Abstract source of credential values looked up by key name."""

    @abstractmethod
    def get_key(self, key: str, default: str = "", required: bool = True) -> str:
        """Return the value stored under *key*; implemented by subclasses."""
        ...

    def get_provider_key(self, provider: str) -> str:
        """Return the API key for *provider*, looked up as ``<PROVIDER>_API_KEY``."""
        key_name = f"{provider.upper()}_API_KEY"
        return self.get_key(key_name)
@@ -0,0 +1,59 @@
1
+ import os
2
+ from typing import Dict
3
+
4
+ from spaik_sdk.models.llm_model import LLMModel
5
+ from spaik_sdk.models.model_registry import ModelRegistry
6
+ from spaik_sdk.models.providers.provider_type import ProviderType
7
+ from spaik_sdk.prompt.prompt_loader_mode import PromptLoaderMode
8
+ from spaik_sdk.tracing.trace_sink_mode import TraceSinkMode
9
+
10
+
11
class EnvConfig:
    """Typed accessors for configuration read from environment variables."""

    def get_key(self, key: str, default: str = "", required: bool = True) -> str:
        """Return environment variable *key*, or *default* when it is unset.

        Raises:
            ValueError: If *required* is true and the resulting value is empty.
        """
        value = os.environ.get(key, default)
        if required and not value:
            raise ValueError(f"Environment variable {key} is required but not set")
        return value

    def get_azure_keys(self) -> Dict[str, str]:
        """Return Azure settings; key, version and endpoint are required, deployments optional."""
        return {
            "api_key": self.get_key("AZURE_API_KEY"),
            "api_version": self.get_key("AZURE_API_VERSION"),
            "endpoint": self.get_key("AZURE_ENDPOINT"),
            "o3-mini_deployment": self.get_key("AZURE_O3_MINI_DEPLOYMENT", required=False),
            "gpt-4_1_deployment": self.get_key("AZURE_GPT_4_1_DEPLOYMENT", required=False),
            "gpt-4o_deployment": self.get_key("AZURE_GPT_4O_DEPLOYMENT", required=False),
        }

    def get_default_model(self) -> LLMModel:
        """Resolve the required DEFAULT_MODEL variable through the model registry."""
        return ModelRegistry.from_name(self.get_key("DEFAULT_MODEL"))

    def get_provider_type(self) -> ProviderType:
        """Return the provider from MODEL_PROVIDER, or derive it from the default model."""
        provider_type_name = self.get_key("MODEL_PROVIDER", required=False)
        if not provider_type_name:
            return ProviderType.from_model_name(self.get_default_model().name)
        return ProviderType.from_name(provider_type_name)

    def is_debug_mode(self, key: str) -> bool:
        """Return True when *key* is listed in the comma-separated DEBUG_MODES variable.

        Whitespace around each entry is ignored, so ``"a, b"`` matches both
        ``a`` and ``b``.
        """
        debug_modes = self.get_key("DEBUG_MODES", required=False)
        if not debug_modes:
            return False
        return key in {mode.strip() for mode in debug_modes.split(",")}

    def get_prompts_dir(self) -> str:
        """Return PROMPTS_DIR, defaulting to ``prompts``."""
        return self.get_key("PROMPTS_DIR", "prompts")

    def get_prompt_loader_mode(self) -> PromptLoaderMode:
        """Return the prompt loader mode from PROMPT_LOADER_MODE, defaulting to ``local``."""
        return PromptLoaderMode.from_name(self.get_key("PROMPT_LOADER_MODE", "local"))

    def get_trace_sink_mode(self) -> TraceSinkMode:
        """Return the trace sink mode from TRACE_SINK_MODE, defaulting to ``local``."""
        return TraceSinkMode.from_name(self.get_key("TRACE_SINK_MODE", "local"))

    def get_credentials_provider_type(self) -> str:
        """Return CREDENTIALS_PROVIDER_TYPE, defaulting to ``env``."""
        return self.get_key("CREDENTIALS_PROVIDER_TYPE", "env")

    def get_image_model(self) -> str:
        """Return the required IMAGE_MODEL variable."""
        return self.get_key("IMAGE_MODEL")
59
+ env_config = EnvConfig()
@@ -0,0 +1,7 @@
1
+ from spaik_sdk.config.credentials_provider import CredentialsProvider
2
+ from spaik_sdk.config.env import env_config
3
+
4
+
5
class EnvCredentialsProvider(CredentialsProvider):
    """Credentials provider that delegates every lookup to ``env_config``."""

    def get_key(self, key: str, default: str = "", required: bool = True) -> str:
        """Return *key* via ``env_config.get_key`` with the same default/required arguments."""
        return env_config.get_key(key, default=default, required=required)