spaik-sdk 0.6.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. spaik_sdk/__init__.py +21 -0
  2. spaik_sdk/agent/__init__.py +0 -0
  3. spaik_sdk/agent/base_agent.py +249 -0
  4. spaik_sdk/attachments/__init__.py +22 -0
  5. spaik_sdk/attachments/builder.py +61 -0
  6. spaik_sdk/attachments/file_storage_provider.py +27 -0
  7. spaik_sdk/attachments/mime_types.py +118 -0
  8. spaik_sdk/attachments/models.py +63 -0
  9. spaik_sdk/attachments/provider_support.py +53 -0
  10. spaik_sdk/attachments/storage/__init__.py +0 -0
  11. spaik_sdk/attachments/storage/base_file_storage.py +32 -0
  12. spaik_sdk/attachments/storage/impl/__init__.py +0 -0
  13. spaik_sdk/attachments/storage/impl/local_file_storage.py +101 -0
  14. spaik_sdk/audio/__init__.py +12 -0
  15. spaik_sdk/audio/options.py +53 -0
  16. spaik_sdk/audio/providers/__init__.py +1 -0
  17. spaik_sdk/audio/providers/google_tts.py +77 -0
  18. spaik_sdk/audio/providers/openai_stt.py +71 -0
  19. spaik_sdk/audio/providers/openai_tts.py +111 -0
  20. spaik_sdk/audio/stt.py +61 -0
  21. spaik_sdk/audio/tts.py +124 -0
  22. spaik_sdk/config/credentials_provider.py +10 -0
  23. spaik_sdk/config/env.py +59 -0
  24. spaik_sdk/config/env_credentials_provider.py +7 -0
  25. spaik_sdk/config/get_credentials_provider.py +14 -0
  26. spaik_sdk/image_gen/__init__.py +9 -0
  27. spaik_sdk/image_gen/image_generator.py +83 -0
  28. spaik_sdk/image_gen/options.py +24 -0
  29. spaik_sdk/image_gen/providers/__init__.py +0 -0
  30. spaik_sdk/image_gen/providers/google.py +75 -0
  31. spaik_sdk/image_gen/providers/openai.py +60 -0
  32. spaik_sdk/llm/__init__.py +0 -0
  33. spaik_sdk/llm/cancellation_handle.py +10 -0
  34. spaik_sdk/llm/consumption/__init__.py +0 -0
  35. spaik_sdk/llm/consumption/consumption_estimate.py +26 -0
  36. spaik_sdk/llm/consumption/consumption_estimate_builder.py +113 -0
  37. spaik_sdk/llm/consumption/consumption_extractor.py +59 -0
  38. spaik_sdk/llm/consumption/token_usage.py +31 -0
  39. spaik_sdk/llm/converters.py +146 -0
  40. spaik_sdk/llm/cost/__init__.py +1 -0
  41. spaik_sdk/llm/cost/builtin_cost_provider.py +83 -0
  42. spaik_sdk/llm/cost/cost_estimate.py +8 -0
  43. spaik_sdk/llm/cost/cost_provider.py +28 -0
  44. spaik_sdk/llm/extract_error_message.py +37 -0
  45. spaik_sdk/llm/langchain_loop_manager.py +270 -0
  46. spaik_sdk/llm/langchain_service.py +196 -0
  47. spaik_sdk/llm/message_handler.py +188 -0
  48. spaik_sdk/llm/streaming/__init__.py +1 -0
  49. spaik_sdk/llm/streaming/block_manager.py +152 -0
  50. spaik_sdk/llm/streaming/models.py +42 -0
  51. spaik_sdk/llm/streaming/streaming_content_handler.py +157 -0
  52. spaik_sdk/llm/streaming/streaming_event_handler.py +215 -0
  53. spaik_sdk/llm/streaming/streaming_state_manager.py +58 -0
  54. spaik_sdk/models/__init__.py +0 -0
  55. spaik_sdk/models/factories/__init__.py +0 -0
  56. spaik_sdk/models/factories/anthropic_factory.py +33 -0
  57. spaik_sdk/models/factories/base_model_factory.py +71 -0
  58. spaik_sdk/models/factories/google_factory.py +30 -0
  59. spaik_sdk/models/factories/ollama_factory.py +41 -0
  60. spaik_sdk/models/factories/openai_factory.py +50 -0
  61. spaik_sdk/models/llm_config.py +46 -0
  62. spaik_sdk/models/llm_families.py +7 -0
  63. spaik_sdk/models/llm_model.py +17 -0
  64. spaik_sdk/models/llm_wrapper.py +25 -0
  65. spaik_sdk/models/model_registry.py +156 -0
  66. spaik_sdk/models/providers/__init__.py +0 -0
  67. spaik_sdk/models/providers/anthropic_provider.py +29 -0
  68. spaik_sdk/models/providers/azure_provider.py +31 -0
  69. spaik_sdk/models/providers/base_provider.py +62 -0
  70. spaik_sdk/models/providers/google_provider.py +26 -0
  71. spaik_sdk/models/providers/ollama_provider.py +26 -0
  72. spaik_sdk/models/providers/openai_provider.py +26 -0
  73. spaik_sdk/models/providers/provider_type.py +90 -0
  74. spaik_sdk/orchestration/__init__.py +24 -0
  75. spaik_sdk/orchestration/base_orchestrator.py +238 -0
  76. spaik_sdk/orchestration/checkpoint.py +80 -0
  77. spaik_sdk/orchestration/models.py +103 -0
  78. spaik_sdk/prompt/__init__.py +0 -0
  79. spaik_sdk/prompt/get_prompt_loader.py +13 -0
  80. spaik_sdk/prompt/local_prompt_loader.py +21 -0
  81. spaik_sdk/prompt/prompt_loader.py +48 -0
  82. spaik_sdk/prompt/prompt_loader_mode.py +14 -0
  83. spaik_sdk/py.typed +1 -0
  84. spaik_sdk/recording/__init__.py +1 -0
  85. spaik_sdk/recording/base_playback.py +90 -0
  86. spaik_sdk/recording/base_recorder.py +50 -0
  87. spaik_sdk/recording/conditional_recorder.py +38 -0
  88. spaik_sdk/recording/impl/__init__.py +1 -0
  89. spaik_sdk/recording/impl/local_playback.py +76 -0
  90. spaik_sdk/recording/impl/local_recorder.py +85 -0
  91. spaik_sdk/recording/langchain_serializer.py +88 -0
  92. spaik_sdk/server/__init__.py +1 -0
  93. spaik_sdk/server/api/routers/__init__.py +0 -0
  94. spaik_sdk/server/api/routers/api_builder.py +149 -0
  95. spaik_sdk/server/api/routers/audio_router_factory.py +201 -0
  96. spaik_sdk/server/api/routers/file_router_factory.py +111 -0
  97. spaik_sdk/server/api/routers/thread_router_factory.py +284 -0
  98. spaik_sdk/server/api/streaming/__init__.py +0 -0
  99. spaik_sdk/server/api/streaming/format_sse_event.py +41 -0
  100. spaik_sdk/server/api/streaming/negotiate_streaming_response.py +8 -0
  101. spaik_sdk/server/api/streaming/streaming_negotiator.py +10 -0
  102. spaik_sdk/server/authorization/__init__.py +0 -0
  103. spaik_sdk/server/authorization/base_authorizer.py +64 -0
  104. spaik_sdk/server/authorization/base_user.py +13 -0
  105. spaik_sdk/server/authorization/dummy_authorizer.py +17 -0
  106. spaik_sdk/server/job_processor/__init__.py +0 -0
  107. spaik_sdk/server/job_processor/base_job_processor.py +8 -0
  108. spaik_sdk/server/job_processor/thread_job_processor.py +32 -0
  109. spaik_sdk/server/pubsub/__init__.py +1 -0
  110. spaik_sdk/server/pubsub/cancellation_publisher.py +7 -0
  111. spaik_sdk/server/pubsub/cancellation_subscriber.py +38 -0
  112. spaik_sdk/server/pubsub/event_publisher.py +13 -0
  113. spaik_sdk/server/pubsub/impl/__init__.py +1 -0
  114. spaik_sdk/server/pubsub/impl/local_cancellation_pubsub.py +48 -0
  115. spaik_sdk/server/pubsub/impl/signalr_publisher.py +36 -0
  116. spaik_sdk/server/queue/__init__.py +1 -0
  117. spaik_sdk/server/queue/agent_job_queue.py +27 -0
  118. spaik_sdk/server/queue/impl/__init__.py +1 -0
  119. spaik_sdk/server/queue/impl/azure_queue.py +24 -0
  120. spaik_sdk/server/response/__init__.py +0 -0
  121. spaik_sdk/server/response/agent_response_generator.py +39 -0
  122. spaik_sdk/server/response/response_generator.py +13 -0
  123. spaik_sdk/server/response/simple_agent_response_generator.py +14 -0
  124. spaik_sdk/server/services/__init__.py +0 -0
  125. spaik_sdk/server/services/thread_converters.py +113 -0
  126. spaik_sdk/server/services/thread_models.py +90 -0
  127. spaik_sdk/server/services/thread_service.py +91 -0
  128. spaik_sdk/server/storage/__init__.py +1 -0
  129. spaik_sdk/server/storage/base_thread_repository.py +51 -0
  130. spaik_sdk/server/storage/impl/__init__.py +0 -0
  131. spaik_sdk/server/storage/impl/in_memory_thread_repository.py +100 -0
  132. spaik_sdk/server/storage/impl/local_file_thread_repository.py +217 -0
  133. spaik_sdk/server/storage/thread_filter.py +166 -0
  134. spaik_sdk/server/storage/thread_metadata.py +53 -0
  135. spaik_sdk/thread/__init__.py +0 -0
  136. spaik_sdk/thread/adapters/__init__.py +0 -0
  137. spaik_sdk/thread/adapters/cli/__init__.py +0 -0
  138. spaik_sdk/thread/adapters/cli/block_display.py +92 -0
  139. spaik_sdk/thread/adapters/cli/display_manager.py +84 -0
  140. spaik_sdk/thread/adapters/cli/live_cli.py +235 -0
  141. spaik_sdk/thread/adapters/event_adapter.py +28 -0
  142. spaik_sdk/thread/adapters/streaming_block_adapter.py +57 -0
  143. spaik_sdk/thread/adapters/sync_adapter.py +76 -0
  144. spaik_sdk/thread/models.py +224 -0
  145. spaik_sdk/thread/thread_container.py +468 -0
  146. spaik_sdk/tools/__init__.py +0 -0
  147. spaik_sdk/tools/impl/__init__.py +0 -0
  148. spaik_sdk/tools/impl/mcp_tool_provider.py +93 -0
  149. spaik_sdk/tools/impl/search_tool_provider.py +18 -0
  150. spaik_sdk/tools/tool_provider.py +131 -0
  151. spaik_sdk/tracing/__init__.py +13 -0
  152. spaik_sdk/tracing/agent_trace.py +72 -0
  153. spaik_sdk/tracing/get_trace_sink.py +15 -0
  154. spaik_sdk/tracing/local_trace_sink.py +23 -0
  155. spaik_sdk/tracing/trace_sink.py +19 -0
  156. spaik_sdk/tracing/trace_sink_mode.py +14 -0
  157. spaik_sdk/utils/__init__.py +0 -0
  158. spaik_sdk/utils/init_logger.py +24 -0
  159. spaik_sdk-0.6.2.dist-info/METADATA +379 -0
  160. spaik_sdk-0.6.2.dist-info/RECORD +161 -0
  161. spaik_sdk-0.6.2.dist-info/WHEEL +4 -0
spaik_sdk/config/get_credentials_provider.py
@@ -0,0 +1,14 @@
+from spaik_sdk.config.credentials_provider import CredentialsProvider
+from spaik_sdk.config.env import env_config
+from spaik_sdk.config.env_credentials_provider import EnvCredentialsProvider
+
+
+def get_credentials_provider() -> CredentialsProvider:
+    provider_type = env_config.get_credentials_provider_type()
+    if provider_type == "env":
+        return EnvCredentialsProvider()
+    else:
+        raise ValueError(f"Unsupported provider type: {provider_type}")
+
+
+credentials_provider = get_credentials_provider()
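For context, a minimal usage sketch of the module-level singleton defined above. It assumes the "env" provider is configured and that the relevant environment variables (defined in env.py, not shown in this diff) are set; how the key is resolved is up to EnvCredentialsProvider.

from spaik_sdk.config.get_credentials_provider import credentials_provider

# Resolve the API key for a provider family; resolution behavior comes from
# EnvCredentialsProvider, which is not shown in this hunk.
openai_key = credentials_provider.get_provider_key("openai")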
spaik_sdk/image_gen/__init__.py
@@ -0,0 +1,9 @@
+from spaik_sdk.image_gen.image_generator import ImageGenerator
+from spaik_sdk.image_gen.options import ImageFormat, ImageGenOptions, ImageQuality
+
+__all__ = [
+    "ImageGenerator",
+    "ImageGenOptions",
+    "ImageQuality",
+    "ImageFormat",
+]
spaik_sdk/image_gen/image_generator.py
@@ -0,0 +1,83 @@
+import re
+from pathlib import Path
+from time import time
+
+from spaik_sdk.config.env import env_config
+from spaik_sdk.config.get_credentials_provider import credentials_provider
+from spaik_sdk.image_gen.options import ImageFormat, ImageGenOptions
+from spaik_sdk.image_gen.providers import google as google_provider
+from spaik_sdk.image_gen.providers import openai as openai_provider
+
+
+def _slugify(text: str, max_length: int = 50) -> str:
+    slug = re.sub(r"[^a-zA-Z0-9]+", "_", text.lower())
+    slug = slug.strip("_")
+    return slug[:max_length]
+
+
+def _generate_filename(prompt: str, output_format: ImageFormat) -> str:
+    slug = _slugify(prompt)
+    timestamp = int(time())
+    extension = output_format.value
+    return f"{slug}_{timestamp}.{extension}"
+
+
+class ImageGenerator:
+    def __init__(
+        self,
+        model: str | None = None,
+        output_dir: str = ".",
+        endpoint: str | None = None,
+        headers: dict[str, str] | None = None,
+    ):
+        self.model = model or env_config.get_image_model()
+        self.output_dir = Path(output_dir)
+        self.endpoint = endpoint
+        self.headers = headers
+
+    def _get_provider(self) -> str:
+        if self.model.startswith("gpt-image"):
+            return "openai"
+        elif self.model.startswith("gemini"):
+            return "google"
+        else:
+            raise ValueError(f"Unknown image model provider for: {self.model}")
+
+    async def generate_image(
+        self,
+        prompt: str,
+        options: ImageGenOptions | None = None,
+        output_filename: str | None = None,
+    ) -> Path:
+        opts = options or ImageGenOptions()
+        provider = self._get_provider()
+
+        if provider == "openai":
+            api_key = credentials_provider.get_provider_key("openai")
+            image_bytes = await openai_provider.generate_image(
+                prompt=prompt,
+                model=self.model,
+                api_key=api_key,
+                options=opts,
+                endpoint=self.endpoint,
+                headers=self.headers,
+            )
+        elif provider == "google":
+            api_key = credentials_provider.get_provider_key("google")
+            image_bytes = await google_provider.generate_image(
+                prompt=prompt,
+                model=self.model,
+                api_key=api_key,
+                options=opts,
+                endpoint=self.endpoint,
+                headers=self.headers,
+            )
+        else:
+            raise ValueError(f"Unsupported provider: {provider}")
+
+        filename = output_filename or _generate_filename(prompt, opts.output_format)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        output_path = self.output_dir / filename
+
+        output_path.write_bytes(image_bytes)
+        return output_path
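A usage sketch of the ImageGenerator above. The model name "gpt-image-1" is an assumption (the code only checks the "gpt-image" prefix to pick the OpenAI route), and an OpenAI API key is assumed to be available through the env credentials provider.

import asyncio

from spaik_sdk.image_gen import ImageGenerator, ImageGenOptions, ImageQuality


async def main() -> None:
    # "gpt-image-1" is assumed; any model starting with "gpt-image" routes to the OpenAI provider
    generator = ImageGenerator(model="gpt-image-1", output_dir="./generated")
    path = await generator.generate_image(
        "a watercolor fox in a pine forest",
        options=ImageGenOptions(quality=ImageQuality.HIGH),
    )
    print(f"Image written to {path}")


asyncio.run(main())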
spaik_sdk/image_gen/options.py
@@ -0,0 +1,24 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any
+
+
+class ImageQuality(Enum):
+    LOW = "low"
+    MEDIUM = "medium"
+    HIGH = "high"
+
+
+class ImageFormat(Enum):
+    PNG = "png"
+    JPEG = "jpeg"
+    WEBP = "webp"
+
+
+@dataclass
+class ImageGenOptions:
+    width: int = 1024
+    height: int = 1024
+    quality: ImageQuality = ImageQuality.MEDIUM
+    output_format: ImageFormat = ImageFormat.PNG
+    vendor: dict[str, Any] = field(default_factory=dict)
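Both providers merge options.vendor into their request payloads (see generation_config.update and payload.update in the provider modules that follow), so provider-specific keys can be passed through without SDK changes. A sketch, using a hypothetical vendor key for illustration only:

from spaik_sdk.image_gen import ImageFormat, ImageGenOptions

# "style" is a hypothetical provider-specific key, forwarded verbatim to the request payload
opts = ImageGenOptions(
    width=1536,
    height=1024,
    output_format=ImageFormat.JPEG,
    vendor={"style": "vivid"},
)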
spaik_sdk/image_gen/providers/__init__.py
File without changes
spaik_sdk/image_gen/providers/google.py
@@ -0,0 +1,75 @@
+import base64
+
+import httpx
+
+from spaik_sdk.image_gen.options import ImageGenOptions
+
+GOOGLE_GENERATIVE_ENDPOINT = "https://generativelanguage.googleapis.com/v1/models"
+
+
+def _derive_resolution_and_aspect(width: int, height: int) -> tuple[str, str]:
+    from math import gcd
+
+    divisor = gcd(width, height)
+    aspect_w = width // divisor
+    aspect_h = height // divisor
+    aspect_ratio = f"{aspect_w}:{aspect_h}"
+
+    max_dim = max(width, height)
+    if max_dim <= 1024:
+        resolution = "1K"
+    elif max_dim <= 2048:
+        resolution = "2K"
+    else:
+        resolution = "4K"
+
+    return resolution, aspect_ratio
+
+
+async def generate_image(
+    prompt: str,
+    model: str,
+    api_key: str,
+    options: ImageGenOptions,
+    endpoint: str | None = None,
+    headers: dict[str, str] | None = None,
+) -> bytes:
+    base_url = endpoint or GOOGLE_GENERATIVE_ENDPOINT
+    url = f"{base_url}/{model}:generateContent?key={api_key}"
+
+    request_headers = {
+        "Content-Type": "application/json",
+    }
+    if headers:
+        request_headers.update(headers)
+
+    resolution, aspect_ratio = _derive_resolution_and_aspect(options.width, options.height)
+
+    generation_config: dict = {
+        "responseModalities": ["image", "text"],
+        "resolution": resolution,
+        "aspectRatio": aspect_ratio,
+    }
+    generation_config.update(options.vendor)
+
+    payload = {
+        "contents": [{"parts": [{"text": prompt}]}],
+        "generationConfig": generation_config,
+    }
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        response = await client.post(url, headers=request_headers, json=payload)
+        response.raise_for_status()
+        data = response.json()
+
+    candidates = data.get("candidates", [])
+    if not candidates:
+        raise ValueError("No image generated by Gemini API")
+
+    parts = candidates[0].get("content", {}).get("parts", [])
+    for part in parts:
+        if "inlineData" in part:
+            image_b64 = part["inlineData"]["data"]
+            return base64.b64decode(image_b64)
+
+    raise ValueError("No image data found in Gemini API response")
spaik_sdk/image_gen/providers/openai.py
@@ -0,0 +1,60 @@
+import base64
+
+import httpx
+
+from spaik_sdk.image_gen.options import ImageFormat, ImageGenOptions
+
+OPENAI_IMAGES_ENDPOINT = "https://api.openai.com/v1/images/generations"
+
+
+async def generate_image(
+    prompt: str,
+    model: str,
+    api_key: str,
+    options: ImageGenOptions,
+    endpoint: str | None = None,
+    headers: dict[str, str] | None = None,
+) -> bytes:
+    url = endpoint or OPENAI_IMAGES_ENDPOINT
+
+    request_headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    if headers:
+        request_headers.update(headers)
+
+    size = f"{options.width}x{options.height}"
+
+    response_format_map = {
+        ImageFormat.PNG: "png",
+        ImageFormat.JPEG: "jpeg",
+        ImageFormat.WEBP: "webp",
+    }
+
+    payload: dict = {
+        "model": model,
+        "prompt": prompt,
+        "size": size,
+        "quality": options.quality.value,
+        "output_format": response_format_map[options.output_format],
+        "n": 1,
+    }
+    payload.update(options.vendor)
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        response = await client.post(url, headers=request_headers, json=payload)
+        if response.status_code != 200:
+            raise ValueError(f"OpenAI API error {response.status_code}: {response.text}")
+        data = response.json()
+
+    image_data = data["data"][0]
+    if "b64_json" in image_data:
+        return base64.b64decode(image_data["b64_json"])
+    elif "url" in image_data:
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            img_response = await client.get(image_data["url"])
+            img_response.raise_for_status()
+            return img_response.content
+    else:
+        raise ValueError(f"Unexpected response format: {image_data.keys()}")
spaik_sdk/llm/__init__.py
File without changes
spaik_sdk/llm/cancellation_handle.py
@@ -0,0 +1,10 @@
+from abc import ABC, abstractmethod
+
+
+class CancellationHandle(ABC):
+    """Abstract base class for handling cancellation of LLM operations."""
+
+    @abstractmethod
+    async def is_cancelled(self) -> bool:
+        """Check if the operation has been cancelled."""
+        pass
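A minimal concrete implementation sketch (not part of the package) backed by an asyncio.Event, to show how the abstract method above might be satisfied:

import asyncio

from spaik_sdk.llm.cancellation_handle import CancellationHandle


class EventCancellationHandle(CancellationHandle):
    """Illustrative handle: cancel() sets an asyncio.Event that is_cancelled() reads."""

    def __init__(self) -> None:
        self._event = asyncio.Event()

    def cancel(self) -> None:
        self._event.set()

    async def is_cancelled(self) -> bool:
        return self._event.is_set()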
spaik_sdk/llm/consumption/__init__.py
File without changes
spaik_sdk/llm/consumption/consumption_estimate.py
@@ -0,0 +1,26 @@
+from dataclasses import dataclass, field
+from typing import Any, Dict
+
+from spaik_sdk.llm.consumption.token_usage import TokenUsage
+
+
+@dataclass
+class ConsumptionEstimate:
+    """Consumption estimation for a request."""
+
+    token_usage: TokenUsage
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "token_usage": {
+                "input_tokens": self.token_usage.input_tokens,
+                "output_tokens": self.token_usage.output_tokens,
+                "total_tokens": self.token_usage.total_tokens,
+                "reasoning_tokens": self.token_usage.reasoning_tokens,
+                "cache_creation_tokens": self.token_usage.cache_creation_tokens,
+                "cache_read_tokens": self.token_usage.cache_read_tokens,
+            },
+            "metadata": self.metadata or {},
+        }
spaik_sdk/llm/consumption/consumption_estimate_builder.py
@@ -0,0 +1,113 @@
+from typing import Any, Dict, Optional
+
+from spaik_sdk.llm.consumption.consumption_estimate import ConsumptionEstimate
+from spaik_sdk.llm.consumption.token_usage import TokenUsage
+from spaik_sdk.utils.init_logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class ConsumptionEstimateBuilder:
+    """Builder for creating consumption estimates from various data sources."""
+
+    def __init__(self):
+        self._token_usage: Optional[TokenUsage] = None
+        self._metadata: Dict[str, Any] = {}
+
+    def from_usage_metadata(self, usage_metadata: Dict[str, Any]) -> "ConsumptionEstimateBuilder":
+        """Extract consumption data from LangChain usage_metadata."""
+
+        logger.info(f"usage_metadata: {usage_metadata}")
+        if not usage_metadata:
+            return self
+
+        # Extract basic token counts
+        input_tokens = usage_metadata.get("input_tokens", 0)
+        output_tokens = usage_metadata.get("output_tokens", 0)
+        total_tokens = usage_metadata.get("total_tokens", 0)
+
+        # Extract reasoning tokens (Google/OpenAI)
+        reasoning_tokens = 0
+        output_token_details = usage_metadata.get("output_token_details")
+        if output_token_details:
+            reasoning_tokens = output_token_details.get("reasoning", 0)
+
+        # Extract cache tokens (Anthropic)
+        cache_creation_tokens = 0
+        cache_read_tokens = 0
+        input_token_details = usage_metadata.get("input_token_details")
+        if input_token_details:
+            cache_creation_tokens = input_token_details.get("cache_creation", 0)
+            cache_read_tokens = input_token_details.get("cache_read", 0)
+
+        self._token_usage = TokenUsage(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=total_tokens,
+            reasoning_tokens=reasoning_tokens,
+            cache_creation_tokens=cache_creation_tokens,
+            cache_read_tokens=cache_read_tokens,
+        )
+
+        logger.info(f"self._token_usage: {self._token_usage}")
+        return self
+
+    def from_response_metadata(self, response_metadata: Any) -> "ConsumptionEstimateBuilder":
+        """Extract consumption data from response_metadata when usage_metadata is unavailable."""
+        if not response_metadata:
+            return self
+
+        # Store metadata for potential estimation
+        self._metadata.update(
+            {
+                "finish_reason": getattr(response_metadata, "finish_reason", None),
+                "stop_reason": getattr(response_metadata, "stop_reason", None),
+                "response_id": getattr(response_metadata, "id", None),
+            }
+        )
+
+        return self
+
+    def from_event_metadata(self, metadata: Dict[str, Any]) -> "ConsumptionEstimateBuilder":
+        """Extract relevant info from LangChain event metadata."""
+        if not metadata:
+            return self
+
+        # Store any potentially useful metadata
+        self._metadata.update(
+            {
+                "ls_provider": metadata.get("ls_provider"),
+                "ls_model_name": metadata.get("ls_model_name"),
+            }
+        )
+
+        return self
+
+    def estimate_from_content(self, content: str) -> "ConsumptionEstimateBuilder":
+        """Rough estimation when no usage metadata is available."""
+        if not content or self._token_usage:
+            return self
+
+        # Very rough token estimation (4 chars per token average)
+        estimated_output_tokens = len(content) // 4
+        estimated_input_tokens = estimated_output_tokens * 5
+
+        self._token_usage = TokenUsage(
+            input_tokens=estimated_input_tokens,
+            output_tokens=estimated_output_tokens,
+            total_tokens=estimated_output_tokens,
+        )
+
+        self._metadata["estimation_method"] = "content_length"
+
+        return self
+
+    def build(self) -> Optional[ConsumptionEstimate]:
+        """Build the final ConsumptionEstimate."""
+        if not self._token_usage:
+            return None
+
+        return ConsumptionEstimate(
+            token_usage=self._token_usage,
+            metadata=self._metadata,
+        )
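A usage sketch of the builder above against a hand-written usage_metadata dict (the values are illustrative, not taken from the package):

from spaik_sdk.llm.consumption.consumption_estimate_builder import ConsumptionEstimateBuilder

usage_metadata = {
    "input_tokens": 1200,
    "output_tokens": 300,
    "total_tokens": 1500,
    "output_token_details": {"reasoning": 64},
}

# build() returns None when no token data could be derived
estimate = ConsumptionEstimateBuilder().from_usage_metadata(usage_metadata).build()
if estimate:
    print(estimate.to_dict())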
spaik_sdk/llm/consumption/consumption_extractor.py
@@ -0,0 +1,59 @@
+from typing import Any, Dict, Optional
+
+from spaik_sdk.llm.consumption.consumption_estimate import ConsumptionEstimate
+from spaik_sdk.llm.consumption.consumption_estimate_builder import ConsumptionEstimateBuilder
+from spaik_sdk.utils.init_logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class ConsumptionExtractor:
+    """Extracts consumption information from LangChain streaming events."""
+
+    def extract_from_stream_end(self, data: Dict[str, Any]) -> Optional[ConsumptionEstimate]:
+        """Extract consumption data from on_chat_model_end event data."""
+        builder = ConsumptionEstimateBuilder()
+
+        # Extract from event metadata if available
+        if "metadata" in data:
+            builder.from_event_metadata(data["metadata"])
+
+        # Extract from output data
+        if "output" in data:
+            output = data["output"]
+
+            # Try usage_metadata first (preferred)
+            usage_metadata = getattr(output, "usage_metadata", None)
+            if usage_metadata:
+                usage_metadata = dict(usage_metadata)
+                builder.from_usage_metadata(usage_metadata)
+            else:
+                # Fallback to response_metadata
+                response_metadata = getattr(output, "response_metadata", None)
+                if response_metadata:
+                    builder.from_response_metadata(response_metadata)
+
+                # Last resort: estimate from content
+                content = getattr(output, "content", "")
+                if isinstance(content, str):
+                    builder.estimate_from_content(content)
+
+        consumption_estimate = builder.build()
+
+        if consumption_estimate:
+            cache_info = ""
+            if consumption_estimate.token_usage.cache_creation_tokens > 0 or consumption_estimate.token_usage.cache_read_tokens > 0:
+                cache_info = (
+                    f", cache_create: {consumption_estimate.token_usage.cache_creation_tokens}, "
+                    f"cache_read: {consumption_estimate.token_usage.cache_read_tokens}"
+                )
+
+            logger.info(
+                f"📊 Consumption tracking - "
+                f"Tokens: {consumption_estimate.token_usage.total_tokens} "
+                f"(in: {consumption_estimate.token_usage.input_tokens}, out: {consumption_estimate.token_usage.output_tokens}, "
+                f"reasoning: {consumption_estimate.token_usage.reasoning_tokens}{cache_info})"
+            )
+            logger.debug(f"📊 Full consumption data: {consumption_estimate.to_dict()}")
+
+        return consumption_estimate
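A rough sketch of calling the extractor with a stand-in for a LangChain on_chat_model_end payload; SimpleNamespace is used here purely to fake the message object shape (usage_metadata, response_metadata, content) that the extractor reads via getattr:

from types import SimpleNamespace

from spaik_sdk.llm.consumption.consumption_extractor import ConsumptionExtractor

# Illustrative stand-in, not a real LangChain event payload
fake_output = SimpleNamespace(
    usage_metadata={"input_tokens": 500, "output_tokens": 120, "total_tokens": 620},
    response_metadata=None,
    content="",
)
estimate = ConsumptionExtractor().extract_from_stream_end({"output": fake_output})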
spaik_sdk/llm/consumption/token_usage.py
@@ -0,0 +1,31 @@
+from dataclasses import dataclass
+from typing import Any, Mapping
+
+
+@dataclass
+class TokenUsage:
+    input_tokens: int = 0
+    output_tokens: int = 0
+    total_tokens: int = 0
+    reasoning_tokens: int = 0
+    cache_creation_tokens: int = 0
+    cache_read_tokens: int = 0
+
+    def __post_init__(self):
+        if self.total_tokens == 0:
+            self.total_tokens = self.input_tokens + self.output_tokens
+
+    @classmethod
+    def from_langchain(cls, usage: Mapping[str, Any]) -> "TokenUsage":
+        """Create TokenUsage from LangChain usage_metadata."""
+        output_details = usage.get("output_token_details", {})
+        input_details = usage.get("input_token_details", {})
+
+        return cls(
+            input_tokens=usage.get("input_tokens", 0),
+            output_tokens=usage.get("output_tokens", 0),
+            total_tokens=usage.get("total_tokens", 0),
+            reasoning_tokens=output_details.get("reasoning", 0) if isinstance(output_details, dict) else 0,
+            cache_creation_tokens=input_details.get("cache_creation", 0) if isinstance(input_details, dict) else 0,
+            cache_read_tokens=input_details.get("cache_read", 0) if isinstance(input_details, dict) else 0,
+        )
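For reference, the classmethod above tolerates missing detail fields; a quick illustrative call with made-up numbers:

from spaik_sdk.llm.consumption.token_usage import TokenUsage

usage = TokenUsage.from_langchain({
    "input_tokens": 950,
    "output_tokens": 180,
    "total_tokens": 1130,
    "input_token_details": {"cache_read": 400},
})
# usage.cache_read_tokens == 400; reasoning_tokens and cache_creation_tokens default to 0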
spaik_sdk/llm/converters.py
@@ -0,0 +1,146 @@
+import base64
+from typing import Any, Dict, List, Optional, Union, cast
+
+from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
+
+from spaik_sdk.attachments.models import Attachment
+from spaik_sdk.attachments.provider_support import is_supported_by_provider
+from spaik_sdk.attachments.storage.base_file_storage import BaseFileStorage
+from spaik_sdk.thread.models import MessageBlock, MessageBlockType, ThreadMessage
+from spaik_sdk.utils.init_logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def convert_thread_message_to_langchain(
+    thread_message: ThreadMessage,
+    include_author_name: bool = False,
+) -> BaseMessage:
+    processed_blocks = [_process_message_block(block) for block in thread_message.blocks]
+    content = "\n".join(filter(None, processed_blocks))
+    author_prefix = f"{'[' + thread_message.author_name + ']: ' if include_author_name else ''}"
+    if thread_message.ai:
+        return AIMessage(content=f"{author_prefix}{content}")
+    else:
+        return HumanMessage(content=f"{author_prefix}{content}")
+
+
+async def convert_thread_message_to_langchain_multimodal(
+    thread_message: ThreadMessage,
+    file_storage: BaseFileStorage,
+    provider_family: str = "openai",
+    include_author_name: bool = False,
+) -> BaseMessage:
+    processed_blocks = [_process_message_block(block) for block in thread_message.blocks]
+    text_content = "\n".join(filter(None, processed_blocks))
+    author_prefix = f"{'[' + thread_message.author_name + ']: ' if include_author_name else ''}"
+
+    if thread_message.ai:
+        return AIMessage(content=f"{author_prefix}{text_content}")
+
+    if not thread_message.attachments:
+        return HumanMessage(content=f"{author_prefix}{text_content}")
+
+    content_parts: List[Dict[str, Any]] = []
+
+    if text_content:
+        content_parts.append({"type": "text", "text": f"{author_prefix}{text_content}"})
+
+    for attachment in thread_message.attachments:
+        # Check if MIME type is supported by the provider
+        if not is_supported_by_provider(attachment.mime_type, provider_family):
+            logger.warning(f"Skipping unsupported attachment type {attachment.mime_type} for provider {provider_family}")
+            filename = attachment.filename or "unnamed file"
+            fallback_msg = (
+                f"[SYSTEM: The user attempted to attach a file '{filename}' of type '{attachment.mime_type}', "
+                f"but this file type is not supported by the current AI model. "
+                f"Please inform the user that you cannot process this file type "
+                f"and suggest supported alternatives like images (PNG, JPEG, GIF, WEBP) or PDF documents.]"
+            )
+            content_parts.append({"type": "text", "text": fallback_msg})
+            continue
+
+        attachment_content = await _convert_attachment_to_content_part(attachment, file_storage, provider_family)
+        if attachment_content:
+            content_parts.append(attachment_content)
+
+    if not content_parts:
+        return HumanMessage(content="")
+
+    return HumanMessage(content=cast(List[Union[str, Dict[str, Any]]], content_parts))
+
+
+async def _convert_attachment_to_content_part(
+    attachment: Attachment,
+    file_storage: BaseFileStorage,
+    provider_family: str = "openai",
+) -> Optional[Dict[str, Any]]:
+    try:
+        data, metadata = await file_storage.retrieve(attachment.file_id)
+    except FileNotFoundError:
+        return None
+
+    mime_type = attachment.mime_type
+    b64_data = base64.b64encode(data).decode("utf-8")
+
+    if mime_type.startswith("image/"):
+        if provider_family == "anthropic":
+            return {
+                "type": "image",
+                "source": {"type": "base64", "media_type": mime_type, "data": b64_data},
+            }
+        return {
+            "type": "image_url",
+            "image_url": {"url": f"data:{mime_type};base64,{b64_data}"},
+        }
+    elif mime_type == "application/pdf":
+        if provider_family == "anthropic":
+            return {
+                "type": "document",
+                "source": {"type": "base64", "media_type": mime_type, "data": b64_data},
+            }
+        return {
+            "type": "image_url",
+            "image_url": {"url": f"data:{mime_type};base64,{b64_data}"},
+        }
+    elif mime_type.startswith("audio/"):
+        return {
+            "type": "input_audio",
+            "input_audio": {"data": b64_data, "format": _get_audio_format(mime_type)},
+        }
+    elif mime_type.startswith("video/"):
+        return {
+            "type": "video_url",
+            "video_url": {"url": f"data:{mime_type};base64,{b64_data}"},
+        }
+    else:
+        return {
+            "type": "file",
+            "file": {"data": b64_data, "mime_type": mime_type},
+        }
+
+
+def _get_audio_format(mime_type: str) -> str:
+    format_map = {
+        "audio/mpeg": "mp3",
+        "audio/mp3": "mp3",
+        "audio/wav": "wav",
+        "audio/ogg": "ogg",
+        "audio/webm": "webm",
+    }
+    return format_map.get(mime_type, "wav")
+
+
+def _process_message_block(block: MessageBlock) -> str:
+    if block.type == MessageBlockType.REASONING:
+        return "<thinking/>"
+    elif block.type == MessageBlockType.TOOL_USE:
+        tool_name = block.tool_name or "unknown"
+        return f'<tool_call tool="{tool_name}"/>'
+    elif block.type == MessageBlockType.ERROR:
+        content = block.content or "unknown error"
+        return f'<error msg="{content}"/>'
+    elif block.type == MessageBlockType.PLAIN:
+        return block.content or ""
+    else:
+        return block.content or ""
spaik_sdk/llm/cost/__init__.py
@@ -0,0 +1 @@
+