stratifyai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. cli/__init__.py +5 -0
  2. cli/stratifyai_cli.py +1753 -0
  3. stratifyai/__init__.py +113 -0
  4. stratifyai/api_key_helper.py +372 -0
  5. stratifyai/caching.py +279 -0
  6. stratifyai/chat/__init__.py +54 -0
  7. stratifyai/chat/builder.py +366 -0
  8. stratifyai/chat/stratifyai_anthropic.py +194 -0
  9. stratifyai/chat/stratifyai_bedrock.py +200 -0
  10. stratifyai/chat/stratifyai_deepseek.py +194 -0
  11. stratifyai/chat/stratifyai_google.py +194 -0
  12. stratifyai/chat/stratifyai_grok.py +194 -0
  13. stratifyai/chat/stratifyai_groq.py +195 -0
  14. stratifyai/chat/stratifyai_ollama.py +201 -0
  15. stratifyai/chat/stratifyai_openai.py +209 -0
  16. stratifyai/chat/stratifyai_openrouter.py +201 -0
  17. stratifyai/chunking.py +158 -0
  18. stratifyai/client.py +292 -0
  19. stratifyai/config.py +1273 -0
  20. stratifyai/cost_tracker.py +257 -0
  21. stratifyai/embeddings.py +245 -0
  22. stratifyai/exceptions.py +91 -0
  23. stratifyai/models.py +59 -0
  24. stratifyai/providers/__init__.py +5 -0
  25. stratifyai/providers/anthropic.py +330 -0
  26. stratifyai/providers/base.py +183 -0
  27. stratifyai/providers/bedrock.py +634 -0
  28. stratifyai/providers/deepseek.py +39 -0
  29. stratifyai/providers/google.py +39 -0
  30. stratifyai/providers/grok.py +39 -0
  31. stratifyai/providers/groq.py +39 -0
  32. stratifyai/providers/ollama.py +43 -0
  33. stratifyai/providers/openai.py +344 -0
  34. stratifyai/providers/openai_compatible.py +372 -0
  35. stratifyai/providers/openrouter.py +39 -0
  36. stratifyai/py.typed +2 -0
  37. stratifyai/rag.py +381 -0
  38. stratifyai/retry.py +185 -0
  39. stratifyai/router.py +643 -0
  40. stratifyai/summarization.py +179 -0
  41. stratifyai/utils/__init__.py +11 -0
  42. stratifyai/utils/bedrock_validator.py +136 -0
  43. stratifyai/utils/code_extractor.py +327 -0
  44. stratifyai/utils/csv_extractor.py +197 -0
  45. stratifyai/utils/file_analyzer.py +192 -0
  46. stratifyai/utils/json_extractor.py +219 -0
  47. stratifyai/utils/log_extractor.py +267 -0
  48. stratifyai/utils/model_selector.py +324 -0
  49. stratifyai/utils/provider_validator.py +442 -0
  50. stratifyai/utils/token_counter.py +186 -0
  51. stratifyai/vectordb.py +344 -0
  52. stratifyai-0.1.0.dist-info/METADATA +263 -0
  53. stratifyai-0.1.0.dist-info/RECORD +57 -0
  54. stratifyai-0.1.0.dist-info/WHEEL +5 -0
  55. stratifyai-0.1.0.dist-info/entry_points.txt +2 -0
  56. stratifyai-0.1.0.dist-info/licenses/LICENSE +21 -0
  57. stratifyai-0.1.0.dist-info/top_level.txt +2 -0
stratifyai/chunking.py ADDED
@@ -0,0 +1,158 @@
+ """Content chunking utilities for splitting large files into manageable pieces."""
+
+ import re
+ from typing import List
+
+
+ def chunk_content(
+     content: str,
+     chunk_size: int = 50000,
+     overlap: int = 500,
+     preserve_boundaries: bool = True
+ ) -> List[str]:
+     """
+     Split content into chunks at natural boundaries.
+
+     Splits at paragraph boundaries when possible, falling back to sentence
+     boundaries, then character boundaries for very large paragraphs.
+
+     Args:
+         content: The text content to chunk
+         chunk_size: Target size for each chunk in characters (default: 50000)
+         overlap: Number of characters to overlap between chunks (default: 500)
+         preserve_boundaries: Whether to split at natural boundaries vs fixed positions
+
+     Returns:
+         List of content chunks
+
+     Examples:
+         >>> text = "Paragraph 1.\\n\\nParagraph 2.\\n\\nParagraph 3."
+         >>> chunks = chunk_content(text, chunk_size=20)
+         >>> len(chunks)
+         3
+     """
+     if not content:
+         return []
+
+     # If content is smaller than chunk_size, return as-is
+     if len(content) <= chunk_size:
+         return [content]
+
+     chunks = []
+
+     if preserve_boundaries:
+         # Split at paragraph boundaries first
+         paragraphs = re.split(r'\n\s*\n', content)
+
+         current_chunk = ""
+         for paragraph in paragraphs:
+             # If adding this paragraph would exceed chunk_size
+             if len(current_chunk) + len(paragraph) + 2 > chunk_size:
+                 # If current chunk is not empty, save it
+                 if current_chunk:
+                     chunks.append(current_chunk.strip())
+                 # Start new chunk with overlap from previous
+                 if overlap > 0 and len(current_chunk) > overlap:
+                     current_chunk = current_chunk[-overlap:]
+                 else:
+                     current_chunk = ""
+
+                 # If paragraph itself is larger than chunk_size, split it
+                 if len(paragraph) > chunk_size:
+                     sub_chunks = _split_large_paragraph(paragraph, chunk_size, overlap)
+                     chunks.extend(sub_chunks[:-1])  # Add all but last
+                     current_chunk = sub_chunks[-1] if sub_chunks else ""
+                 else:
+                     # Append to the carried-over overlap tail so consecutive
+                     # chunks share context
+                     if current_chunk:
+                         current_chunk += "\n\n" + paragraph
+                     else:
+                         current_chunk = paragraph
+             else:
+                 # Add paragraph to current chunk
+                 if current_chunk:
+                     current_chunk += "\n\n" + paragraph
+                 else:
+                     current_chunk = paragraph
+
+         # Add final chunk
+         if current_chunk.strip():
+             chunks.append(current_chunk.strip())
+
+     else:
+         # Fixed-position chunking (fallback)
+         position = 0
+         while position < len(content):
+             end_position = position + chunk_size
+             chunk = content[position:end_position]
+             chunks.append(chunk)
+             position = max(end_position - overlap, position + 1)  # always advance
+
+     return chunks
+
+
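A minimal usage sketch of chunk_content (the input text and the sizes are illustrative, not taken from the package's own docs):

    from stratifyai.chunking import chunk_content

    text = "Intro paragraph.\n\n" + "Body paragraph with details.\n\n" * 30
    chunks = chunk_content(text, chunk_size=200, overlap=40)
    # Chunks break at paragraph boundaries; each new chunk repeats up to
    # 40 trailing characters of the previous one for context.
    print(len(chunks))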
+ def _split_large_paragraph(paragraph: str, chunk_size: int, overlap: int) -> List[str]:
+     """
+     Split a large paragraph at sentence boundaries.
+
+     Args:
+         paragraph: The paragraph to split
+         chunk_size: Target chunk size
+         overlap: Overlap between chunks
+
+     Returns:
+         List of paragraph chunks
+     """
+     # Split at sentence boundaries
+     sentences = re.split(r'(?<=[.!?])\s+', paragraph)
+
+     chunks = []
+     current_chunk = ""
+
+     for sentence in sentences:
+         if len(current_chunk) + len(sentence) + 1 > chunk_size:
+             if current_chunk:
+                 chunks.append(current_chunk.strip())
+                 if overlap > 0 and len(current_chunk) > overlap:
+                     current_chunk = current_chunk[-overlap:] + " " + sentence
+                 else:
+                     current_chunk = sentence
+             else:
+                 # Sentence itself is too large - force split at character boundaries
+                 for start in range(0, len(sentence), chunk_size):
+                     chunks.append(sentence[start:start + chunk_size])
+                 current_chunk = chunks.pop()
+         else:
+             if current_chunk:
+                 current_chunk += " " + sentence
+             else:
+                 current_chunk = sentence
+
+     if current_chunk.strip():
+         chunks.append(current_chunk.strip())
+
+     return chunks
+
+
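The sentence splitter keeps terminal punctuation attached via a lookbehind; a quick illustration of the regex on its own:

    import re

    re.split(r'(?<=[.!?])\s+', "One. Two! Three?")
    # -> ['One.', 'Two!', 'Three?']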
+ def get_chunk_metadata(chunks: List[str]) -> dict:
+     """
+     Get metadata about chunks.
+
+     Args:
+         chunks: List of content chunks
+
+     Returns:
+         Dictionary with chunk statistics
+     """
+     if not chunks:
+         return {
+             "num_chunks": 0,
+             "total_chars": 0,
+             "avg_chunk_size": 0,
+             "min_chunk_size": 0,
+             "max_chunk_size": 0,
+         }
+
+     chunk_sizes = [len(chunk) for chunk in chunks]
+
+     return {
+         "num_chunks": len(chunks),
+         "total_chars": sum(chunk_sizes),
+         "avg_chunk_size": int(sum(chunk_sizes) / len(chunks)),
+         "min_chunk_size": min(chunk_sizes),
+         "max_chunk_size": max(chunk_sizes),
+     }
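Putting the module together, a hedged end-to-end sketch (the file path is hypothetical):

    from stratifyai.chunking import chunk_content, get_chunk_metadata

    with open("big_transcript.txt") as f:  # hypothetical input file
        content = f.read()

    chunks = chunk_content(content)  # defaults: 50000 chars, 500 overlap
    stats = get_chunk_metadata(chunks)
    print(stats)  # num_chunks, total_chars, avg/min/max chunk sizes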
stratifyai/client.py ADDED
@@ -0,0 +1,292 @@
+ """Unified client for accessing multiple LLM providers."""
+
+ import asyncio
+ import time
+ from enum import Enum
+ from typing import AsyncIterator, Dict, Optional, Type, Union
+
+ from .config import MODEL_CATALOG
+ from .exceptions import InvalidModelError, InvalidProviderError
+ from .models import ChatRequest, ChatResponse, Message
+ from .providers.base import BaseProvider
+ from .providers.openai import OpenAIProvider
+ from .providers.anthropic import AnthropicProvider
+ from .providers.google import GoogleProvider
+ from .providers.deepseek import DeepSeekProvider
+ from .providers.groq import GroqProvider
+ from .providers.grok import GrokProvider
+ from .providers.openrouter import OpenRouterProvider
+ from .providers.ollama import OllamaProvider
+ from .providers.bedrock import BedrockProvider
+
+
+ class ProviderType(str, Enum):
+     """Supported provider types."""
+     OPENAI = "openai"
+     ANTHROPIC = "anthropic"
+     GOOGLE = "google"
+     DEEPSEEK = "deepseek"
+     GROQ = "groq"
+     GROK = "grok"
+     OPENROUTER = "openrouter"
+     OLLAMA = "ollama"
+     BEDROCK = "bedrock"
+
+
+ class LLMClient:
+     """Unified client for all LLM providers."""
+
+     # Provider registry maps provider names to provider classes
+     _provider_registry: Dict[str, Type[BaseProvider]] = {
+         "openai": OpenAIProvider,
+         "anthropic": AnthropicProvider,
+         "google": GoogleProvider,
+         "deepseek": DeepSeekProvider,
+         "groq": GroqProvider,
+         "grok": GrokProvider,
+         "openrouter": OpenRouterProvider,
+         "ollama": OllamaProvider,
+         "bedrock": BedrockProvider,
+     }
+
+     def __init__(
+         self,
+         provider: Optional[str] = None,
+         api_key: Optional[str] = None,
+         config: Optional[dict] = None
+     ):
+         """
+         Initialize unified LLM client.
+
+         Args:
+             provider: Provider name (openai, anthropic, etc.)
+                 If None, provider will be auto-detected from model name
+             api_key: API key for the provider (defaults to env var)
+             config: Optional provider-specific configuration
+
+         Raises:
+             InvalidProviderError: If provider is not supported
+         """
+         self.provider_name = provider
+         self.api_key = api_key
+         self.config = config or {}
+         self._provider_instance = None
+
+         # Initialize provider if specified
+         if provider:
+             self._initialize_provider(provider)
+
+     def _initialize_provider(self, provider: str) -> None:
+         """
+         Initialize a specific provider.
+
+         Args:
+             provider: Provider name
+
+         Raises:
+             InvalidProviderError: If provider not supported
+         """
+         if provider not in self._provider_registry:
+             raise InvalidProviderError(
+                 f"Provider '{provider}' not supported. "
+                 f"Available providers: {list(self._provider_registry.keys())}"
+             )
+
+         provider_class = self._provider_registry[provider]
+         self._provider_instance = provider_class(
+             api_key=self.api_key,
+             config=self.config
+         )
+
+     def _detect_provider(self, model: str) -> str:
+         """
+         Auto-detect provider from model name.
+
+         Args:
+             model: Model name
+
+         Returns:
+             Provider name
+
+         Raises:
+             InvalidModelError: If model not found in any provider
+         """
+         for provider_name, models in MODEL_CATALOG.items():
+             if model in models:
+                 return provider_name
+
+         raise InvalidModelError(
+             model,
+             "any provider"
+         )
+
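Detection is a plain lookup over MODEL_CATALOG. The catalog's real entries live in config.py (not shown here); its shape is only inferred from this loop, so the sketch below uses hypothetical entries:

    # Hypothetical catalog shape: {provider_name: {model_name: metadata, ...}, ...}
    catalog = {
        "openai": {"gpt-4.1-mini": {}},
        "anthropic": {"claude-3-5-sonnet": {}},
    }
    provider = next(
        (name for name, models in catalog.items() if "gpt-4.1-mini" in models),
        None,
    )
    assert provider == "openai"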
+     async def chat(
+         self,
+         model: str,
+         messages: list[Message],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         stream: bool = False,
+         **kwargs
+     ) -> Union[ChatResponse, AsyncIterator[ChatResponse]]:
+         """
+         Execute a chat completion request.
+
+         Args:
+             model: Model name (e.g., "gpt-4.1-mini", "claude-3-5-sonnet")
+             messages: List of conversation messages
+             temperature: Sampling temperature (0.0 - 2.0)
+             max_tokens: Maximum tokens to generate
+             stream: Whether to stream the response
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             Chat completion response, or AsyncIterator if streaming
+
+         Raises:
+             InvalidModelError: If model not supported
+             InvalidProviderError: If provider not supported
+         """
+         # Auto-detect provider if not set
+         if not self._provider_instance:
+             provider = self._detect_provider(model)
+             self._initialize_provider(provider)
+
+         # Build request
+         request = ChatRequest(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=stream,
+             **kwargs
+         )
+
+         # Execute request
+         if stream:
+             return self._provider_instance.chat_completion_stream(request)
+         else:
+             return await self._provider_instance.chat_completion(request)
+
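A minimal async usage sketch; the Message constructor fields (role, content) are assumed, since models.py is not shown in this section:

    import asyncio

    from stratifyai.client import LLMClient
    from stratifyai.models import Message

    async def main():
        client = LLMClient()  # provider auto-detected from the model name
        response = await client.chat(
            model="gpt-4.1-mini",
            messages=[Message(role="user", content="Say hello.")],  # assumed fields
            max_tokens=50,
        )
        print(response)

    asyncio.run(main())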
+     async def chat_completion(self, request: ChatRequest) -> ChatResponse:
+         """
+         Execute a chat completion request using a ChatRequest object.
+
+         Args:
+             request: Unified chat request
+
+         Returns:
+             Chat completion response
+
+         Raises:
+             InvalidModelError: If model not supported
+             InvalidProviderError: If provider not supported
+         """
+         # Auto-detect provider if not set
+         if not self._provider_instance:
+             provider = self._detect_provider(request.model)
+             self._initialize_provider(provider)
+
+         # Capture timing
+         start_time = time.perf_counter()
+         response = await self._provider_instance.chat_completion(request)
+         latency_ms = (time.perf_counter() - start_time) * 1000
+
+         # Add latency to response
+         response.latency_ms = latency_ms
+         return response
+
+     async def chat_completion_stream(
+         self, request: ChatRequest
+     ) -> AsyncIterator[ChatResponse]:
+         """
+         Execute a streaming chat completion request.
+
+         Args:
+             request: Unified chat request
+
+         Yields:
+             Chat completion response chunks
+
+         Raises:
+             InvalidModelError: If model not supported
+             InvalidProviderError: If provider not supported
+         """
+         # Auto-detect provider if not set
+         if not self._provider_instance:
+             provider = self._detect_provider(request.model)
+             self._initialize_provider(provider)
+
+         return self._provider_instance.chat_completion_stream(request)
+
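Note that chat_completion_stream is itself a coroutine that returns the iterator, so it is awaited once before iterating. A hedged sketch (ChatRequest/Message fields assumed):

    from stratifyai.models import ChatRequest, Message

    async def stream_demo(client: LLMClient) -> None:
        request = ChatRequest(
            model="gpt-4.1-mini",
            messages=[Message(role="user", content="Stream a haiku.")],  # assumed fields
            stream=True,
        )
        stream = await client.chat_completion_stream(request)
        async for chunk in stream:
            print(chunk)  # each chunk is a partial ChatResponse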
+     def chat_sync(
+         self,
+         model: str,
+         messages: list[Message],
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         **kwargs
+     ) -> ChatResponse:
+         """
+         Synchronous wrapper for chat().
+
+         Args:
+             model: Model name
+             messages: List of conversation messages
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             Chat completion response
+         """
+         return asyncio.run(self.chat(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=False,
+             **kwargs
+         ))
+
+     def chat_completion_sync(self, request: ChatRequest) -> ChatResponse:
+         """
+         Synchronous wrapper for chat_completion().
+
+         Args:
+             request: Unified chat request
+
+         Returns:
+             Chat completion response
+         """
+         return asyncio.run(self.chat_completion(request))
+
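Because the sync wrappers call asyncio.run(), they suit plain scripts rather than code already running inside an event loop. A sketch (model name and Message fields illustrative):

    from stratifyai.client import LLMClient
    from stratifyai.models import Message

    client = LLMClient(provider="anthropic")
    response = client.chat_sync(
        model="claude-3-5-sonnet",  # illustrative model name
        messages=[Message(role="user", content="Summarize RAG in one line.")],
    )
    print(response)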
+     @classmethod
+     def get_supported_providers(cls) -> list[str]:
+         """
+         Get list of supported providers.
+
+         Returns:
+             List of provider names
+         """
+         return list(cls._provider_registry.keys())
+
+     @classmethod
+     def get_supported_models(cls, provider: Optional[str] = None) -> list[str]:
+         """
+         Get list of supported models.
+
+         Args:
+             provider: Optional provider name to filter models
+
+         Returns:
+             List of model names
+         """
+         if provider:
+             return list(MODEL_CATALOG.get(provider, {}).keys())
+
+         # Return all models from all providers
+         all_models = []
+         for models in MODEL_CATALOG.values():
+             all_models.extend(models.keys())
+         return all_models
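Both helpers are classmethods, so no client instance is needed; a short sketch:

    from stratifyai.client import LLMClient

    print(LLMClient.get_supported_providers())
    # ['openai', 'anthropic', 'google', 'deepseek', 'groq', 'grok',
    #  'openrouter', 'ollama', 'bedrock']
    print(LLMClient.get_supported_models("openai")[:5])  # first few model names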