solwyn 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
solwyn/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """Solwyn -- AI Agent Control Plane SDK.
2
+
3
+ Drop-in wrapper for ``openai.OpenAI`` and ``anthropic.Anthropic`` clients
4
+ that adds hard spending caps, automatic provider failover, and per-agent
5
+ cost attribution -- without ever seeing customer prompts.
6
+ """
7
+
8
+ from importlib.metadata import PackageNotFoundError, version
9
+
10
# Resolve the version from the installed distribution's metadata. When the
# "solwyn" distribution is not installed, fall back to a development marker.
try:
    __version__ = version("solwyn")
except PackageNotFoundError:
    __version__ = "0.0.0-dev"
14
+
15
+ from solwyn.client import AsyncSolwyn, Solwyn
16
+ from solwyn.config import SolwynConfig
17
+ from solwyn.exceptions import (
18
+ BudgetExceededError,
19
+ ConfigurationError,
20
+ ProviderUnavailableError,
21
+ SolwynError,
22
+ )
23
+
24
+ __all__ = [
25
+ "__version__",
26
+ "Solwyn",
27
+ "AsyncSolwyn",
28
+ "SolwynConfig",
29
+ "SolwynError",
30
+ "BudgetExceededError",
31
+ "ProviderUnavailableError",
32
+ "ConfigurationError",
33
+ ]
solwyn/_base.py ADDED
@@ -0,0 +1,133 @@
1
+ """Shared sans-I/O logic for Solwyn clients.
2
+
3
+ Contains _SolwynBase with config, budget logic, metadata formatting,
4
+ and pricing calculations. No I/O -- sync and async clients inherit
5
+ from this and add their own HTTP layer.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import uuid
11
+ from datetime import UTC, datetime
12
+
13
+ from solwyn._token_details import TokenDetails
14
+ from solwyn._types import CallStatus, MetadataEvent, ProviderName
15
+ from solwyn.circuit_breaker import CircuitBreaker
16
+ from solwyn.config import SolwynConfig
17
+ from solwyn.exceptions import ProviderUnavailableError
18
+ from solwyn.tokenizer import TokenizerManager
19
+
20
+
21
+ class _SolwynBase:
22
+ """Shared sans-I/O base class for Solwyn sync and async clients.
23
+
24
+ Provides:
25
+ - Token estimation and cost calculation
26
+ - Metadata event construction
27
+ - Budget request construction
28
+ - Circuit breaker management and provider selection
29
+ - SDK instance identity
30
+ """
31
+
32
+ def __init__(self, config: SolwynConfig) -> None:
33
+ self._config = config
34
+ self._sdk_instance_id = str(uuid.uuid4())
35
+ self._tokenizer = TokenizerManager()
36
+
37
+ # One circuit breaker per configured provider
38
+ self._circuit_breakers: dict[str, CircuitBreaker] = {}
39
+ self._circuit_breakers[config.primary_provider.value] = CircuitBreaker(
40
+ failure_threshold=config.circuit_breaker_failure_threshold,
41
+ recovery_timeout=config.circuit_breaker_recovery_timeout,
42
+ success_threshold=config.circuit_breaker_success_threshold,
43
+ )
44
+ if config.fallback_provider is not None:
45
+ self._circuit_breakers[config.fallback_provider.value] = CircuitBreaker(
46
+ failure_threshold=config.circuit_breaker_failure_threshold,
47
+ recovery_timeout=config.circuit_breaker_recovery_timeout,
48
+ success_threshold=config.circuit_breaker_success_threshold,
49
+ )
50
+
51
+ def _build_metadata_event(
52
+ self,
53
+ *,
54
+ project_id: str,
55
+ model: str,
56
+ provider: str,
57
+ input_tokens: int,
58
+ output_tokens: int,
59
+ token_details: TokenDetails | None,
60
+ latency_ms: float,
61
+ status: CallStatus,
62
+ is_failover: bool,
63
+ sdk_instance_id: str | None = None,
64
+ timestamp: datetime | None = None,
65
+ ) -> MetadataEvent:
66
+ """Build a MetadataEvent for reporting to the cloud API."""
67
+ return MetadataEvent(
68
+ project_id=project_id,
69
+ model=model,
70
+ provider=ProviderName(provider),
71
+ input_tokens=input_tokens,
72
+ output_tokens=output_tokens,
73
+ token_details=token_details,
74
+ latency_ms=latency_ms,
75
+ status=status,
76
+ is_failover=is_failover,
77
+ sdk_instance_id=sdk_instance_id or self._sdk_instance_id,
78
+ timestamp=timestamp or datetime.now(UTC),
79
+ )
80
+
81
+ def _get_circuit_breaker(self, provider: str) -> CircuitBreaker:
82
+ """Get the circuit breaker for a provider.
83
+
84
+ Lazily creates a circuit breaker if one doesn't exist for this provider.
85
+ """
86
+ if provider not in self._circuit_breakers:
87
+ self._circuit_breakers[provider] = CircuitBreaker(
88
+ failure_threshold=self._config.circuit_breaker_failure_threshold,
89
+ recovery_timeout=self._config.circuit_breaker_recovery_timeout,
90
+ success_threshold=self._config.circuit_breaker_success_threshold,
91
+ )
92
+ return self._circuit_breakers[provider]
93
+
94
+ def _select_provider(self) -> str:
95
+ """Select the best available provider via circuit breaker checks.
96
+
97
+ Checks the primary provider first. If its circuit is open and a
98
+ fallback is configured, checks the fallback. If both are open,
99
+ raises ProviderUnavailableError.
100
+
101
+ Returns:
102
+ The selected provider name (e.g. "openai" or "anthropic").
103
+
104
+ Raises:
105
+ ProviderUnavailableError: If all providers have open circuits.
106
+ """
107
+ primary = self._config.primary_provider.value
108
+ primary_cb = self._get_circuit_breaker(primary)
109
+
110
+ if primary_cb.can_proceed():
111
+ return primary
112
+
113
+ # Primary is open -- try fallback
114
+ if self._config.fallback_provider is not None:
115
+ fallback = self._config.fallback_provider.value
116
+ fallback_cb = self._get_circuit_breaker(fallback)
117
+
118
+ if fallback_cb.can_proceed():
119
+ return fallback
120
+
121
+ # Both open
122
+ raise ProviderUnavailableError(
123
+ f"All providers unavailable: {primary} and {fallback} circuits are open",
124
+ provider=primary,
125
+ circuit_state=primary_cb.state.value,
126
+ )
127
+
128
+ # No fallback configured, primary is open
129
+ raise ProviderUnavailableError(
130
+ f"Provider {primary} is unavailable and no fallback is configured",
131
+ provider=primary,
132
+ circuit_state=primary_cb.state.value,
133
+ )
solwyn/_privacy.py ADDED
@@ -0,0 +1,93 @@
1
+ """Private, privacy-sensitive helpers — PRIVACY CRITICAL.
2
+
3
+ PRIVACY
4
+ =======
5
+ This module is the only place in the SDK that touches customer prompt
6
+ content directly. Code here must obey three rules:
7
+
8
+ 1. NEVER pass prompt content to a logger (`logger.*`) — not even in
9
+ a formatted string, not even at DEBUG level. CI enforces this
10
+ with `tests/unit/test_privacy_firewall.py`.
11
+ 2. NEVER store prompt content on a long-lived object — compute and
12
+ discard within the current function call.
13
+ 3. NEVER include prompt content in exception arguments. If a
14
+ computation fails, log `type(exc).__name__` only.
15
+
16
+ If you add a new helper here, add a corresponding enforcement test.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any
22
+
23
+
24
def _text_field_length(block: Any) -> int:
    """Return ``len(block["text"])`` for a dict with a string ``text``, else 0.

    Only a length is computed; the text itself is never stored or logged.
    """
    if not isinstance(block, dict):
        return 0
    text = block.get("text", "")
    return len(text) if isinstance(text, str) else 0


def estimate_content_length(kwargs: dict[str, Any]) -> int:
    """Return the total character length of prompt content in kwargs.

    Walks messages/system/contents and sums string lengths WITHOUT
    concatenating them into a joined string. Only an integer leaves this
    function.

    Recognized shapes:
    - ``messages``: list of dicts whose ``content`` is a string or a list of
      ``{"text": ...}`` blocks. Non-dict messages and blocks are skipped.
    - ``system``: a string, or a list of ``{"text": ...}`` blocks.
    - ``contents``: a string, or a list whose items are strings,
      ``{"text": ...}`` dicts, or dicts carrying a ``parts`` list of
      ``{"text": ...}`` dicts.

    Args:
        kwargs: The LLM call kwargs dict.

    Returns:
        Total character count (0 if no recognizable content keys).
    """
    total = 0

    # ``or ()`` tolerates an explicit ``messages=None`` instead of raising.
    for msg in kwargs.get("messages") or ():
        if not isinstance(msg, dict):
            continue
        content = msg.get("content", "")
        if isinstance(content, str):
            total += len(content)
        elif isinstance(content, list):
            total += sum(_text_field_length(block) for block in content)

    system = kwargs.get("system")
    if isinstance(system, str):
        total += len(system)
    elif isinstance(system, list):
        # The system prompt may also be given as a list of text blocks.
        total += sum(_text_field_length(block) for block in system)

    contents = kwargs.get("contents")
    if isinstance(contents, str):
        total += len(contents)
    elif isinstance(contents, list):
        for item in contents:
            if isinstance(item, str):
                total += len(item)
            elif isinstance(item, dict):
                total += _text_field_length(item)
                # Content-style dicts nest their text under "parts".
                parts = item.get("parts")
                if isinstance(parts, list):
                    total += sum(_text_field_length(part) for part in parts)

    return total
71
+
72
+
73
def estimate_tokens_from_length(char_count: int, provider: str) -> int:
    """Turn a character count into a rough token count.

    Uses a fixed characters-per-token ratio: 3.8 for "anthropic" and 4.0 for
    "openai", "google", and any other provider value. The quotient is
    truncated toward zero and the result is never lower than 1.

    Args:
        char_count: Number of characters in the prompt content.
        provider: Provider identifier, e.g. "openai", "anthropic", "google".

    Returns:
        Estimated token count (at least 1).
    """
    if provider == "anthropic":
        chars_per_token = 3.8
    else:
        # "openai", "google", and unrecognized providers share one ratio.
        chars_per_token = 4.0

    estimate = int(char_count / chars_per_token)
    return estimate if estimate > 1 else 1
solwyn/_proxies.py ADDED
@@ -0,0 +1,165 @@
1
+ """Provider-specific proxy classes for LLM API interception.
2
+
3
+ These thin delegation wrappers let ``Solwyn.chat.completions.create()``
4
+ (and the Anthropic/Google equivalents) route through ``_intercepted_call``
5
+ while passing everything else through to the underlying client.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from solwyn._types import ProviderName
13
+
14
+ if TYPE_CHECKING:
15
+ from solwyn.client import AsyncSolwyn, Solwyn
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Sync proxies
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
class _SyncChatCompletionsProxy:
    """Proxy for client.chat.completions that intercepts create().

    ``create()`` is routed through ``Solwyn._intercepted_call``; every other
    attribute is looked up on the wrapped client's ``chat.completions``.
    """

    def __init__(self, solwyn: Solwyn) -> None:
        self._solwyn = solwyn

    def create(self, **kwargs: Any) -> Any:
        """Forward the keyword arguments to ``Solwyn._intercepted_call``."""
        return self._solwyn._intercepted_call(**kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-create attributes to the client's chat.completions.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.chat.completions, name)
36
+
37
+
38
class _SyncChatProxy:
    """Proxy for client.chat that provides .completions.create()."""

    def __init__(self, solwyn: Solwyn) -> None:
        self._solwyn = solwyn
        # Instance attribute, so ``.completions`` never reaches __getattr__.
        self.completions = _SyncChatCompletionsProxy(solwyn)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-completions attributes for OpenAI clients.

        When the detected provider is OpenAI the attribute is read from the
        wrapped client's ``chat``; for any other provider the lookup fails.

        Raises:
            AttributeError: If the detected provider is not OpenAI, or if
                this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        if self._solwyn._detected_provider == ProviderName.OPENAI:
            return getattr(self._solwyn._client.chat, name)
        raise AttributeError(
            f"'chat.{name}' is not supported. "
            f"The Solwyn chat proxy is OpenAI-specific; Anthropic uses "
            f"'messages' and Google uses 'models'."
        )
58
+
59
+
60
class _SyncMessagesProxy:
    """Proxy for client.messages that intercepts create().

    Lets ``client.messages.create()`` go through ``_intercepted_call``;
    every other attribute is read from the wrapped client's ``messages``.
    """

    def __init__(self, solwyn: Solwyn) -> None:
        self._solwyn = solwyn

    def create(self, **kwargs: Any) -> Any:
        """Forward the keyword arguments to ``Solwyn._intercepted_call``."""
        return self._solwyn._intercepted_call(**kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-create attributes to the client's ``messages``.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.messages, name)
75
+
76
+
77
class _SyncModelsProxy:
    """Proxy for client.models that intercepts content generation.

    Both ``generate_content()`` and ``generate_content_stream()`` go through
    ``_intercepted_call``; the streaming variant additionally passes
    ``_force_stream=True``. Every other attribute is read from the wrapped
    client's ``models``.
    """

    def __init__(self, solwyn: Solwyn) -> None:
        self._solwyn = solwyn

    def generate_content(self, **kwargs: Any) -> Any:
        """Forward the keyword arguments to ``Solwyn._intercepted_call``."""
        return self._solwyn._intercepted_call(**kwargs)

    def generate_content_stream(self, **kwargs: Any) -> Any:
        """Forward to ``_intercepted_call`` with ``_force_stream=True``."""
        return self._solwyn._intercepted_call(_force_stream=True, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through all other attributes to the client's ``models``.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.models, name)
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Async proxies
101
+ # ---------------------------------------------------------------------------
102
+
103
+
104
class _AsyncChatCompletionsProxy:
    """Async proxy for client.chat.completions that intercepts create().

    ``create()`` awaits ``AsyncSolwyn._intercepted_call``; every other
    attribute is looked up on the wrapped client's ``chat.completions``.
    """

    def __init__(self, solwyn: AsyncSolwyn) -> None:
        self._solwyn = solwyn

    async def create(self, **kwargs: Any) -> Any:
        """Await ``_intercepted_call`` with the given keyword arguments."""
        return await self._solwyn._intercepted_call(**kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-create attributes to the client's chat.completions.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.chat.completions, name)
117
+
118
+
119
class _AsyncChatProxy:
    """Async proxy for client.chat that provides .completions.create()."""

    def __init__(self, solwyn: AsyncSolwyn) -> None:
        self._solwyn = solwyn
        # Instance attribute, so ``.completions`` never reaches __getattr__.
        self.completions = _AsyncChatCompletionsProxy(solwyn)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-completions attributes for OpenAI clients.

        When the detected provider is OpenAI the attribute is read from the
        wrapped client's ``chat``; for any other provider the lookup fails.

        Raises:
            AttributeError: If the detected provider is not OpenAI, or if
                this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        if self._solwyn._detected_provider == ProviderName.OPENAI:
            return getattr(self._solwyn._client.chat, name)
        raise AttributeError(
            f"'chat.{name}' is not supported. "
            f"The Solwyn chat proxy is OpenAI-specific; Anthropic uses "
            f"'messages' and Google uses 'models'."
        )
134
+
135
+
136
class _AsyncMessagesProxy:
    """Async proxy for client.messages that intercepts create().

    ``create()`` awaits ``AsyncSolwyn._intercepted_call``; every other
    attribute is read from the wrapped client's ``messages``.
    """

    def __init__(self, solwyn: AsyncSolwyn) -> None:
        self._solwyn = solwyn

    async def create(self, **kwargs: Any) -> Any:
        """Await ``_intercepted_call`` with the given keyword arguments."""
        return await self._solwyn._intercepted_call(**kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through non-create attributes to the client's ``messages``.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.messages, name)
147
+
148
+
149
class _AsyncModelsProxy:
    """Async proxy for client.models.

    Intercepts generate_content() and generate_content_stream(); both await
    ``AsyncSolwyn._intercepted_call``, the streaming variant with
    ``_force_stream=True``. Every other attribute is read from the wrapped
    client's ``models``.
    """

    def __init__(self, solwyn: AsyncSolwyn) -> None:
        self._solwyn = solwyn

    async def generate_content(self, **kwargs: Any) -> Any:
        """Await ``_intercepted_call`` with the given keyword arguments."""
        return await self._solwyn._intercepted_call(**kwargs)

    async def generate_content_stream(self, **kwargs: Any) -> Any:
        """Await ``_intercepted_call`` with ``_force_stream=True`` added."""
        return await self._solwyn._intercepted_call(_force_stream=True, **kwargs)

    def __getattr__(self, name: str) -> Any:
        """Pass through all other attributes to the client's ``models``.

        Raises:
            AttributeError: If the attribute is missing on the wrapped object,
                or if this proxy has no ``_solwyn`` reference yet.
        """
        if name == "_solwyn":
            # Only reachable when __init__ never ran (e.g. copy/pickle
            # reconstruction). Without this guard, reading self._solwyn below
            # would re-enter __getattr__ until RecursionError.
            raise AttributeError(name)
        return getattr(self._solwyn._client.models, name)
solwyn/_token_details.py ADDED
@@ -0,0 +1,45 @@
1
+ """TokenDetails — normalized token usage breakdown.
2
+
3
+ Normalized token usage breakdown for one LLM call.
4
+ """
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
+
9
class TokenDetails(BaseModel):
    """Per-call token usage, normalized across providers.

    Every counter defaults to 0 and must be non-negative (``ge=0``).
    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # -- Totals ------------------------------------------------------------
    input_tokens: int = Field(
        default=0,
        ge=0,
        description="Total input tokens (normalized)",
    )
    output_tokens: int = Field(
        default=0,
        ge=0,
        description="Total output tokens (normalized)",
    )

    # -- Prompt caching ----------------------------------------------------
    cached_input_tokens: int = Field(
        default=0,
        ge=0,
        description="Input tokens served from prompt cache",
    )
    cache_creation_tokens: int = Field(
        default=0,
        ge=0,
        description="Input tokens written to prompt cache (Anthropic)",
    )

    # -- Reasoning / audio / predictions / tools ---------------------------
    reasoning_tokens: int = Field(
        default=0,
        ge=0,
        description="Tokens used for chain-of-thought / thinking",
    )
    audio_input_tokens: int = Field(
        default=0,
        ge=0,
        description="Audio input tokens (OpenAI)",
    )
    audio_output_tokens: int = Field(
        default=0,
        ge=0,
        description="Audio output tokens (OpenAI)",
    )
    accepted_prediction_tokens: int = Field(
        default=0,
        ge=0,
        description="Predicted output tokens accepted (OpenAI)",
    )
    rejected_prediction_tokens: int = Field(
        default=0,
        ge=0,
        description="Predicted output tokens rejected (OpenAI)",
    )
    tool_use_input_tokens: int = Field(
        default=0,
        ge=0,
        description="Tokens used for tool/function definitions (Google)",
    )

    @property
    def total_tokens(self) -> int:
        """Sum of ``input_tokens`` and ``output_tokens``.

        A plain property rather than a declared model field.
        """
        return self.input_tokens + self.output_tokens
solwyn/_types.py ADDED
@@ -0,0 +1,122 @@
1
+ """Vendored enums and wire-format models for SDK <-> API contracts.
2
+
3
+ Pydantic models for API request/response contracts.
4
+ Excludes API-internal types: ProjectConfig, ProviderHealth,
5
+ NotificationEventType, Environment, BudgetPeriod.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from datetime import datetime
11
+ from enum import StrEnum
12
+
13
+ from pydantic import BaseModel, ConfigDict, Field
14
+
15
+ # TokenDetails lives in a separate module to avoid a circular import:
16
+ # _types -> TokenDetails -> (if merged here) _types.
17
+ from solwyn._token_details import TokenDetails
18
+
19
+ # ── Enums ────────────────────────────────────────────────────────────────
20
+
21
+
22
+ class BudgetMode(StrEnum):
23
+ """How the SDK reacts when a budget limit is reached."""
24
+
25
+ ALERT_ONLY = "alert_only"
26
+ HARD_DENY = "hard_deny"
27
+
28
+
29
+ class CircuitState(StrEnum):
30
+ """Circuit breaker states for provider health tracking."""
31
+
32
+ CLOSED = "closed" # Normal operation — requests flow through
33
+ OPEN = "open" # Failing — reject requests, try fallback
34
+ HALF_OPEN = "half_open" # Testing recovery — allow probe requests
35
+
36
+
37
+ class ProviderName(StrEnum):
38
+ """Supported LLM provider identifiers."""
39
+
40
+ OPENAI = "openai"
41
+ ANTHROPIC = "anthropic"
42
+ GOOGLE = "google"
43
+
44
+
45
+ class CallStatus(StrEnum):
46
+ """Outcome status for LLM call metadata events."""
47
+
48
+ SUCCESS = "success"
49
+ ERROR = "error"
50
+ BUDGET_DENIED = "budget_denied"
51
+
52
+
53
+ # ── Wire-format models ──────────────────────────────────────────────────
54
+
55
+
56
class MetadataEvent(BaseModel):
    """Telemetry event describing one LLM call.

    Carries identifiers, token counts, latency and outcome only; the model
    declares no field for prompt or response content. Unknown fields are
    rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # -- What was called ---------------------------------------------------
    project_id: str = Field(
        ...,
        description="Project identifier (proj_...)",
    )
    model: str = Field(
        ...,
        max_length=100,
        description="LLM model name (e.g. gpt-4o)",
    )
    provider: ProviderName = Field(
        ...,
        description="LLM provider",
    )

    # -- Token usage -------------------------------------------------------
    input_tokens: int = Field(
        ...,
        ge=0,
        description="Input token count",
    )
    output_tokens: int = Field(
        ...,
        ge=0,
        description="Output token count",
    )
    token_details: TokenDetails | None = Field(
        None,
        description="Full token breakdown from provider adapter",
    )

    # -- Outcome -----------------------------------------------------------
    latency_ms: float = Field(
        ...,
        description="End-to-end call latency in ms",
    )
    status: CallStatus = Field(
        ...,
        description="Call outcome",
    )
    is_failover: bool = Field(
        ...,
        description="Whether this call used a fallback provider",
    )

    # -- Provenance --------------------------------------------------------
    sdk_instance_id: str = Field(
        ...,
        description="Unique SDK instance identifier",
    )
    timestamp: datetime = Field(
        ...,
        description="When the LLM call completed (UTC)",
    )
78
+
79
+
80
class BudgetCheckRequest(BaseModel):
    """Request body for the pre-flight budget check of a pending LLM call.

    All four fields are required; unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    project_id: str = Field(
        ...,
        description="Project identifier (proj_...)",
    )
    estimated_input_tokens: int = Field(
        ...,
        ge=0,
        description="Estimated input token count for the pending call",
    )
    model: str = Field(
        ...,
        max_length=100,
        description="LLM model name",
    )
    provider: ProviderName = Field(
        ...,
        description="Target provider",
    )
91
+
92
+
93
class BudgetCheckResponse(BaseModel):
    """Response body returned for a budget check request.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # -- Decision ----------------------------------------------------------
    allowed: bool = Field(
        ...,
        description="Whether the call is within budget",
    )
    remaining_budget: float = Field(
        ...,
        description="Remaining budget in USD for current period",
    )
    reservation_id: str | None = Field(
        None,
        description="Budget reservation ID (for cost reconciliation)",
    )
    mode: BudgetMode = Field(
        ...,
        description="Current budget enforcement mode",
    )

    # -- Budget figures ----------------------------------------------------
    budget_limit: float = Field(
        ...,
        description="Total budget limit for current period in USD",
    )
    current_usage: float = Field(
        ...,
        description="Current spend in USD for this period",
    )
    # NOTE(review): nullable but required (no default), unlike
    # ``reservation_id`` -- a response that omits this key fails validation.
    # Confirm the API always sends it, even when the call is allowed.
    denied_by_period: str | None = Field(
        ...,
        description="Which budget period triggered denial (e.g. 'daily')",
    )
109
+
110
+
111
class BudgetConfirmRequest(BaseModel):
    """Request body confirming actual usage after an LLM call completes.

    All three fields are required; unknown fields are rejected.
    """

    model_config = ConfigDict(extra="forbid")

    reservation_id: str = Field(
        ...,
        description="Budget reservation ID returned by BudgetCheckResponse",
    )
    model: str = Field(
        ...,
        max_length=100,
        description="LLM model name used for the call",
    )
    token_details: TokenDetails = Field(
        ...,
        description="Actual token breakdown from the provider adapter",
    )
solwyn/_validation.py ADDED
@@ -0,0 +1,60 @@
1
+ """Project ID and API key validation.
2
+
3
+ API key and project ID format validation.
4
+
5
+ Security features applied to every validator:
6
+ - Unicode NFC normalization to prevent homograph attacks
7
+ - ASCII-only enforcement to prevent encoding exploits
8
+ - Regex pattern validation for allowed characters
9
+ - Path traversal prevention (reject ``..``, ``/``, ``\\``)
10
+ """
11
+
12
+ import re
13
+ import unicodedata
14
+ from typing import Final
15
+
16
# ``\Z`` anchors at the true end of the string. A trailing ``$`` would also
# match just before a final newline, letting e.g. "proj_abcdefgh\n" through.
PROJECT_ID_PATTERN: Final = re.compile(r"^proj_[a-zA-Z0-9]{8,32}\Z")
API_KEY_PATTERN: Final = re.compile(r"^sk_solwyn_[a-zA-Z0-9]{32,64}\Z")
18
+
19
+
20
def _security_checks(value: str, label: str) -> str:
    """Run the checks every validator applies before its pattern match.

    Args:
        value: The raw identifier to check.
        label: Human-readable name of the value, used in error messages.

    Returns:
        The NFC-normalized value.

    Raises:
        ValueError: If the value is empty, contains non-ASCII characters
            after normalization, or contains ``..``, ``/`` or a backslash.
    """
    if not value:
        raise ValueError(f"{label} cannot be empty")

    normalized = unicodedata.normalize("NFC", value)

    if not normalized.isascii():
        raise ValueError(f"Invalid {label}: must contain only ASCII characters")

    # Reject anything that could be read as a path component.
    if any(token in normalized for token in ("..", "/", "\\")):
        raise ValueError(f"Invalid {label}: path traversal patterns not allowed")

    return normalized
34
+
35
+
36
def validate_project_id(project_id: str) -> str:
    """Validate and return a project ID (canonical implementation).

    Args:
        project_id: Candidate project ID.

    Returns:
        The project ID as returned by ``_security_checks``.

    Raises:
        ValueError: If the shared security checks fail or the value does not
            match ``PROJECT_ID_PATTERN`` in its entirety.
    """
    project_id = _security_checks(project_id, "project ID")

    # fullmatch requires the entire string to match, so a value ending in a
    # newline is rejected regardless of how the pattern is anchored.
    if PROJECT_ID_PATTERN.fullmatch(project_id) is None:
        # Truncate long inputs so the error message stays short.
        display = f"{project_id[:20]}..." if len(project_id) > 20 else project_id
        raise ValueError(
            f"Invalid project ID: must match proj_<8-32 alphanumeric chars>. Got: {display}"
        )

    return project_id
47
+
48
+
49
def validate_api_key_format(api_key: str) -> str:
    """Validate and return an API key (format check only — not authentication).

    Args:
        api_key: Candidate API key.

    Returns:
        The API key as returned by ``_security_checks``.

    Raises:
        ValueError: If the shared security checks fail or the value does not
            match ``API_KEY_PATTERN`` in its entirety. The message shows at
            most the first 12 characters of the key.
    """
    api_key = _security_checks(api_key, "API key")

    # fullmatch requires the entire string to match, so a value ending in a
    # newline is rejected regardless of how the pattern is anchored.
    if API_KEY_PATTERN.fullmatch(api_key) is None:
        # Never echo the whole key back: show a short prefix only.
        display = f"{api_key[:12]}..." if len(api_key) > 12 else "<too short>"
        raise ValueError(
            f"Invalid API key format: must match sk_solwyn_<32-64 alphanumeric chars>. "
            f"Got: {display}"
        )

    return api_key