sol-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
solana_mcp/config.py ADDED
@@ -0,0 +1,324 @@
1
+ """Configuration management for solana-mcp.
2
+
3
+ Supports loading configuration from:
4
+ 1. Default values
5
+ 2. Config file (~/.solana-mcp/config.yaml)
6
+ 3. Environment variables (for secrets like API keys)
7
+
8
+ Configuration precedence: env vars > config file > defaults
9
+ """
10
+
11
+ import os
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import yaml
17
+
18
+ from .logging import get_logger
19
+
20
logger = get_logger("config")

# Default values (used when neither config file nor env vars supply one)
DEFAULT_EMBEDDING_MODEL = "all-MiniLM-L6-v2"
DEFAULT_BATCH_SIZE = 32
DEFAULT_CHUNK_SIZE = 1000
DEFAULT_CHUNK_OVERLAP = 200

# Supported embedding models with their properties.
# Each entry maps a model name to:
#   dimensions  - size of the embedding vectors the model produces
#   max_tokens  - maximum input length accepted by the model
#   type        - "local" (runs via sentence-transformers) or "api" (hosted)
#   env_var     - (api models only) env var that must hold the API key
#   description - human-readable summary shown by get_model_info()
EMBEDDING_MODELS = {
    # Local models (sentence-transformers)
    "all-MiniLM-L6-v2": {
        "dimensions": 384,
        "max_tokens": 256,
        "type": "local",
        "description": "Fast, lightweight fallback model",
    },
    "all-mpnet-base-v2": {
        "dimensions": 768,
        "max_tokens": 384,
        "type": "local",
        "description": "Better quality, moderate speed",
    },
    "codesage/codesage-large": {
        "dimensions": 1024,
        "max_tokens": 1024,
        "type": "local",
        "description": "Code-specialized, recommended for Rust source",
    },
    # API models (require API keys)
    "voyage:voyage-code-3": {
        "dimensions": 1024,
        "max_tokens": 16000,
        "type": "api",
        "env_var": "VOYAGE_API_KEY",
        "description": "Best quality, requires API key ($0.06/1M tokens)",
    },
}
58
+
59
+
60
class ConfigError(Exception):
    """Raised when configuration loading or validation fails."""
64
+
65
+
66
@dataclass
class EmbeddingConfig:
    """Settings for the embedding model and batching."""

    # Model identifier; expected to be a key of EMBEDDING_MODELS, but
    # unknown names are tolerated (validate() only warns).
    model: str = DEFAULT_EMBEDDING_MODEL
    # Number of items embedded per batch.
    batch_size: int = DEFAULT_BATCH_SIZE

    @property
    def model_info(self) -> dict[str, Any]:
        """Properties of the selected model ({} when the model is unknown)."""
        return EMBEDDING_MODELS.get(self.model, {})

    @property
    def dimensions(self) -> int:
        """Embedding vector size; defaults to 384 for unknown models."""
        info = self.model_info
        return info.get("dimensions", 384)

    @property
    def requires_api_key(self) -> bool:
        """Whether the selected model is API-backed."""
        return self.model_info.get("type") == "api"

    @property
    def api_key_env_var(self) -> str | None:
        """Environment variable that holds the API key, if the model has one."""
        return self.model_info.get("env_var")

    def validate(self) -> None:
        """Check this configuration.

        Raises:
            ConfigError: if a required API key env var is unset or
                batch_size is not positive.

        Unknown models and very large batch sizes only log warnings.
        """
        if self.model not in EMBEDDING_MODELS:
            logger.warning(
                "Unknown embedding model: %s. Using default settings.", self.model
            )

        if self.requires_api_key:
            key_var = self.api_key_env_var
            if key_var and not os.environ.get(key_var):
                raise ConfigError(
                    f"Model {self.model} requires {key_var} environment variable"
                )

        if self.batch_size < 1:
            raise ConfigError(f"batch_size must be >= 1, got {self.batch_size}")

        if self.batch_size > 256:
            logger.warning(
                "Large batch_size (%d) may cause memory issues", self.batch_size
            )
117
+
118
+
119
@dataclass
class ChunkingConfig:
    """Settings controlling how documents are split into chunks."""

    # Target chunk size (units defined by the chunker — TODO confirm).
    chunk_size: int = DEFAULT_CHUNK_SIZE
    # Overlap shared between consecutive chunks.
    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP

    def validate(self) -> None:
        """Check this configuration.

        Raises:
            ConfigError: if chunk_size is outside [100, 10000], overlap is
                negative, or overlap is not strictly smaller than chunk_size.
        """
        size, overlap = self.chunk_size, self.chunk_overlap
        if size < 100:
            raise ConfigError(f"chunk_size must be >= 100, got {size}")
        if size > 10000:
            raise ConfigError(f"chunk_size must be <= 10000, got {size}")
        if overlap < 0:
            raise ConfigError(f"chunk_overlap must be >= 0, got {overlap}")
        if overlap >= size:
            raise ConfigError(
                f"chunk_overlap ({overlap}) must be < chunk_size ({size})"
            )

    def to_dict(self) -> dict[str, int]:
        """Dict form of the chunking settings, used for manifest storage."""
        return {"chunk_size": self.chunk_size, "chunk_overlap": self.chunk_overlap}
151
+
152
+
153
@dataclass
class Config:
    """Top-level container bundling all configuration sections."""

    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)

    def validate(self) -> None:
        """Validate every section; ConfigError propagates on failure."""
        for section in (self.embedding, self.chunking):
            section.validate()
164
+
165
+
166
def load_config(config_path: Path | None = None, data_dir: Path | None = None) -> Config:
    """
    Load configuration from file and environment.

    Precedence: env vars > config file > defaults. A config file that
    fails to parse is logged and ignored rather than aborting startup.

    Args:
        config_path: Explicit path to config file (optional)
        data_dir: Data directory to look for config.yaml (optional)

    Returns:
        Validated Config object
    """
    # An explicit path wins; otherwise fall back to <data_dir>/config.yaml.
    path = config_path
    if path is None and data_dir is not None:
        path = data_dir / "config.yaml"

    cfg = Config()
    if path and path.exists():
        try:
            cfg = _load_config_file(path)
            logger.debug("Loaded config from %s", path)
        except Exception as exc:
            # Best-effort: fall back to defaults on any load/parse failure.
            logger.warning("Failed to load config from %s: %s", path, exc)
            cfg = Config()

    cfg = _apply_env_overrides(cfg)
    cfg.validate()
    return cfg
199
+
200
+
201
def _load_config_file(config_path: Path) -> Config:
    """Parse a YAML config file into a Config.

    Raises:
        ConfigError: if the file is too large, is not a YAML mapping, or a
            section ('embedding' / 'chunking') is not a mapping.
    """
    # Security: refuse absurdly large files before handing them to the parser.
    max_size = 1024 * 1024  # 1MB
    file_size = config_path.stat().st_size
    if file_size > max_size:
        raise ConfigError(f"Config file too large: {file_size} > {max_size}")

    with open(config_path, encoding="utf-8") as handle:
        # safe_load: never execute arbitrary tags from the file.
        data = yaml.safe_load(handle)

    # An empty file parses to None -> all defaults.
    if data is None:
        return Config()

    if not isinstance(data, dict):
        raise ConfigError("Config file must be a YAML mapping")

    # Unknown top-level keys are tolerated but reported.
    allowed_keys = {"embedding", "chunking"}
    extras = set(data) - allowed_keys
    if extras:
        logger.warning("Unknown config keys ignored: %s", extras)

    # Parse the embedding section.
    embedding_section = data.get("embedding", {})
    if not isinstance(embedding_section, dict):
        raise ConfigError("'embedding' must be a mapping")
    embedding = EmbeddingConfig(
        model=str(embedding_section.get("model", DEFAULT_EMBEDDING_MODEL)),
        batch_size=int(embedding_section.get("batch_size", DEFAULT_BATCH_SIZE)),
    )

    # Parse the chunking section.
    chunking_section = data.get("chunking", {})
    if not isinstance(chunking_section, dict):
        raise ConfigError("'chunking' must be a mapping")
    chunking = ChunkingConfig(
        chunk_size=int(chunking_section.get("chunk_size", DEFAULT_CHUNK_SIZE)),
        chunk_overlap=int(chunking_section.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP)),
    )

    return Config(embedding=embedding, chunking=chunking)
245
+
246
+
247
def _apply_env_overrides(config: Config) -> Config:
    """Overlay environment-variable settings onto *config* (mutated in place)."""
    # SOLANA_MCP_EMBEDDING_MODEL overrides the config-file model.
    if model_override := os.environ.get("SOLANA_MCP_EMBEDDING_MODEL"):
        config.embedding.model = model_override
        logger.debug("Using embedding model from env: %s", model_override)

    # SOLANA_MCP_BATCH_SIZE: non-integer values are warned about and ignored.
    if batch_override := os.environ.get("SOLANA_MCP_BATCH_SIZE"):
        try:
            config.embedding.batch_size = int(batch_override)
        except ValueError:
            logger.warning("Invalid SOLANA_MCP_BATCH_SIZE: %s", batch_override)

    return config
264
+
265
+
266
def save_config(config: Config, config_path: Path) -> None:
    """
    Save configuration to a YAML file, creating parent directories as needed.

    Args:
        config: Configuration to save
        config_path: Path to write config file
    """
    payload = {
        "embedding": {
            "model": config.embedding.model,
            "batch_size": config.embedding.batch_size,
        },
        # ChunkingConfig already knows its own serialized form.
        "chunking": config.chunking.to_dict(),
    }

    config_path.parent.mkdir(parents=True, exist_ok=True)

    with open(config_path, "w", encoding="utf-8") as handle:
        yaml.dump(payload, handle, default_flow_style=False, sort_keys=False)

    logger.info("Saved config to %s", config_path)
291
+
292
+
293
def get_model_info(model_name: str | None = None) -> str:
    """Get human-readable info about embedding models.

    Args:
        model_name: Specific model to get info for (None = all models)

    Returns:
        Formatted string with model information
    """
    if model_name:
        info = EMBEDDING_MODELS.get(model_name)
        if info is None:
            return f"Unknown model: {model_name}"
        return (
            f"{model_name}:\n"
            f" Dimensions: {info['dimensions']}\n"
            f" Max tokens: {info['max_tokens']}\n"
            f" Type: {info['type']}\n"
            f" Description: {info['description']}"
        )

    # No specific model requested: list them all.
    lines = ["Available embedding models:\n"]
    for name, info in EMBEDDING_MODELS.items():
        tags = ""
        if name == DEFAULT_EMBEDDING_MODEL:
            tags += " (default)"
        if info["type"] == "api":
            tags += " [requires API key]"
        lines.append(
            f" {name}{tags}\n"
            f" {info['dimensions']} dims, {info['max_tokens']} tokens\n"
            f" {info['description']}\n"
        )

    return "\n".join(lines)
@@ -0,0 +1,5 @@
1
+ """Expert guidance system for Solana."""
2
+
3
+ from .guidance import GuidanceEntry, get_expert_guidance, list_guidance_topics
4
+
5
+ __all__ = ["get_expert_guidance", "list_guidance_topics", "GuidanceEntry"]