sol-mcp 0.2.0 (sol_mcp-0.2.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sol_mcp-0.2.0.dist-info/METADATA +218 -0
- sol_mcp-0.2.0.dist-info/RECORD +20 -0
- sol_mcp-0.2.0.dist-info/WHEEL +4 -0
- sol_mcp-0.2.0.dist-info/entry_points.txt +3 -0
- solana_mcp/__init__.py +3 -0
- solana_mcp/cli.py +527 -0
- solana_mcp/config.py +324 -0
- solana_mcp/expert/__init__.py +5 -0
- solana_mcp/expert/guidance.py +452 -0
- solana_mcp/indexer/__init__.py +8 -0
- solana_mcp/indexer/chunker.py +457 -0
- solana_mcp/indexer/compiler.py +1101 -0
- solana_mcp/indexer/downloader.py +304 -0
- solana_mcp/indexer/embedder.py +755 -0
- solana_mcp/indexer/manifest.py +411 -0
- solana_mcp/logging.py +85 -0
- solana_mcp/models.py +62 -0
- solana_mcp/server.py +746 -0
- solana_mcp/tools/__init__.py +1 -0
- solana_mcp/versions.py +391 -0
solana_mcp/config.py
ADDED
@@ -0,0 +1,324 @@
"""Configuration management for solana-mcp.

Supports loading configuration from:
1. Default values
2. Config file (~/.solana-mcp/config.yaml)
3. Environment variables (for secrets like API keys)

Configuration precedence: env vars > config file > defaults
"""

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import yaml

from .logging import get_logger

logger = get_logger("config")

# Default values
DEFAULT_EMBEDDING_MODEL = "all-MiniLM-L6-v2"
DEFAULT_BATCH_SIZE = 32
DEFAULT_CHUNK_SIZE = 1000
DEFAULT_CHUNK_OVERLAP = 200

# Supported embedding models with their properties
EMBEDDING_MODELS = {
    # Local models (sentence-transformers)
    "all-MiniLM-L6-v2": {
        "dimensions": 384,
        "max_tokens": 256,
        "type": "local",
        "description": "Fast, lightweight fallback model",
    },
    "all-mpnet-base-v2": {
        "dimensions": 768,
        "max_tokens": 384,
        "type": "local",
        "description": "Better quality, moderate speed",
    },
    "codesage/codesage-large": {
        "dimensions": 1024,
        "max_tokens": 1024,
        "type": "local",
        "description": "Code-specialized, recommended for Rust source",
    },
    # API models (require API keys)
    "voyage:voyage-code-3": {
        "dimensions": 1024,
        "max_tokens": 16000,
        "type": "api",
        "env_var": "VOYAGE_API_KEY",
        "description": "Best quality, requires API key ($0.06/1M tokens)",
    },
}


class ConfigError(Exception):
    """Configuration error."""

    pass


@dataclass
class EmbeddingConfig:
    """Embedding model configuration."""

    model: str = DEFAULT_EMBEDDING_MODEL
    batch_size: int = DEFAULT_BATCH_SIZE

    @property
    def model_info(self) -> dict[str, Any]:
        """Get model info from EMBEDDING_MODELS."""
        return EMBEDDING_MODELS.get(self.model, {})

    @property
    def dimensions(self) -> int:
        """Get embedding dimensions for the model."""
        return self.model_info.get("dimensions", 384)

    @property
    def requires_api_key(self) -> bool:
        """Check if model requires an API key."""
        return self.model_info.get("type") == "api"

    @property
    def api_key_env_var(self) -> str | None:
        """Get the environment variable name for the API key."""
        return self.model_info.get("env_var")

    def validate(self) -> None:
        """Validate configuration.

        Raises ConfigError if validation fails.
        """
        if self.model not in EMBEDDING_MODELS:
            logger.warning(
                "Unknown embedding model: %s. Using default settings.", self.model
            )

        if self.requires_api_key:
            env_var = self.api_key_env_var
            if env_var and not os.environ.get(env_var):
                raise ConfigError(
                    f"Model {self.model} requires {env_var} environment variable"
                )

        if self.batch_size < 1:
            raise ConfigError(f"batch_size must be >= 1, got {self.batch_size}")

        if self.batch_size > 256:
            logger.warning(
                "Large batch_size (%d) may cause memory issues", self.batch_size
            )


@dataclass
class ChunkingConfig:
    """Document chunking configuration."""

    chunk_size: int = DEFAULT_CHUNK_SIZE
    chunk_overlap: int = DEFAULT_CHUNK_OVERLAP

    def validate(self) -> None:
        """Validate configuration.

        Raises ConfigError if validation fails.
        """
        if self.chunk_size < 100:
            raise ConfigError(f"chunk_size must be >= 100, got {self.chunk_size}")

        if self.chunk_size > 10000:
            raise ConfigError(f"chunk_size must be <= 10000, got {self.chunk_size}")

        if self.chunk_overlap < 0:
            raise ConfigError(f"chunk_overlap must be >= 0, got {self.chunk_overlap}")

        if self.chunk_overlap >= self.chunk_size:
            raise ConfigError(
                f"chunk_overlap ({self.chunk_overlap}) must be < chunk_size ({self.chunk_size})"
            )

    def to_dict(self) -> dict[str, int]:
        """Convert to dict for manifest storage."""
        return {
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
        }


@dataclass
class Config:
    """Main configuration container."""

    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
    chunking: ChunkingConfig = field(default_factory=ChunkingConfig)

    def validate(self) -> None:
        """Validate all configuration."""
        self.embedding.validate()
        self.chunking.validate()


def load_config(config_path: Path | None = None, data_dir: Path | None = None) -> Config:
    """
    Load configuration from file and environment.

    Args:
        config_path: Explicit path to config file (optional)
        data_dir: Data directory to look for config.yaml (optional)

    Returns:
        Validated Config object
    """
    config = Config()

    # Determine config file path
    if config_path is None and data_dir is not None:
        config_path = data_dir / "config.yaml"

    # Load from file if exists
    if config_path and config_path.exists():
        try:
            config = _load_config_file(config_path)
            logger.debug("Loaded config from %s", config_path)
        except Exception as e:
            logger.warning("Failed to load config from %s: %s", config_path, e)
            config = Config()

    # Override with environment variables
    config = _apply_env_overrides(config)

    # Validate
    config.validate()

    return config


def _load_config_file(config_path: Path) -> Config:
    """Load configuration from YAML file."""
    # Security: limit file size
    max_size = 1024 * 1024  # 1MB
    if config_path.stat().st_size > max_size:
        raise ConfigError(f"Config file too large: {config_path.stat().st_size} > {max_size}")

    with open(config_path, encoding="utf-8") as f:
        # Use safe_load to prevent code execution
        data = yaml.safe_load(f)

    if data is None:
        return Config()

    if not isinstance(data, dict):
        raise ConfigError("Config file must be a YAML mapping")

    # Validate keys
    allowed_keys = {"embedding", "chunking"}
    unknown_keys = set(data.keys()) - allowed_keys
    if unknown_keys:
        logger.warning("Unknown config keys ignored: %s", unknown_keys)

    # Parse embedding config
    embedding_data = data.get("embedding", {})
    if not isinstance(embedding_data, dict):
        raise ConfigError("'embedding' must be a mapping")

    embedding = EmbeddingConfig(
        model=str(embedding_data.get("model", DEFAULT_EMBEDDING_MODEL)),
        batch_size=int(embedding_data.get("batch_size", DEFAULT_BATCH_SIZE)),
    )

    # Parse chunking config
    chunking_data = data.get("chunking", {})
    if not isinstance(chunking_data, dict):
        raise ConfigError("'chunking' must be a mapping")

    chunking = ChunkingConfig(
        chunk_size=int(chunking_data.get("chunk_size", DEFAULT_CHUNK_SIZE)),
        chunk_overlap=int(chunking_data.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP)),
    )

    return Config(embedding=embedding, chunking=chunking)


def _apply_env_overrides(config: Config) -> Config:
    """Apply environment variable overrides to config."""
    # SOLANA_MCP_EMBEDDING_MODEL overrides config file
    env_model = os.environ.get("SOLANA_MCP_EMBEDDING_MODEL")
    if env_model:
        config.embedding.model = env_model
        logger.debug("Using embedding model from env: %s", env_model)

    # SOLANA_MCP_BATCH_SIZE
    env_batch = os.environ.get("SOLANA_MCP_BATCH_SIZE")
    if env_batch:
        try:
            config.embedding.batch_size = int(env_batch)
        except ValueError:
            logger.warning("Invalid SOLANA_MCP_BATCH_SIZE: %s", env_batch)

    return config


def save_config(config: Config, config_path: Path) -> None:
    """
    Save configuration to YAML file.

    Args:
        config: Configuration to save
        config_path: Path to write config file
    """
    data = {
        "embedding": {
            "model": config.embedding.model,
            "batch_size": config.embedding.batch_size,
        },
        "chunking": {
            "chunk_size": config.chunking.chunk_size,
            "chunk_overlap": config.chunking.chunk_overlap,
        },
    }

    config_path.parent.mkdir(parents=True, exist_ok=True)

    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, sort_keys=False)

    logger.info("Saved config to %s", config_path)


def get_model_info(model_name: str | None = None) -> str:
    """Get human-readable info about embedding models.

    Args:
        model_name: Specific model to get info for (None = all models)

    Returns:
        Formatted string with model information
    """
    if model_name:
        if model_name not in EMBEDDING_MODELS:
            return f"Unknown model: {model_name}"
        info = EMBEDDING_MODELS[model_name]
        return (
            f"{model_name}:\n"
            f" Dimensions: {info['dimensions']}\n"
            f" Max tokens: {info['max_tokens']}\n"
            f" Type: {info['type']}\n"
            f" Description: {info['description']}"
        )

    lines = ["Available embedding models:\n"]
    for name, info in EMBEDDING_MODELS.items():
        marker = " (default)" if name == DEFAULT_EMBEDDING_MODEL else ""
        api_note = " [requires API key]" if info["type"] == "api" else ""
        lines.append(
            f" {name}{marker}{api_note}\n"
            f" {info['dimensions']} dims, {info['max_tokens']} tokens\n"
            f" {info['description']}\n"
        )

    return "\n".join(lines)