solar-data-mcp-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. solar_data_mcp_core-0.1.0/.gitignore +20 -0
  2. solar_data_mcp_core-0.1.0/LICENSE +21 -0
  3. solar_data_mcp_core-0.1.0/PKG-INFO +41 -0
  4. solar_data_mcp_core-0.1.0/README.md +22 -0
  5. solar_data_mcp_core-0.1.0/pyproject.toml +34 -0
  6. solar_data_mcp_core-0.1.0/src/solar_mcp_core/__init__.py +3 -0
  7. solar_data_mcp_core-0.1.0/src/solar_mcp_core/bulk.py +256 -0
  8. solar_data_mcp_core-0.1.0/src/solar_mcp_core/cache.py +111 -0
  9. solar_data_mcp_core-0.1.0/src/solar_mcp_core/cli.py +112 -0
  10. solar_data_mcp_core-0.1.0/src/solar_mcp_core/config.py +175 -0
  11. solar_data_mcp_core-0.1.0/src/solar_mcp_core/envelope.py +64 -0
  12. solar_data_mcp_core-0.1.0/src/solar_mcp_core/errors.py +42 -0
  13. solar_data_mcp_core-0.1.0/src/solar_mcp_core/http.py +253 -0
  14. solar_data_mcp_core-0.1.0/src/solar_mcp_core/localfile.py +40 -0
  15. solar_data_mcp_core-0.1.0/src/solar_mcp_core/net.py +67 -0
  16. solar_data_mcp_core-0.1.0/src/solar_mcp_core/py.typed +0 -0
  17. solar_data_mcp_core-0.1.0/src/solar_mcp_core/ratelimit.py +64 -0
  18. solar_data_mcp_core-0.1.0/src/solar_mcp_core/redact.py +27 -0
  19. solar_data_mcp_core-0.1.0/src/solar_mcp_core/units.py +28 -0
  20. solar_data_mcp_core-0.1.0/src/solar_mcp_core/validation.py +119 -0
  21. solar_data_mcp_core-0.1.0/tests/test_bulk.py +51 -0
  22. solar_data_mcp_core-0.1.0/tests/test_cache.py +90 -0
  23. solar_data_mcp_core-0.1.0/tests/test_cli.py +161 -0
  24. solar_data_mcp_core-0.1.0/tests/test_config.py +44 -0
  25. solar_data_mcp_core-0.1.0/tests/test_core_download_guard.py +45 -0
  26. solar_data_mcp_core-0.1.0/tests/test_core_localfile.py +42 -0
  27. solar_data_mcp_core-0.1.0/tests/test_core_redact_perms.py +71 -0
  28. solar_data_mcp_core-0.1.0/tests/test_envelope.py +32 -0
  29. solar_data_mcp_core-0.1.0/tests/test_errors.py +33 -0
  30. solar_data_mcp_core-0.1.0/tests/test_fixture_hygiene.py +38 -0
  31. solar_data_mcp_core-0.1.0/tests/test_http.py +235 -0
  32. solar_data_mcp_core-0.1.0/tests/test_ratelimit.py +49 -0
  33. solar_data_mcp_core-0.1.0/tests/test_validation.py +23 -0
@@ -0,0 +1,20 @@
1
+ # Environments & secrets
2
+ .env
3
+ .venv/
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+
12
+ # Tooling
13
+ .pytest_cache/
14
+ .mypy_cache/
15
+ .ruff_cache/
16
+ .coverage
17
+ coverage.xml
18
+
19
+ # OS
20
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Logan Bernard
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: solar-data-mcp-core
3
+ Version: 0.1.0
4
+ Summary: Shared infrastructure for solar-data-mcp servers: HTTP client, cache, rate limiting, result envelope
5
+ Project-URL: Homepage, https://github.com/hoodsy/solar-data-mcp
6
+ Author: Logan Bernard
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: mcp,nrel,open-data,solar
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering
15
+ Requires-Python: >=3.11
16
+ Requires-Dist: httpx<1,>=0.27
17
+ Requires-Dist: pydantic<3,>=2.7
18
+ Description-Content-Type: text/markdown
19
+
20
+ # solar-data-mcp-core
21
+
22
+ Shared infrastructure for [solar-data-mcp](https://github.com/hoodsy/solar-data-mcp) —
23
+ US open solar data, agent-accessible over MCP. One install gives an agent production
24
+ modeling, solar economics, market intelligence, and generation forecasts, with every
25
+ number carrying `data + units + source + assumptions + warnings`.
26
+
27
+ This package is the plumbing the servers share: the HTTP client (retry, token-bucket
28
+ rate limiting, SQLite cache), the `ToolResult` envelope, the DuckDB bulk store, the
29
+ error taxonomy, and `solar-data-mcp doctor`. You usually don't install it directly.
30
+
31
+ Where to go instead:
32
+
33
+ | You want | Go to |
34
+ |---|---|
35
+ | Everything (18 tools + 11 skills + 4 report prompts, one install) | [`solar-data-mcp`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-data-mcp/README.md) |
36
+ | Production & sizing only (PVWatts, NSRDB) | [`solar-data-mcp-nrel`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/nrel-solar/README.md) |
37
+ | Tariffs, incentives & ROI only | [`solar-data-mcp-economics`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-economics/README.md) |
38
+ | Installed prices, permitting & utility-scale only | [`solar-data-mcp-market`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-market/README.md) |
39
+ | 48-hour generation forecasts only | [`solar-data-mcp-forecast`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-forecast/README.md) |
40
+ | The skill catalog, routing design & report templates | [`docs/skills.md`](https://github.com/hoodsy/solar-data-mcp/blob/main/docs/skills.md) |
41
+ | Quickstart & agent config snippets | [repo README](https://github.com/hoodsy/solar-data-mcp#quickstart) |
@@ -0,0 +1,22 @@
1
+ # solar-data-mcp-core
2
+
3
+ Shared infrastructure for [solar-data-mcp](https://github.com/hoodsy/solar-data-mcp) —
4
+ US open solar data, agent-accessible over MCP. One install gives an agent production
5
+ modeling, solar economics, market intelligence, and generation forecasts, with every
6
+ number carrying `data + units + source + assumptions + warnings`.
7
+
8
+ This package is the plumbing the servers share: the HTTP client (retry, token-bucket
9
+ rate limiting, SQLite cache), the `ToolResult` envelope, the DuckDB bulk store, the
10
+ error taxonomy, and `solar-data-mcp doctor`. You usually don't install it directly.
11
+
12
+ Where to go instead:
13
+
14
+ | You want | Go to |
15
+ |---|---|
16
+ | Everything (18 tools + 11 skills + 4 report prompts, one install) | [`solar-data-mcp`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-data-mcp/README.md) |
17
+ | Production & sizing only (PVWatts, NSRDB) | [`solar-data-mcp-nrel`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/nrel-solar/README.md) |
18
+ | Tariffs, incentives & ROI only | [`solar-data-mcp-economics`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-economics/README.md) |
19
+ | Installed prices, permitting & utility-scale only | [`solar-data-mcp-market`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-market/README.md) |
20
+ | 48-hour generation forecasts only | [`solar-data-mcp-forecast`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-forecast/README.md) |
21
+ | The skill catalog, routing design & report templates | [`docs/skills.md`](https://github.com/hoodsy/solar-data-mcp/blob/main/docs/skills.md) |
22
+ | Quickstart & agent config snippets | [repo README](https://github.com/hoodsy/solar-data-mcp#quickstart) |
@@ -0,0 +1,34 @@
1
+ [project]
2
+ name = "solar-data-mcp-core"
3
+ version = "0.1.0"
4
+ description = "Shared infrastructure for solar-data-mcp servers: HTTP client, cache, rate limiting, result envelope"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.11"
8
+ authors = [{ name = "Logan Bernard" }]
9
+ keywords = ["solar", "mcp", "nrel", "open-data"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "Programming Language :: Python :: 3.11",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Scientific/Engineering",
16
+ ]
17
+ dependencies = [
18
+ "httpx>=0.27,<1",
19
+ "pydantic>=2.7,<3",
20
+ ]
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/hoodsy/solar-data-mcp"
24
+
25
+ # No console script: the user-facing command (server + doctor passthrough) is
26
+ # `solar-data-mcp`, owned by the umbrella package. PyPI's `solar-mcp` name
27
+ # belongs to an unrelated project, so nothing here may claim that command.
28
+
29
+ [build-system]
30
+ requires = ["hatchling"]
31
+ build-backend = "hatchling.build"
32
+
33
+ [tool.hatch.build.targets.wheel]
34
+ packages = ["src/solar_mcp_core"]
@@ -0,0 +1,3 @@
1
+ """Shared infrastructure for solar-data-mcp servers."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,256 @@
1
+ """DuckDB-backed bulk store: the second cache tier, for datasets too large
2
+ for the HTTP cache (DSIRE snapshots, Tracking the Sun, SolarTRACE).
3
+
4
+ Populated only by explicit `sync_*` tools — never implicitly — and every
5
+ dataset records its vintage so query results can cite how fresh the data is.
6
+ The `duckdb` dependency is declared by the packages that use this module
7
+ (solar-data-mcp-economics, solar-data-mcp-market), keeping solar-data-mcp-core
8
+ lightweight.
9
+ """
10
+
11
+ import asyncio
12
+ import os
13
+ import re
14
+ import tempfile
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ import httpx
20
+
21
+ from solar_mcp_core import units
22
+ from solar_mcp_core.config import (
23
+ SourceConfig,
24
+ cache_dir,
25
+ ensure_private_dir,
26
+ harden_file_perms,
27
+ )
28
+ from solar_mcp_core.envelope import SourceRef, ToolResult, utc_now_iso
29
+ from solar_mcp_core.errors import BadInput, SourceUnavailable
30
+ from solar_mcp_core.net import assert_allowed_download_url
31
+
32
+ _META_SCHEMA = """
33
+ CREATE TABLE IF NOT EXISTS _meta (
34
+ dataset TEXT PRIMARY KEY,
35
+ vintage TEXT NOT NULL,
36
+ loaded_at TEXT NOT NULL,
37
+ schema_version INTEGER NOT NULL
38
+ )
39
+ """
40
+
41
+ _IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
42
+
43
+ # Shared-dataset contract: these names are used across packages (economics
44
+ # reads the Tracking the Sun table that market syncs), so they live here.
45
+ TTS_DATASET = "tts"
46
+ TTS_TABLE = "tts_systems"
47
+ SOLARTRACE_DATASET = "solartrace"
48
+ SOLARTRACE_TABLE = "solartrace"
49
+ DSIRE_DATASET = "dsire_programs"
50
+ DSIRE_TABLE = "dsire_programs"
51
+
52
+
53
+ @dataclass
54
+ class DatasetVintage:
55
+ dataset: str
56
+ vintage: str
57
+ loaded_at: str # ISO 8601 UTC
58
+ schema_version: int
59
+
60
+
61
class BulkStore:
    """A single DuckDB connection holding bulk tables plus a `_meta` vintage table.

    Tables are loaded from CSV files; `_meta` records, per dataset, which
    vintage is loaded and when it was recorded.
    """

    def __init__(self, path: Path | str | None = None) -> None:
        """Open (or create) the bulk store. Pass ":memory:" for tests.

        With no `path`, the store lives at `cache_dir() / "bulk.duckdb"`.
        Directory and file permissions are tightened only when the target is
        a `Path` object; string targets are handed to DuckDB untouched.

        Raises:
            RuntimeError: if the optional `duckdb` package is not installed.
        """
        try:
            import duckdb
        except ImportError as exc:  # pragma: no cover — dev/CI envs always have it
            raise RuntimeError(
                "BulkStore needs the 'duckdb' package. It is installed automatically "
                "with solar-data-mcp-economics and solar-data-mcp-market; for "
                "standalone use: pip install duckdb"
            ) from exc

        target = cache_dir() / "bulk.duckdb" if path is None else path
        on_disk = isinstance(target, Path)
        if on_disk:
            # The bulk store holds locally synced data: keep its directory private.
            ensure_private_dir(target.parent)
        self._conn = duckdb.connect(str(target))
        if on_disk:
            harden_file_perms(target)
        self._conn.execute(_META_SCHEMA)
        # Serializes sync_* stage-validate-swap sections. The umbrella shares one
        # store across all tool families, and every sync uses the same staging
        # table on a single (non-thread-safe) DuckDB connection, so concurrent
        # syncs must not overlap.
        self.write_lock = asyncio.Lock()

    def stage_csv(self, table: str, csv_path: Path) -> int:
        """Replace `table` with the contents of a CSV file and return its row count.

        No vintage is touched here. DuckDB streams the file rather than
        holding it in memory, which keeps multi-GB sources such as Tracking
        the Sun tractable. Sync loaders stage into a scratch table, validate,
        swap, and only then record the vintage, so a bad file cannot corrupt
        a good snapshot or its provenance.

        Raises:
            ValueError: if `table` is not a plain SQL identifier.
        """
        _check_identifier(table)
        quoted = f'"{table}"'
        self._conn.execute(
            f"CREATE OR REPLACE TABLE {quoted} AS SELECT * FROM read_csv_auto(?)",
            [str(csv_path)],
        )
        counted = self._conn.execute(f"SELECT count(*) FROM {quoted}").fetchone()
        if not counted:
            return 0
        return int(counted[0])

    def set_vintage(self, dataset: str, vintage: str, schema_version: int = 1) -> None:
        """Insert or overwrite the `_meta` row for `dataset`, stamped with the current UTC time."""
        record = [dataset, vintage, utc_now_iso(), schema_version]
        self._conn.execute("INSERT OR REPLACE INTO _meta VALUES (?, ?, ?, ?)", record)

    def load_csv(
        self,
        dataset: str,
        table: str,
        csv_path: Path,
        vintage: str,
        schema_version: int = 1,
    ) -> int:
        """Stage a CSV and record its vintage in one call; return the row count.

        For loads that need no validation or transform between the two steps.
        """
        rows_loaded = self.stage_csv(table, csv_path)
        self.set_vintage(dataset, vintage, schema_version)
        return rows_loaded

    def vintage(self, dataset: str) -> DatasetVintage | None:
        """Return the recorded vintage for `dataset`, or None when none is recorded."""
        found = self._conn.execute(
            "SELECT dataset, vintage, loaded_at, schema_version FROM _meta WHERE dataset = ?",
            [dataset],
        ).fetchone()
        return None if found is None else DatasetVintage(*found)

    def has_table(self, table: str) -> bool:
        """Report whether a table called `table` exists in the store.

        Raises:
            ValueError: if `table` is not a plain SQL identifier.
        """
        _check_identifier(table)
        hit = self._conn.execute(
            "SELECT count(*) FROM information_schema.tables WHERE table_name = ?", [table]
        ).fetchone()
        if not hit:
            return False
        return bool(hit[0])

    def query(self, sql: str, params: list[Any] | None = None) -> list[tuple[Any, ...]]:
        """Run a read query and return every row as a tuple.

        Callers own the SQL text; `params` are always passed as bound parameters.
        """
        cursor = self._conn.execute(sql, params or [])
        return list(map(tuple, cursor.fetchall()))

    def execute(self, sql: str, params: list[Any] | None = None) -> None:
        """Run DDL/DML (CREATE TABLE AS, DROP) for sync loaders' transforms; returns nothing."""
        self._conn.execute(sql, params or [])

    def close(self) -> None:
        """Close the underlying DuckDB connection."""
        self._conn.close()
+
150
+
151
def _check_identifier(name: str) -> None:
    """Reject any table name that is not a plain SQL identifier.

    Table names are interpolated into SQL text (they cannot be bound as
    parameters), so this check is the only guard on them.

    `fullmatch` is used rather than `match`: with `match`, the pattern's
    trailing `$` also matches just before a final newline, so a name such as
    "tts\\n" would slip through.

    Raises:
        ValueError: if `name` is empty or contains anything other than ASCII
            letters, digits and underscores, or starts with a digit.
    """
    if _IDENTIFIER.fullmatch(name) is None:
        raise ValueError(f"invalid table name: {name!r}")
154
+
155
+
156
+ # Ceiling above the largest real dataset (Tracking the Sun is ~1-2 GB); bounds
157
+ # disk use from a hostile or misconfigured endpoint without blocking real files.
158
+ MAX_DOWNLOAD_BYTES = 4 * 1024**3
159
+ _STREAM_TIMEOUT = 60.0 # per-connect/read; a stall aborts the socket
160
+ _TOTAL_TIMEOUT = 3600.0 # wall-clock ceiling; defeats a slow-drip that resets read timeouts
161
+
162
+
163
async def fetch_to_tempfile(url: str, *, config: SourceConfig, suffix: str = ".csv") -> Path:
    """Download a dataset's bulk file into a fresh temp file and return its path.

    Used by the sync_* tools; the caller is responsible for deleting the file.

    The URL is checked with `assert_allowed_download_url` before anything is
    created on disk: the agent-supplied `source` is untrusted, so this must
    not become an SSRF or internal-fetch primitive. Redirects are refused and
    the transfer is bounded in both size and wall-clock time. Plain httpx is
    used rather than SolarHttpClient — bulk files are not JSON and must never
    enter the HTTP cache tier.

    Raises:
        SourceUnavailable: when the wall-clock ceiling is hit or the
            transport fails; failures raised by the streaming helper
            propagate unchanged.
    """
    assert_allowed_download_url(url, config)
    handle, temp_name = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    target = Path(temp_name)
    try:
        await asyncio.wait_for(_stream_to_file(url, config.name, target), _TOTAL_TIMEOUT)
    except BaseException as exc:
        # Whatever went wrong — timeout, transport error, BadInput, HTTP-status
        # or size failure, cancellation — never leak the temp file.
        target.unlink(missing_ok=True)
        if isinstance(exc, TimeoutError):
            raise SourceUnavailable(
                config.name, f"download exceeded {_TOTAL_TIMEOUT:.0f}s"
            ) from exc
        if isinstance(exc, httpx.TransportError):
            raise SourceUnavailable(
                config.name, f"download failed: {type(exc).__name__}"
            ) from exc
        raise
    return target
189
+
190
+
191
async def _stream_to_file(url: str, source: str, path: Path) -> None:
    """Stream the body of a GET on `url` into `path`, enforcing the size cap.

    `source` is only used to label the errors raised.

    Raises:
        BadInput: if the server answers with a redirect (never followed).
        SourceUnavailable: on any non-200 status, a declared Content-Length
            above `MAX_DOWNLOAD_BYTES`, or a body that grows past that cap.
    """
    async with httpx.AsyncClient(follow_redirects=False, timeout=_STREAM_TIMEOUT) as client:
        async with client.stream("GET", url) as response:
            if response.is_redirect:
                raise BadInput(
                    field="source",
                    value=url,
                    allowed=(
                        "a direct download URL (redirects are refused; "
                        "pass the final URL or a local file)"
                    ),
                )
            if response.status_code != 200:
                raise SourceUnavailable(source, f"download failed: HTTP {response.status_code}")

            # Fail fast when the server announces an oversized body up front.
            announced = response.headers.get("Content-Length")
            if (
                announced is not None
                and announced.isdigit()
                and int(announced) > MAX_DOWNLOAD_BYTES
            ):
                raise SourceUnavailable(
                    source, f"file too large ({announced} bytes; cap {MAX_DOWNLOAD_BYTES})"
                )

            # The header may be absent or wrong, so the cap is also enforced
            # on the bytes actually received, before each chunk is written.
            received = 0
            with path.open("wb") as sink:
                async for piece in response.aiter_bytes():
                    received += len(piece)
                    if received > MAX_DOWNLOAD_BYTES:
                        raise SourceUnavailable(
                            source, f"download exceeded size cap of {MAX_DOWNLOAD_BYTES} bytes"
                        )
                    sink.write(piece)
221
+
222
+
223
+ def default_vintage(vintage: str | None) -> tuple[str, list[str]]:
224
+ """Today's date when the caller gave no vintage, with the assumption line."""
225
+ if vintage is not None:
226
+ return vintage, []
227
+ today = utc_now_iso()[:10]
228
+ return today, [f"vintage not provided; defaulted to today ({today})"]
229
+
230
+
231
def sync_result(
    *,
    dataset: str,
    rows_loaded: int,
    vintage: str,
    source_name: str,
    source_url: str,
    license_note: str,
    assumptions: list[str],
    extra_data: dict[str, Any] | None = None,
) -> ToolResult:
    """Build the one envelope shape every sync_* tool returns.

    The data always carries `dataset`, `rows_loaded` and `vintage`. Any
    `extra_data` entries are merged on top (overriding on key clash) and each
    extra key is given the `units.LABEL` unit. The source reference is stamped
    with the current UTC time; warnings are always empty.
    """
    extras: dict[str, Any] = extra_data or {}
    payload: dict[str, Any] = {
        "dataset": dataset,
        "rows_loaded": rows_loaded,
        "vintage": vintage,
        **extras,
    }
    unit_labels = {
        "dataset": units.LABEL,
        "rows_loaded": units.COUNT,
        "vintage": units.LABEL,
    }
    unit_labels.update(dict.fromkeys(extras, units.LABEL))
    provenance = SourceRef(
        name=source_name,
        url=source_url,
        retrieved_at=utc_now_iso(),
        license=license_note,
    )
    return ToolResult(
        data=payload,
        units=unit_labels,
        source=provenance,
        assumptions=assumptions,
        warnings=[],
    )
@@ -0,0 +1,111 @@
1
+ """SQLite HTTP cache keyed on canonicalized URL+params.
2
+
3
+ Caching is a correctness feature here, not just a UX one: every source has a
4
+ rate limit, and agents retry a lot. Sources whose responses are deterministic
5
+ per request (e.g. TMY-based modeling) get long TTLs via SourceConfig. Stale
6
+ entries are kept and can be served explicitly when a quota is exhausted.
7
+ """
8
+
9
+ import math
10
+ import sqlite3
11
+ import time
12
+ from collections.abc import Callable, Mapping
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+ from urllib.parse import urlencode
16
+
17
+ from solar_mcp_core.config import cache_dir, ensure_private_dir, harden_file_perms
18
+
19
+ _SCHEMA = """
20
+ CREATE TABLE IF NOT EXISTS http_cache (
21
+ key TEXT PRIMARY KEY,
22
+ source TEXT NOT NULL,
23
+ status INTEGER NOT NULL,
24
+ body TEXT NOT NULL,
25
+ retrieved_at REAL NOT NULL,
26
+ expires_at REAL NOT NULL
27
+ )
28
+ """
29
+
30
+
31
def canonicalize(base_url: str, path: str, params: Mapping[str, object]) -> str:
    """Build the stable cache/fixture key for a request.

    Parameters are emitted in sorted-key order with normalized values, and
    `api_key` is dropped, so cache entries survive key rotation and recorded
    fixtures never embed a secret in their key. Base URL and path are joined
    with exactly one slash.
    """
    query_pairs = [
        (name, _normalize(params[name])) for name in sorted(params) if name != "api_key"
    ]
    root = base_url.rstrip("/")
    tail = path.lstrip("/")
    return f"{root}/{tail}?{urlencode(query_pairs)}"
39
+
40
+
41
+ def _normalize(value: object) -> str:
42
+ if isinstance(value, bool):
43
+ return "1" if value else "0"
44
+ if isinstance(value, float) and math.isfinite(value) and value == int(value):
45
+ return str(int(value))
46
+ return str(value)
47
+
48
+
49
+ @dataclass
50
+ class CacheEntry:
51
+ key: str
52
+ source: str
53
+ status: int
54
+ body: str
55
+ retrieved_at: float
56
+ expires_at: float
57
+
58
+ def is_fresh(self, now: float) -> bool:
59
+ return now < self.expires_at
60
+
61
+
62
class HttpCache:
    """SQLite-backed store of HTTP responses with per-entry expiry.

    Expired rows are not deleted on read; they remain available to callers
    that ask for stale data explicitly.
    """

    def __init__(
        self,
        path: Path | str | None = None,
        clock: Callable[[], float] = time.time,
    ) -> None:
        """Open (or create) the cache database.

        Pass ":memory:" for an ephemeral cache (e.g. doctor's liveness ping,
        which must never be answered from disk). With no `path`, the database
        lives at `cache_dir() / "http.db"`. `clock` supplies the current time
        for expiry decisions.
        """
        self._clock = clock
        target = cache_dir() / "http.db" if path is None else path
        on_disk = isinstance(target, Path)
        if on_disk:
            # The cache holds api-key-adjacent bodies: keep its directory private.
            ensure_private_dir(target.parent)
        self._conn = sqlite3.connect(target)
        if on_disk:
            harden_file_perms(target)
        self._conn.execute(_SCHEMA)
        self._conn.commit()

    def get(self, key: str, *, allow_stale: bool = False) -> CacheEntry | None:
        """Look up `key`; return the entry if it is fresh, or stale-but-allowed.

        Returns None when nothing is stored under `key`, or when the stored
        entry has expired and `allow_stale` is False.
        """
        cursor = self._conn.execute(
            "SELECT key, source, status, body, retrieved_at, expires_at"
            " FROM http_cache WHERE key = ?",
            (key,),
        )
        stored = cursor.fetchone()
        if stored is None:
            return None
        entry = CacheEntry(*stored)
        fresh = entry.is_fresh(self._clock())
        return entry if (fresh or allow_stale) else None

    def put(self, key: str, source: str, status: int, body: str, ttl_seconds: int) -> CacheEntry:
        """Store (or overwrite) a response that expires `ttl_seconds` from now.

        Returns the entry exactly as written.
        """
        stored_at = self._clock()
        record = (key, source, status, body, stored_at, stored_at + ttl_seconds)
        self._conn.execute("INSERT OR REPLACE INTO http_cache VALUES (?, ?, ?, ?, ?, ?)", record)
        self._conn.commit()
        return CacheEntry(*record)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()
@@ -0,0 +1,112 @@
1
+ """The `doctor` command: checks keys and pings each registered source. Users
2
+ reach it as `solar-data-mcp doctor` — the umbrella console script delegates here.
3
+
4
+ This is the one component that intentionally makes live API calls — it exists
5
+ so users can verify their setup before pointing an agent at a server.
6
+ """
7
+
8
+ import argparse
9
+ import asyncio
10
+ import sys
11
+ from collections.abc import Callable
12
+
13
+ from solar_mcp_core.cache import HttpCache
14
+ from solar_mcp_core.config import SOURCES, SourceConfig, api_key_for, cache_dir
15
+ from solar_mcp_core.errors import QuotaExceeded, SourceUnavailable
16
+ from solar_mcp_core.http import SolarHttpClient, configure_debug_logging
17
+ from solar_mcp_core.ratelimit import TokenBucket
18
+
19
+ # Test seam: tests swap this factory to inject a MockTransport-backed client.
20
+ ClientFactory = Callable[[SourceConfig], SolarHttpClient]
21
+
22
+
23
def _default_client_factory(config: SourceConfig) -> SolarHttpClient:
    """Build the client doctor uses for `config` when no factory is injected."""
    # Doctor must observe the live source, never a disk cache entry —
    # an in-memory cache is created fresh and dies with the process.
    ephemeral_cache = HttpCache(path=":memory:")
    limiter = TokenBucket(capacity=5, refill_per_second=1)
    return SolarHttpClient(config, cache=ephemeral_cache, bucket=limiter)
31
+
32
+
33
def doctor(client_factory: ClientFactory = _default_client_factory) -> int:
    """Report on the cache directory and every registered source.

    Every source is checked even after an earlier one has failed. The cache
    directory line is informational only and does not affect the result.

    Returns:
        0 when all source checks pass, 1 otherwise.
    """
    location = cache_dir()
    state = "writable" if _cache_writable() else "NOT WRITABLE"
    print(f"cache dir: {location} ({state})")
    # A list (not a generator) so that all() cannot short-circuit the checks.
    outcomes = [_check_source(config, client_factory) for config in SOURCES.values()]
    return 0 if all(outcomes) else 1
39
+
40
+
41
def _check_source(config: SourceConfig, client_factory: ClientFactory) -> bool:
    """Check one source's key and liveness, printing a line per finding.

    Steps, in order:
      1. Key check — a source with no key env var needs none; a missing key
         is a SKIP (pass) for optional sources and a FAIL for required ones.
      2. Liveness ping — skipped (pass) when the source defines no ping path;
         otherwise one live request is made through `client_factory`.

    Returns:
        True when the source passes or is skipped, False on any FAIL.
    """
    label = f"[{config.name}]"
    if config.api_key_env is None:
        print(f"{label} no key required")
    elif api_key_for(config) is None:
        setup = f" Setup: {config.signup_url}" if config.signup_url else ""
        if not config.required:
            print(f"{label} SKIP — optional source; set {config.api_key_env} to enable.{setup}")
            return True
        print(f"{label} FAIL — {config.api_key_env} not set.{setup}")
        return False
    else:
        print(f"{label} key present ({config.api_key_env})")

    if config.ping_path is None:
        print(f"{label} SKIP — no liveness ping defined for this source")
        return True
    try:
        result = asyncio.run(
            _ping(config, client_factory, config.ping_path, dict(config.ping_params))
        )
    except QuotaExceeded as exc:
        print(f"{label} FAIL — {exc}")
        return False
    except SourceUnavailable as exc:
        # Only mention the signup page when the source has one; otherwise the
        # line would end in a misleading "(signup: None)".
        signup = f" (signup: {config.signup_url})" if config.signup_url else ""
        print(f"{label} FAIL — {exc.detail}{signup}")
        return False

    suffix = f", {result} requests remaining this hour" if result is not None else ""
    print(f"{label} PASS — live ping OK{suffix}")
    return True
72
+
73
+
74
async def _ping(
    config: SourceConfig,
    client_factory: ClientFactory,
    path: str,
    params: dict[str, object],
) -> int | None:
    """Make one live JSON request against `path` and report the remaining quota.

    A client is built from `client_factory` for this call and is always
    closed afterwards, whether or not the request succeeds.

    Returns:
        The `ratelimit_remaining` value of the fetched result (may be None).
    """
    http = client_factory(config)
    try:
        # Hand the client its own copy so the caller's dict is never shared.
        request_params = dict(params)
        outcome = await http.get_json(path, request_params)
        return outcome.ratelimit_remaining
    finally:
        await http.aclose()
86
+
87
+
88
def _cache_writable() -> bool:
    """Return True if a file can be created and removed in the cache directory.

    The directory is created first if it does not exist. Any OSError along
    the way is reported as "not writable" rather than raised.
    """
    try:
        cache_dir().mkdir(parents=True, exist_ok=True)
        marker = cache_dir() / ".write-probe"
        marker.touch()
        marker.unlink()
    except OSError:
        return False
    return True
97
+
98
+
99
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested utility.

    `argv` defaults to the process arguments when None. The only subcommand
    is `doctor`, and a subcommand is required.

    Returns:
        The exit status of the subcommand that ran.
    """
    configure_debug_logging()
    parser = argparse.ArgumentParser(prog="solar-data-mcp", description="solar-data-mcp utilities")
    commands = parser.add_subparsers(dest="command", required=True)
    commands.add_parser("doctor", help="check API keys and ping each data source")
    parsed = parser.parse_args(argv)

    if parsed.command != "doctor":
        return 2  # pragma: no cover — argparse enforces the choices
    return doctor()
109
+
110
+
111
+ if __name__ == "__main__":
112
+ sys.exit(main())