solar-data-mcp-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- solar_data_mcp_core-0.1.0/.gitignore +20 -0
- solar_data_mcp_core-0.1.0/LICENSE +21 -0
- solar_data_mcp_core-0.1.0/PKG-INFO +41 -0
- solar_data_mcp_core-0.1.0/README.md +22 -0
- solar_data_mcp_core-0.1.0/pyproject.toml +34 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/__init__.py +3 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/bulk.py +256 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/cache.py +111 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/cli.py +112 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/config.py +175 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/envelope.py +64 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/errors.py +42 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/http.py +253 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/localfile.py +40 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/net.py +67 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/py.typed +0 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/ratelimit.py +64 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/redact.py +27 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/units.py +28 -0
- solar_data_mcp_core-0.1.0/src/solar_mcp_core/validation.py +119 -0
- solar_data_mcp_core-0.1.0/tests/test_bulk.py +51 -0
- solar_data_mcp_core-0.1.0/tests/test_cache.py +90 -0
- solar_data_mcp_core-0.1.0/tests/test_cli.py +161 -0
- solar_data_mcp_core-0.1.0/tests/test_config.py +44 -0
- solar_data_mcp_core-0.1.0/tests/test_core_download_guard.py +45 -0
- solar_data_mcp_core-0.1.0/tests/test_core_localfile.py +42 -0
- solar_data_mcp_core-0.1.0/tests/test_core_redact_perms.py +71 -0
- solar_data_mcp_core-0.1.0/tests/test_envelope.py +32 -0
- solar_data_mcp_core-0.1.0/tests/test_errors.py +33 -0
- solar_data_mcp_core-0.1.0/tests/test_fixture_hygiene.py +38 -0
- solar_data_mcp_core-0.1.0/tests/test_http.py +235 -0
- solar_data_mcp_core-0.1.0/tests/test_ratelimit.py +49 -0
- solar_data_mcp_core-0.1.0/tests/test_validation.py +23 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Logan Bernard
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: solar-data-mcp-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared infrastructure for solar-data-mcp servers: HTTP client, cache, rate limiting, result envelope
|
|
5
|
+
Project-URL: Homepage, https://github.com/hoodsy/solar-data-mcp
|
|
6
|
+
Author: Logan Bernard
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: mcp,nrel,open-data,solar
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Requires-Dist: httpx<1,>=0.27
|
|
17
|
+
Requires-Dist: pydantic<3,>=2.7
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# solar-data-mcp-core
|
|
21
|
+
|
|
22
|
+
Shared infrastructure for [solar-data-mcp](https://github.com/hoodsy/solar-data-mcp) —
|
|
23
|
+
US open solar data, agent-accessible over MCP. One install gives an agent production
|
|
24
|
+
modeling, solar economics, market intelligence, and generation forecasts, with every
|
|
25
|
+
number carrying `data + units + source + assumptions + warnings`.
|
|
26
|
+
|
|
27
|
+
This package is the plumbing the servers share: the HTTP client (retry, token-bucket
|
|
28
|
+
rate limiting, SQLite cache), the `ToolResult` envelope, the DuckDB bulk store, the
|
|
29
|
+
error taxonomy, and `solar-data-mcp doctor`. You usually don't install it directly.
|
|
30
|
+
|
|
31
|
+
Where to go instead:
|
|
32
|
+
|
|
33
|
+
| You want | Go to |
|
|
34
|
+
|---|---|
|
|
35
|
+
| Everything (18 tools + 11 skills + 4 report prompts, one install) | [`solar-data-mcp`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-data-mcp/README.md) |
|
|
36
|
+
| Production & sizing only (PVWatts, NSRDB) | [`solar-data-mcp-nrel`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/nrel-solar/README.md) |
|
|
37
|
+
| Tariffs, incentives & ROI only | [`solar-data-mcp-economics`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-economics/README.md) |
|
|
38
|
+
| Installed prices, permitting & utility-scale only | [`solar-data-mcp-market`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-market/README.md) |
|
|
39
|
+
| 48-hour generation forecasts only | [`solar-data-mcp-forecast`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-forecast/README.md) |
|
|
40
|
+
| The skill catalog, routing design & report templates | [`docs/skills.md`](https://github.com/hoodsy/solar-data-mcp/blob/main/docs/skills.md) |
|
|
41
|
+
| Quickstart & agent config snippets | [repo README](https://github.com/hoodsy/solar-data-mcp#quickstart) |
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# solar-data-mcp-core
|
|
2
|
+
|
|
3
|
+
Shared infrastructure for [solar-data-mcp](https://github.com/hoodsy/solar-data-mcp) —
|
|
4
|
+
US open solar data, agent-accessible over MCP. One install gives an agent production
|
|
5
|
+
modeling, solar economics, market intelligence, and generation forecasts, with every
|
|
6
|
+
number carrying `data + units + source + assumptions + warnings`.
|
|
7
|
+
|
|
8
|
+
This package is the plumbing the servers share: the HTTP client (retry, token-bucket
|
|
9
|
+
rate limiting, SQLite cache), the `ToolResult` envelope, the DuckDB bulk store, the
|
|
10
|
+
error taxonomy, and `solar-data-mcp doctor`. You usually don't install it directly.
|
|
11
|
+
|
|
12
|
+
Where to go instead:
|
|
13
|
+
|
|
14
|
+
| You want | Go to |
|
|
15
|
+
|---|---|
|
|
16
|
+
| Everything (18 tools + 11 skills + 4 report prompts, one install) | [`solar-data-mcp`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-data-mcp/README.md) |
|
|
17
|
+
| Production & sizing only (PVWatts, NSRDB) | [`solar-data-mcp-nrel`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/nrel-solar/README.md) |
|
|
18
|
+
| Tariffs, incentives & ROI only | [`solar-data-mcp-economics`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-economics/README.md) |
|
|
19
|
+
| Installed prices, permitting & utility-scale only | [`solar-data-mcp-market`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-market/README.md) |
|
|
20
|
+
| 48-hour generation forecasts only | [`solar-data-mcp-forecast`](https://github.com/hoodsy/solar-data-mcp/blob/main/packages/solar-forecast/README.md) |
|
|
21
|
+
| The skill catalog, routing design & report templates | [`docs/skills.md`](https://github.com/hoodsy/solar-data-mcp/blob/main/docs/skills.md) |
|
|
22
|
+
| Quickstart & agent config snippets | [repo README](https://github.com/hoodsy/solar-data-mcp#quickstart) |
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "solar-data-mcp-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Shared infrastructure for solar-data-mcp servers: HTTP client, cache, rate limiting, result envelope"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
authors = [{ name = "Logan Bernard" }]
|
|
9
|
+
keywords = ["solar", "mcp", "nrel", "open-data"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"Programming Language :: Python :: 3.11",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Topic :: Scientific/Engineering",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"httpx>=0.27,<1",
|
|
19
|
+
"pydantic>=2.7,<3",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://github.com/hoodsy/solar-data-mcp"
|
|
24
|
+
|
|
25
|
+
# No console script: the user-facing command (server + doctor passthrough) is
|
|
26
|
+
# `solar-data-mcp`, owned by the umbrella package. PyPI's `solar-mcp` name
|
|
27
|
+
# belongs to an unrelated project, so nothing here may claim that command.
|
|
28
|
+
|
|
29
|
+
[build-system]
|
|
30
|
+
requires = ["hatchling"]
|
|
31
|
+
build-backend = "hatchling.build"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/solar_mcp_core"]
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""DuckDB-backed bulk store: the second cache tier, for datasets too large
|
|
2
|
+
for the HTTP cache (DSIRE snapshots, Tracking the Sun, SolarTRACE).
|
|
3
|
+
|
|
4
|
+
Populated only by explicit `sync_*` tools — never implicitly — and every
|
|
5
|
+
dataset records its vintage so query results can cite how fresh the data is.
|
|
6
|
+
The `duckdb` dependency is declared by the packages that use this module
|
|
7
|
+
(solar-data-mcp-economics, solar-data-mcp-market), keeping solar-data-mcp-core
|
|
8
|
+
lightweight.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import tempfile
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from solar_mcp_core import units
|
|
22
|
+
from solar_mcp_core.config import (
|
|
23
|
+
SourceConfig,
|
|
24
|
+
cache_dir,
|
|
25
|
+
ensure_private_dir,
|
|
26
|
+
harden_file_perms,
|
|
27
|
+
)
|
|
28
|
+
from solar_mcp_core.envelope import SourceRef, ToolResult, utc_now_iso
|
|
29
|
+
from solar_mcp_core.errors import BadInput, SourceUnavailable
|
|
30
|
+
from solar_mcp_core.net import assert_allowed_download_url
|
|
31
|
+
|
|
32
|
+
_META_SCHEMA = """
|
|
33
|
+
CREATE TABLE IF NOT EXISTS _meta (
|
|
34
|
+
dataset TEXT PRIMARY KEY,
|
|
35
|
+
vintage TEXT NOT NULL,
|
|
36
|
+
loaded_at TEXT NOT NULL,
|
|
37
|
+
schema_version INTEGER NOT NULL
|
|
38
|
+
)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
_IDENTIFIER = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
|
42
|
+
|
|
43
|
+
# Shared-dataset contract: these names are used across packages (economics
|
|
44
|
+
# reads the Tracking the Sun table that market syncs), so they live here.
|
|
45
|
+
TTS_DATASET = "tts"
|
|
46
|
+
TTS_TABLE = "tts_systems"
|
|
47
|
+
SOLARTRACE_DATASET = "solartrace"
|
|
48
|
+
SOLARTRACE_TABLE = "solartrace"
|
|
49
|
+
DSIRE_DATASET = "dsire_programs"
|
|
50
|
+
DSIRE_TABLE = "dsire_programs"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class DatasetVintage:
    """Provenance record for one bulk dataset; fields mirror the `_meta` columns."""

    dataset: str  # dataset key (primary key of `_meta`)
    vintage: str  # vintage label recorded for the loaded snapshot
    loaded_at: str  # ISO 8601 UTC
    schema_version: int  # schema version stored alongside the vintage
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class BulkStore:
    """One DuckDB connection holding bulk dataset tables plus a `_meta` vintage table."""

    def __init__(self, path: Path | str | None = None) -> None:
        """Open (or create) the bulk store. Pass ":memory:" for tests.

        With no `path` the database is `cache_dir() / "bulk.duckdb"`. Only
        `Path` targets receive the private-directory and file-permission
        hardening; string targets are handed to DuckDB as given.

        Raises:
            RuntimeError: if the optional `duckdb` package is not installed.
        """
        try:
            import duckdb
        except ImportError as exc:  # pragma: no cover — dev/CI envs always have it
            raise RuntimeError(
                "BulkStore needs the 'duckdb' package. It is installed automatically "
                "with solar-data-mcp-economics and solar-data-mcp-market; for "
                "standalone use: pip install duckdb"
            ) from exc
        target = (cache_dir() / "bulk.duckdb") if path is None else path
        on_disk = isinstance(target, Path)
        if on_disk:
            # The bulk store holds locally synced data, so keep its directory private.
            ensure_private_dir(target.parent)
        self._conn = duckdb.connect(str(target))
        if on_disk:
            harden_file_perms(target)
        self._conn.execute(_META_SCHEMA)
        # Guards the stage-validate-swap section of every sync_* tool: the
        # umbrella shares one store (one staging table, one non-thread-safe
        # DuckDB connection) across all tool families, so syncs must not overlap.
        self.write_lock = asyncio.Lock()

    def stage_csv(self, table: str, csv_path: Path) -> int:
        """Replace `table` with the contents of a CSV and return its row count.

        No vintage is touched here. DuckDB reads the file itself rather than
        it being loaded into Python memory, which keeps multi-GB sources such
        as Tracking the Sun tractable. Sync loaders stage into a scratch
        table, validate, swap, and only then record the vintage, so a bad
        file cannot corrupt a good snapshot or its provenance.

        Raises:
            ValueError: if `table` is not a plain identifier.
        """
        _check_identifier(table)
        create_sql = f'CREATE OR REPLACE TABLE "{table}" AS SELECT * FROM read_csv_auto(?)'
        self._conn.execute(create_sql, [str(csv_path)])
        counted = self._conn.execute(f'SELECT count(*) FROM "{table}"').fetchone()
        if not counted:
            return 0
        return int(counted[0])

    def set_vintage(self, dataset: str, vintage: str, schema_version: int = 1) -> None:
        """Insert or replace the `_meta` row for `dataset`, stamped with `utc_now_iso()`."""
        meta_row = [dataset, vintage, utc_now_iso(), schema_version]
        self._conn.execute("INSERT OR REPLACE INTO _meta VALUES (?, ?, ?, ?)", meta_row)

    def load_csv(
        self,
        dataset: str,
        table: str,
        csv_path: Path,
        vintage: str,
        schema_version: int = 1,
    ) -> int:
        """Run `stage_csv` then `set_vintage` back to back and return the row count.

        For loads that need no validation or transform between the two steps.
        """
        rows_loaded = self.stage_csv(table, csv_path)
        self.set_vintage(dataset, vintage, schema_version)
        return rows_loaded

    def vintage(self, dataset: str) -> DatasetVintage | None:
        """Return the recorded vintage for `dataset`, or None if it has no `_meta` row."""
        found = self._conn.execute(
            "SELECT dataset, vintage, loaded_at, schema_version FROM _meta WHERE dataset = ?",
            [dataset],
        ).fetchone()
        return DatasetVintage(*found) if found is not None else None

    def has_table(self, table: str) -> bool:
        """Report whether `information_schema.tables` lists a table named `table`.

        Raises:
            ValueError: if `table` is not a plain identifier.
        """
        _check_identifier(table)
        cursor = self._conn.execute(
            "SELECT count(*) FROM information_schema.tables WHERE table_name = ?", [table]
        )
        hit = cursor.fetchone()
        return bool(hit and hit[0])

    def query(self, sql: str, params: list[Any] | None = None) -> list[tuple[Any, ...]]:
        """Run a read query and return all rows as tuples.

        Callers own the SQL text; `params` are always passed as bound parameters.
        """
        bound = params or []
        return [tuple(record) for record in self._conn.execute(sql, bound).fetchall()]

    def execute(self, sql: str, params: list[Any] | None = None) -> None:
        """Run DDL/DML (CREATE TABLE AS, DROP) for sync loaders' transforms; no result."""
        bound = params or []
        self._conn.execute(sql, bound)

    def close(self) -> None:
        """Close the underlying DuckDB connection."""
        self._conn.close()
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _check_identifier(name: str) -> None:
    """Reject any table name that is not a plain identifier.

    Callers interpolate `name` into double-quoted SQL, so it must consist
    solely of ASCII letters, digits and underscores, and must not start
    with a digit (the `_IDENTIFIER` pattern).

    Raises:
        ValueError: if `name` contains anything outside that pattern.
    """
    # fullmatch rather than match: the pattern's trailing `$` also matches just
    # before a final newline, so `match` would accept a name like "tts_systems\n".
    if not _IDENTIFIER.fullmatch(name):
        raise ValueError(f"invalid table name: {name!r}")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# Ceiling above the largest real dataset (Tracking the Sun is ~1-2 GB); bounds
|
|
157
|
+
# disk use from a hostile or misconfigured endpoint without blocking real files.
|
|
158
|
+
MAX_DOWNLOAD_BYTES = 4 * 1024**3
|
|
159
|
+
_STREAM_TIMEOUT = 60.0 # per-connect/read; a stall aborts the socket
|
|
160
|
+
_TOTAL_TIMEOUT = 3600.0 # wall-clock ceiling; defeats a slow-drip that resets read timeouts
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
async def fetch_to_tempfile(url: str, *, config: SourceConfig, suffix: str = ".csv") -> Path:
    """Download a dataset's bulk file into a fresh temp file and return its path.

    Used by the sync_* tools; the caller is responsible for deleting the file.
    The URL is checked first with `assert_allowed_download_url` (the
    agent-supplied `source` is untrusted, so this must not become an SSRF or
    internal-fetch primitive). Redirects are refused, and the transfer is
    bounded in both size and wall-clock time. Plain httpx is used rather than
    SolarHttpClient: bulk files are not JSON and must never enter the HTTP
    cache tier.

    Raises:
        SourceUnavailable: when the overall deadline passes or the transport fails.
        Any other error raised while streaming propagates unchanged.
    """
    assert_allowed_download_url(url, config)
    handle, temp_name = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    path = Path(temp_name)
    try:
        await asyncio.wait_for(_stream_to_file(url, config.name, path), _TOTAL_TIMEOUT)
    except BaseException as exc:
        # Timeout, transport error, BadInput, HTTP-status/size failure or
        # cancellation: whatever went wrong, never leak the temp file.
        path.unlink(missing_ok=True)
        if isinstance(exc, TimeoutError):
            raise SourceUnavailable(
                config.name, f"download exceeded {_TOTAL_TIMEOUT:.0f}s"
            ) from exc
        if isinstance(exc, httpx.TransportError):
            raise SourceUnavailable(
                config.name, f"download failed: {type(exc).__name__}"
            ) from exc
        raise
    return path
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
async def _stream_to_file(url: str, source: str, path: Path) -> None:
    """GET `url` without following redirects and write the body to `path`.

    Raises:
        BadInput: if the server answers with a redirect.
        SourceUnavailable: on any non-200 status, or when the declared or
            actually received size exceeds `MAX_DOWNLOAD_BYTES`.
    """
    client = httpx.AsyncClient(follow_redirects=False, timeout=_STREAM_TIMEOUT)
    async with client, client.stream("GET", url) as response:
        if response.is_redirect:
            raise BadInput(
                field="source",
                value=url,
                allowed=(
                    "a direct download URL (redirects are refused; "
                    "pass the final URL or a local file)"
                ),
            )
        if response.status_code != 200:
            raise SourceUnavailable(source, f"download failed: HTTP {response.status_code}")

        # Cheap early exit when the server announces an oversized body up front.
        declared = response.headers.get("Content-Length")
        announced_too_big = (
            declared is not None and declared.isdigit() and int(declared) > MAX_DOWNLOAD_BYTES
        )
        if announced_too_big:
            raise SourceUnavailable(
                source, f"file too large ({declared} bytes; cap {MAX_DOWNLOAD_BYTES})"
            )

        # The header may be absent or wrong, so the cap is enforced again on
        # the bytes actually received, before each chunk is written.
        received = 0
        with path.open("wb") as sink:
            async for piece in response.aiter_bytes():
                received += len(piece)
                if received > MAX_DOWNLOAD_BYTES:
                    raise SourceUnavailable(
                        source, f"download exceeded size cap of {MAX_DOWNLOAD_BYTES} bytes"
                    )
                sink.write(piece)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def default_vintage(vintage: str | None) -> tuple[str, list[str]]:
|
|
224
|
+
"""Today's date when the caller gave no vintage, with the assumption line."""
|
|
225
|
+
if vintage is not None:
|
|
226
|
+
return vintage, []
|
|
227
|
+
today = utc_now_iso()[:10]
|
|
228
|
+
return today, [f"vintage not provided; defaulted to today ({today})"]
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def sync_result(
    *,
    dataset: str,
    rows_loaded: int,
    vintage: str,
    source_name: str,
    source_url: str,
    license_note: str,
    assumptions: list[str],
    extra_data: dict[str, Any] | None = None,
) -> ToolResult:
    """Build the single envelope shape that every sync_* tool returns.

    `data` always carries `dataset`, `rows_loaded` and `vintage`; any
    `extra_data` entries are merged on top and each extra key is given the
    `units.LABEL` unit. The source reference is stamped with `utc_now_iso()`
    and the result has no warnings.
    """
    extras = extra_data or {}
    data: dict[str, Any] = {
        "dataset": dataset,
        "rows_loaded": rows_loaded,
        "vintage": vintage,
        **extras,
    }
    units_map = {
        "dataset": units.LABEL,
        "rows_loaded": units.COUNT,
        "vintage": units.LABEL,
        **dict.fromkeys(extras, units.LABEL),
    }
    provenance = SourceRef(
        name=source_name, url=source_url, retrieved_at=utc_now_iso(), license=license_note
    )
    return ToolResult(
        data=data,
        units=units_map,
        source=provenance,
        assumptions=assumptions,
        warnings=[],
    )
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""SQLite HTTP cache keyed on canonicalized URL+params.
|
|
2
|
+
|
|
3
|
+
Caching is a correctness feature here, not just a UX one: every source has a
|
|
4
|
+
rate limit, and agents retry a lot. Sources whose responses are deterministic
|
|
5
|
+
per request (e.g. TMY-based modeling) get long TTLs via SourceConfig. Stale
|
|
6
|
+
entries are kept and can be served explicitly when a quota is exhausted.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
import sqlite3
|
|
11
|
+
import time
|
|
12
|
+
from collections.abc import Callable, Mapping
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from urllib.parse import urlencode
|
|
16
|
+
|
|
17
|
+
from solar_mcp_core.config import cache_dir, ensure_private_dir, harden_file_perms
|
|
18
|
+
|
|
19
|
+
_SCHEMA = """
|
|
20
|
+
CREATE TABLE IF NOT EXISTS http_cache (
|
|
21
|
+
key TEXT PRIMARY KEY,
|
|
22
|
+
source TEXT NOT NULL,
|
|
23
|
+
status INTEGER NOT NULL,
|
|
24
|
+
body TEXT NOT NULL,
|
|
25
|
+
retrieved_at REAL NOT NULL,
|
|
26
|
+
expires_at REAL NOT NULL
|
|
27
|
+
)
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def canonicalize(base_url: str, path: str, params: Mapping[str, object]) -> str:
    """Build the stable cache/fixture key for a request.

    Parameters are ordered by name and normalized to strings, and `api_key`
    is left out so that cache entries survive key rotation and recorded
    fixtures never embed a secret in their key. The result always has the
    form `<base>/<path>?<query>`, with exactly one slash between base and path.
    """
    ordered = sorted(params.items(), key=lambda item: item[0])
    query = urlencode(
        [(name, _normalize(value)) for name, value in ordered if name != "api_key"]
    )
    root = base_url.rstrip("/")
    tail = path.lstrip("/")
    return f"{root}/{tail}?{query}"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _normalize(value: object) -> str:
|
|
42
|
+
if isinstance(value, bool):
|
|
43
|
+
return "1" if value else "0"
|
|
44
|
+
if isinstance(value, float) and math.isfinite(value) and value == int(value):
|
|
45
|
+
return str(int(value))
|
|
46
|
+
return str(value)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class CacheEntry:
    """One cached HTTP response; fields follow the `http_cache` column order."""

    key: str  # cache key the entry is stored under
    source: str  # source name supplied when the entry was stored
    status: int  # HTTP status code
    body: str  # response body text
    retrieved_at: float  # clock reading when the entry was stored
    expires_at: float  # clock reading at which the entry stops being fresh

    def is_fresh(self, now: float) -> bool:
        """Return True while `now` is strictly earlier than `expires_at`."""
        return self.expires_at > now
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class HttpCache:
    """SQLite-backed store of HTTP responses with a per-entry expiry time."""

    def __init__(
        self,
        path: Path | str | None = None,
        clock: Callable[[], float] = time.time,
    ) -> None:
        """Open (or create) the cache database.

        With no `path` the database is `cache_dir() / "http.db"`. Pass
        ":memory:" for an ephemeral cache (e.g. doctor's liveness ping, which
        must never be answered from disk). `clock` supplies the timestamps
        used for storing entries and for freshness checks.
        """
        self._clock = clock
        db_path = (cache_dir() / "http.db") if path is None else path
        on_disk = isinstance(db_path, Path)
        if on_disk:
            # The cache holds api-key-adjacent bodies, so keep its directory private.
            ensure_private_dir(db_path.parent)
        self._conn = sqlite3.connect(db_path)
        if on_disk:
            harden_file_perms(db_path)
        self._conn.execute(_SCHEMA)
        self._conn.commit()

    def get(self, key: str, *, allow_stale: bool = False) -> CacheEntry | None:
        """Look up `key`; return the entry if it is fresh, else None.

        An expired entry is still returned when `allow_stale` is true.
        """
        lookup_sql = (
            "SELECT key, source, status, body, retrieved_at, expires_at"
            " FROM http_cache WHERE key = ?"
        )
        row = self._conn.execute(lookup_sql, (key,)).fetchone()
        if row is None:
            return None
        cached = CacheEntry(*row)
        usable = cached.is_fresh(self._clock()) or allow_stale
        return cached if usable else None

    def put(self, key: str, source: str, status: int, body: str, ttl_seconds: int) -> CacheEntry:
        """Store (or overwrite) a response under `key` and return the stored entry.

        The entry expires `ttl_seconds` after the current clock reading.
        """
        stamped = self._clock()
        entry = CacheEntry(
            key=key,
            source=source,
            status=status,
            body=body,
            retrieved_at=stamped,
            expires_at=stamped + ttl_seconds,
        )
        record = (key, source, status, body, entry.retrieved_at, entry.expires_at)
        self._conn.execute("INSERT OR REPLACE INTO http_cache VALUES (?, ?, ?, ?, ?, ?)", record)
        self._conn.commit()
        return entry

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._conn.close()
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""The `doctor` command: checks keys and pings each registered source. Users
|
|
2
|
+
reach it as `solar-data-mcp doctor` — the umbrella console script delegates here.
|
|
3
|
+
|
|
4
|
+
This is the one component that intentionally makes live API calls — it exists
|
|
5
|
+
so users can verify their setup before pointing an agent at a server.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import asyncio
|
|
10
|
+
import sys
|
|
11
|
+
from collections.abc import Callable
|
|
12
|
+
|
|
13
|
+
from solar_mcp_core.cache import HttpCache
|
|
14
|
+
from solar_mcp_core.config import SOURCES, SourceConfig, api_key_for, cache_dir
|
|
15
|
+
from solar_mcp_core.errors import QuotaExceeded, SourceUnavailable
|
|
16
|
+
from solar_mcp_core.http import SolarHttpClient, configure_debug_logging
|
|
17
|
+
from solar_mcp_core.ratelimit import TokenBucket
|
|
18
|
+
|
|
19
|
+
# Test seam: tests swap this factory to inject a MockTransport-backed client.
|
|
20
|
+
ClientFactory = Callable[[SourceConfig], SolarHttpClient]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _default_client_factory(config: SourceConfig) -> SolarHttpClient:
    """Build the client doctor uses for live pings against `config`'s source."""
    # Doctor must observe the live source, never a disk cache entry: an
    # in-memory cache starts empty and dies with the process.
    throwaway_cache = HttpCache(path=":memory:")
    ping_bucket = TokenBucket(capacity=5, refill_per_second=1)
    return SolarHttpClient(config, cache=throwaway_cache, bucket=ping_bucket)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def doctor(client_factory: ClientFactory = _default_client_factory) -> int:
    """Print a setup report for every registered source; return a process exit code.

    Returns 0 when every source check passes and 1 otherwise. Every source
    is checked even after an earlier one has failed.
    """
    location = cache_dir()
    state = "writable" if _cache_writable() else "NOT WRITABLE"
    # NOTE(review): an unwritable cache dir is reported but does not affect the
    # exit code — confirm that is intended.
    print(f"cache dir: {location} ({state})")
    outcomes = [_check_source(config, client_factory) for config in SOURCES.values()]
    return 0 if all(outcomes) else 1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _check_source(config: SourceConfig, client_factory: ClientFactory) -> bool:
    """Report one source's API-key status and live ping result on stdout.

    Returns True when the source passes or is legitimately skipped (optional
    source without a key, or no ping defined), and False when a required key
    is missing or the ping fails with QuotaExceeded or SourceUnavailable.
    Other exceptions raised by the ping propagate to the caller.
    """
    label = f"[{config.name}]"
    if config.api_key_env is None:
        print(f"{label} no key required")
    elif api_key_for(config) is None:
        setup = f" Setup: {config.signup_url}" if config.signup_url else ""
        if not config.required:
            print(f"{label} SKIP — optional source; set {config.api_key_env} to enable.{setup}")
            return True
        print(f"{label} FAIL — {config.api_key_env} not set.{setup}")
        return False
    else:
        print(f"{label} key present ({config.api_key_env})")

    if config.ping_path is None:
        print(f"{label} SKIP — no liveness ping defined for this source")
        return True
    try:
        result = asyncio.run(
            _ping(config, client_factory, config.ping_path, dict(config.ping_params))
        )
    except QuotaExceeded as exc:
        print(f"{label} FAIL — {exc}")
        return False
    except SourceUnavailable as exc:
        # Only mention a signup URL when the source has one; the missing-key
        # branch above already omits it, and "(signup: None)" is misleading.
        signup = f" (signup: {config.signup_url})" if config.signup_url else ""
        print(f"{label} FAIL — {exc.detail}{signup}")
        return False

    # `result` is the client's ratelimit_remaining reading; None means none was reported.
    suffix = f", {result} requests remaining this hour" if result is not None else ""
    print(f"{label} PASS — live ping OK{suffix}")
    return True
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
async def _ping(
    config: SourceConfig,
    client_factory: ClientFactory,
    path: str,
    params: dict[str, object],
) -> int | None:
    """Issue one `get_json` call through a freshly built client.

    Returns the response's `ratelimit_remaining` value. The client is always
    closed afterwards, whether or not the request succeeded.
    """
    client = client_factory(config)
    try:
        # A copy of `params` is passed, so the caller's dict is not handed to the client.
        response = await client.get_json(path, dict(params))
        return response.ratelimit_remaining
    finally:
        await client.aclose()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _cache_writable() -> bool:
    """Return True if a probe file can be created and removed in the cache dir.

    The cache directory is created first if it does not exist. Any OSError
    along the way counts as "not writable".
    """
    try:
        cache_dir().mkdir(parents=True, exist_ok=True)
        marker = cache_dir() / ".write-probe"
        marker.touch()
        marker.unlink()
    except OSError:
        return False
    return True
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested subcommand.

    `doctor` is the only subcommand; its exit code is returned. `argv` is
    passed straight to argparse, so None means "use the process arguments".
    """
    configure_debug_logging()
    parser = argparse.ArgumentParser(prog="solar-data-mcp", description="solar-data-mcp utilities")
    commands = parser.add_subparsers(dest="command", required=True)
    commands.add_parser("doctor", help="check API keys and ping each data source")
    parsed = parser.parse_args(argv)

    if parsed.command != "doctor":
        return 2  # pragma: no cover — argparse enforces the choices
    return doctor()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|