statcan-mcp-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statcan_mcp_server-0.1.0/.github/workflows/publish-mcp-registry.yml +54 -0
- statcan_mcp_server-0.1.0/CONTRIBUTING.md +432 -0
- statcan_mcp_server-0.1.0/IMPLEMENTATIONS.md +144 -0
- statcan_mcp_server-0.1.0/LICENSE +21 -0
- statcan_mcp_server-0.1.0/PKG-INFO +232 -0
- statcan_mcp_server-0.1.0/README.md +211 -0
- statcan_mcp_server-0.1.0/assets/StatCan-Header.png +0 -0
- statcan_mcp_server-0.1.0/pyproject.toml +43 -0
- statcan_mcp_server-0.1.0/server.json +25 -0
- statcan_mcp_server-0.1.0/src/__init__.py +8 -0
- statcan_mcp_server-0.1.0/src/api/__init__.py +6 -0
- statcan_mcp_server-0.1.0/src/api/client.py +36 -0
- statcan_mcp_server-0.1.0/src/api/cube_tools.py +398 -0
- statcan_mcp_server-0.1.0/src/api/metadata_tools.py +30 -0
- statcan_mcp_server-0.1.0/src/api/vector_tools.py +285 -0
- statcan_mcp_server-0.1.0/src/config.py +27 -0
- statcan_mcp_server-0.1.0/src/db/__init__.py +6 -0
- statcan_mcp_server-0.1.0/src/db/connection.py +9 -0
- statcan_mcp_server-0.1.0/src/db/queries.py +250 -0
- statcan_mcp_server-0.1.0/src/db/schema.py +76 -0
- statcan_mcp_server-0.1.0/src/models/__init__.py +6 -0
- statcan_mcp_server-0.1.0/src/models/api_models.py +38 -0
- statcan_mcp_server-0.1.0/src/models/db_models.py +12 -0
- statcan_mcp_server-0.1.0/src/server.py +94 -0
- statcan_mcp_server-0.1.0/src/util/__init__.py +6 -0
- statcan_mcp_server-0.1.0/src/util/cache.py +67 -0
- statcan_mcp_server-0.1.0/src/util/coordinate.py +24 -0
- statcan_mcp_server-0.1.0/src/util/logger.py +33 -0
- statcan_mcp_server-0.1.0/src/util/registry.py +116 -0
- statcan_mcp_server-0.1.0/src/util/sql_helpers.py +23 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
name: Publish to PyPI & MCP Registry
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish-pypi:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write
|
|
12
|
+
environment: pypi
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout code
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Install build tools
|
|
24
|
+
run: pip install build
|
|
25
|
+
|
|
26
|
+
- name: Build package
|
|
27
|
+
run: python -m build
|
|
28
|
+
|
|
29
|
+
- name: Publish to PyPI
|
|
30
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
31
|
+
|
|
32
|
+
publish-registry:
|
|
33
|
+
needs: publish-pypi
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
permissions:
|
|
36
|
+
id-token: write
|
|
37
|
+
contents: read
|
|
38
|
+
|
|
39
|
+
steps:
|
|
40
|
+
- name: Checkout code
|
|
41
|
+
uses: actions/checkout@v4
|
|
42
|
+
|
|
43
|
+
- name: Install mcp-publisher
|
|
44
|
+
run: |
|
|
45
|
+
curl -L "https://github.com/modelcontextprotocol/registry/releases/latest/download/mcp-publisher_$(uname -s | tr '[:upper:]' '[:lower:]')_$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/').tar.gz" | tar xz mcp-publisher
|
|
46
|
+
|
|
47
|
+
- name: Authenticate to MCP Registry
|
|
48
|
+
run: ./mcp-publisher login github-oidc
|
|
49
|
+
|
|
50
|
+
- name: Validate server.json
|
|
51
|
+
run: ./mcp-publisher validate
|
|
52
|
+
|
|
53
|
+
- name: Publish to MCP Registry
|
|
54
|
+
run: ./mcp-publisher publish
|
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
# Contributing to Statistics Canada MCP Server
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing to the Statistics Canada MCP Server! This project provides tools for interacting with Statistics Canada data APIs through the Model Context Protocol (MCP), enabling LLMs and other clients to access Canadian statistical data.
|
|
4
|
+
|
|
5
|
+
## 📋 Table of Contents
|
|
6
|
+
|
|
7
|
+
- [Getting Started](#getting-started)
|
|
8
|
+
- [Development Environment Setup](#development-environment-setup)
|
|
9
|
+
- [Project Architecture](#project-architecture)
|
|
10
|
+
- [Development Workflow](#development-workflow)
|
|
11
|
+
- [Code Standards](#code-standards)
|
|
12
|
+
- [Testing](#testing)
|
|
13
|
+
- [Documentation](#documentation)
|
|
14
|
+
- [Pull Request Process](#pull-request-process)
|
|
15
|
+
- [Types of Contributions](#types-of-contributions)
|
|
16
|
+
- [Future Development](#future-development)
|
|
17
|
+
|
|
18
|
+
## Getting Started
|
|
19
|
+
|
|
20
|
+
### Prerequisites
|
|
21
|
+
|
|
22
|
+
- **Python 3.10+** - Download from [python.org](https://www.python.org/downloads/)
|
|
23
|
+
- **UV** - Fast Python package installer
|
|
24
|
+
- **Git** - For version control
|
|
25
|
+
- Basic familiarity with:
|
|
26
|
+
- Python async/await patterns
|
|
27
|
+
- REST APIs
|
|
28
|
+
- Model Context Protocol (MCP) concepts
|
|
29
|
+
- Statistics Canada data structure (helpful but not required)
|
|
30
|
+
|
|
31
|
+
### Quick Setup
|
|
32
|
+
|
|
33
|
+
1. **Fork and clone the repository**:
|
|
34
|
+
```bash
|
|
35
|
+
git clone https://github.com/YOUR_USERNAME/mcp-statcan.git
|
|
36
|
+
cd mcp-statcan
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
2. **Install UV** (if not already installed):
|
|
40
|
+
```bash
|
|
41
|
+
curl -fsSL https://astral.sh/uv/install.sh | bash
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
3. **Install dependencies**:
|
|
45
|
+
```bash
|
|
46
|
+
uv pip install fastmcp httpx pydantic
|
|
47
|
+
# Development dependencies
|
|
48
|
+
uv pip install pytest black isort
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Development Environment Setup
|
|
52
|
+
|
|
53
|
+
### Environment Variables
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Database configuration
|
|
58
|
+
STATCAN_DB_FILE=temp_statcan_data.db
|
|
59
|
+
|
|
60
|
+
# Debug flags
|
|
61
|
+
STATCAN_SERVER_DEBUG=true
|
|
62
|
+
STATCAN_SSL_WARNINGS=false
|
|
63
|
+
STATCAN_SQL_DEBUG=false
|
|
64
|
+
STATCAN_DATA_VALIDATION_WARNINGS=true
|
|
65
|
+
STATCAN_SEARCH_PROGRESS=true
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Project Architecture
|
|
69
|
+
|
|
70
|
+
### Directory Structure
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
mcp-statcan/
|
|
74
|
+
├── src/
|
|
75
|
+
│ ├── __init__.py
|
|
76
|
+
│ ├── server.py # Main FastMCP server entry point
|
|
77
|
+
│ ├── config.py # Configuration management
|
|
78
|
+
│ ├── api/ # MCP tools for StatCan API
|
|
79
|
+
│ │ ├── cube_tools.py # Data cube operations
|
|
80
|
+
│ │ ├── vector_tools.py # Vector data operations
|
|
81
|
+
│ │ └── metadata_tools.py # Metadata and code sets
|
|
82
|
+
│ ├── db/ # Database operations
|
|
83
|
+
│ │ ├── connection.py # SQLite connection management
|
|
84
|
+
│ │ ├── queries.py # Database tools registration
|
|
85
|
+
│ │ └── schema.py # Table schema operations
|
|
86
|
+
│ ├── models/ # Pydantic data models
|
|
87
|
+
│ │ ├── api_models.py # API request/response models
|
|
88
|
+
│ │ └── db_models.py # Database models
|
|
89
|
+
│ └── util/ # Utility functions
|
|
90
|
+
│ ├── coordinate.py # Coordinate padding utilities
|
|
91
|
+
│ └── sql_helpers.py # SQL query helpers
|
|
92
|
+
├── pyproject.toml # Project configuration
|
|
93
|
+
└── README.md # Project overview
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Key Components
|
|
97
|
+
|
|
98
|
+
1. **FastMCP Server** (`server.py`): Main entry point that registers all MCP tools
|
|
99
|
+
2. **API Tools**: Wrappers around Statistics Canada Web Data Service API
|
|
100
|
+
3. **Database Layer**: SQLite integration for data persistence and querying
|
|
101
|
+
4. **Models**: Pydantic models for data validation and serialization
|
|
102
|
+
5. **Configuration**: Environment-based configuration management
|
|
103
|
+
|
|
104
|
+
## Development Workflow
|
|
105
|
+
|
|
106
|
+
### Branch Naming Convention
|
|
107
|
+
|
|
108
|
+
Use descriptive branch names with prefixes:
|
|
109
|
+
|
|
110
|
+
- `feature/` - New features
|
|
111
|
+
- `fix/` - Bug fixes
|
|
112
|
+
- `docs/` - Documentation updates
|
|
113
|
+
- `refactor/` - Code refactoring
|
|
114
|
+
- `test/` - Test improvements
|
|
115
|
+
|
|
116
|
+
Examples:
|
|
117
|
+
- `feature/add-windows-installation-guide`
|
|
118
|
+
- `fix/ssl-verification-error`
|
|
119
|
+
- `docs/update-api-examples`
|
|
120
|
+
|
|
121
|
+
### Making Changes
|
|
122
|
+
|
|
123
|
+
1. **Create a feature branch**:
|
|
124
|
+
```bash
|
|
125
|
+
git checkout -b feature/your-feature-name
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
2. **Make your changes** following the code standards below, then commit them:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
git add .
|
|
132
|
+
git commit -m "Add descriptive commit message"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
3. **Push and create a pull request**:
|
|
136
|
+
```bash
|
|
137
|
+
git push origin feature/your-feature-name
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Code Standards
|
|
141
|
+
|
|
142
|
+
### Python Style Guidelines
|
|
143
|
+
|
|
144
|
+
- **Type hints** - Use type hints for all function parameters and return values
|
|
145
|
+
- **Async/await** - Use async functions for API calls and database operations
|
|
146
|
+
- **Error handling** - Use try/except blocks with specific exception types
|
|
147
|
+
- **Logging** - Use the project's logging configuration for debug output
|
|
148
|
+
|
|
149
|
+
### Code Organization
|
|
150
|
+
|
|
151
|
+
- **Imports**: Use `isort` for consistent import ordering
|
|
152
|
+
- **Functions**: Keep functions focused and single-purpose
|
|
153
|
+
- **Classes**: Use Pydantic models for data validation
|
|
154
|
+
- **Constants**: Define constants in `config.py`
|
|
155
|
+
|
|
156
|
+
### Example Code Style
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from typing import Any, Dict, List, Optional
|
|
160
|
+
from pydantic import BaseModel
|
|
161
|
+
import httpx
|
|
162
|
+
|
|
163
|
+
class VectorDataInput(BaseModel):
|
|
164
|
+
"""Input model for vector data requests."""
|
|
165
|
+
vector_id: int
|
|
166
|
+
latest_n: int
|
|
167
|
+
|
|
168
|
+
async def get_vector_data(
|
|
169
|
+
input_data: VectorDataInput
|
|
170
|
+
) -> Optional[List[Dict[str, Any]]]:
|
|
171
|
+
"""
|
|
172
|
+
Retrieve vector data from Statistics Canada API.
|
|
173
|
+
|
|
174
|
+
Args:
|
|
175
|
+
input_data: Vector data request parameters
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
List of data points or None if request fails
|
|
179
|
+
|
|
180
|
+
Raises:
|
|
181
|
+
httpx.HTTPError: If API request fails
|
|
182
|
+
"""
|
|
183
|
+
try:
|
|
184
|
+
# Implementation here
|
|
185
|
+
pass
|
|
186
|
+
except httpx.HTTPError as e:
|
|
187
|
+
log_server_debug(f"API request failed: {e}")
|
|
188
|
+
raise
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Security Considerations
|
|
192
|
+
|
|
193
|
+
- **Never commit sensitive data** - Use environment variables
|
|
194
|
+
- **SSL verification** - Enable SSL verification in production
|
|
195
|
+
- **Input validation** - Always validate user inputs with Pydantic models
|
|
196
|
+
- **SQL injection prevention** - Use parameterized queries
|
|
197
|
+
|
|
198
|
+
## Testing
|
|
199
|
+
|
|
200
|
+
### Running Tests
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# Run all tests
|
|
204
|
+
pytest
|
|
205
|
+
|
|
206
|
+
# Run with coverage
|
|
207
|
+
pytest --cov=src
|
|
208
|
+
|
|
209
|
+
# Run specific test file
|
|
210
|
+
pytest tests/test_api_tools.py
|
|
211
|
+
|
|
212
|
+
# Run with debug output
|
|
213
|
+
pytest -v -s
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Test Structure
|
|
217
|
+
|
|
218
|
+
Tests should be organized to mirror the source structure:
|
|
219
|
+
|
|
220
|
+
```
|
|
221
|
+
tests/
|
|
222
|
+
├── test_api/
|
|
223
|
+
│ ├── test_cube_tools.py
|
|
224
|
+
│ ├── test_vector_tools.py
|
|
225
|
+
│ └── test_metadata_tools.py
|
|
226
|
+
├── test_db/
|
|
227
|
+
│ └── test_queries.py
|
|
228
|
+
└── test_models/
|
|
229
|
+
└── test_api_models.py
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Writing Tests
|
|
233
|
+
|
|
234
|
+
- **Unit tests** - Test individual functions and classes
|
|
235
|
+
- **Integration tests** - Test API interactions (with mocking)
|
|
236
|
+
- **Database tests** - Test database operations with temporary databases
|
|
237
|
+
- **Use fixtures** - Create reusable test data with pytest fixtures
|
|
238
|
+
|
|
239
|
+
Example test:
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
import pytest
|
|
243
|
+
from src.models.api_models import VectorLatestNInput
|
|
244
|
+
|
|
245
|
+
def test_vector_latest_n_input_validation():
|
|
246
|
+
"""Test VectorLatestNInput model validation."""
|
|
247
|
+
# Valid input
|
|
248
|
+
valid_input = VectorLatestNInput(vectorId=12345, latestN=5)
|
|
249
|
+
assert valid_input.vectorId == 12345
|
|
250
|
+
assert valid_input.latestN == 5
|
|
251
|
+
|
|
252
|
+
# Invalid input
|
|
253
|
+
with pytest.raises(ValueError):
|
|
254
|
+
VectorLatestNInput(vectorId="invalid", latestN=5)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Documentation
|
|
258
|
+
|
|
259
|
+
### Code Documentation
|
|
260
|
+
|
|
261
|
+
- **Docstrings** - Use Google-style docstrings for all public functions and classes
|
|
262
|
+
- **Type hints** - Include comprehensive type annotations
|
|
263
|
+
- **Comments** - Add comments to explain complex logic; do not comment code that is already obvious
|
|
264
|
+
- **API documentation** - Update tool descriptions when adding new MCP tools
|
|
265
|
+
|
|
266
|
+
### Documentation Updates
|
|
267
|
+
|
|
268
|
+
When making changes, update relevant documentation:
|
|
269
|
+
|
|
270
|
+
- **README.md** - For new features or installation changes
|
|
271
|
+
- **API examples** - Add examples in `docs/examples/`
|
|
272
|
+
- **Code comments** - Update docstrings and inline comments
|
|
273
|
+
- **Configuration docs** - Update environment variable documentation
|
|
274
|
+
|
|
275
|
+
### Example Documentation
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
async def search_cubes_by_title(title: str) -> List[CubeMetadata]:
|
|
279
|
+
"""
|
|
280
|
+
Search for data cubes by title keyword.
|
|
281
|
+
|
|
282
|
+
This function searches Statistics Canada's data cubes using a title
|
|
283
|
+
keyword and returns matching cube metadata.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
title: Search keyword for cube titles
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
List of CubeMetadata objects containing cube information
|
|
290
|
+
|
|
291
|
+
Raises:
|
|
292
|
+
httpx.HTTPError: If the API request fails
|
|
293
|
+
ValueError: If the title parameter is empty
|
|
294
|
+
|
|
295
|
+
Example:
|
|
296
|
+
>>> cubes = await search_cubes_by_title("employment")
|
|
297
|
+
>>> print(f"Found {len(cubes)} cubes")
|
|
298
|
+
"""
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## Pull Request Process
|
|
302
|
+
|
|
303
|
+
### Before Submitting
|
|
304
|
+
|
|
305
|
+
1. **Run all checks**:
|
|
306
|
+
```bash
|
|
307
|
+
black src/
|
|
308
|
+
isort src/
|
|
309
|
+
pytest
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
2. **Update documentation** if needed
|
|
313
|
+
|
|
314
|
+
3. **Test your changes** thoroughly
|
|
315
|
+
|
|
316
|
+
4. **Update IMPLEMENTATIONS.md** if implementing planned features
|
|
317
|
+
|
|
318
|
+
### Pull Request Template
|
|
319
|
+
|
|
320
|
+
When creating a pull request, include:
|
|
321
|
+
|
|
322
|
+
```markdown
|
|
323
|
+
## Description
|
|
324
|
+
Brief description of changes made.
|
|
325
|
+
|
|
326
|
+
## Type of Change
|
|
327
|
+
- [ ] Bug fix
|
|
328
|
+
- [ ] New feature
|
|
329
|
+
- [ ] Documentation update
|
|
330
|
+
- [ ] Refactoring
|
|
331
|
+
- [ ] Other (please describe)
|
|
332
|
+
|
|
333
|
+
## Testing
|
|
334
|
+
- [ ] All existing tests pass
|
|
335
|
+
- [ ] New tests added for new functionality
|
|
336
|
+
- [ ] Manual testing completed
|
|
337
|
+
|
|
338
|
+
## Checklist
|
|
339
|
+
- [ ] Code follows project style guidelines
|
|
340
|
+
- [ ] Self-review completed
|
|
341
|
+
- [ ] Documentation updated
|
|
342
|
+
- [ ] No sensitive data included
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
### Review Process
|
|
346
|
+
|
|
347
|
+
1. **Automated checks** - CI/CD pipeline runs tests and formatting checks
|
|
348
|
+
2. **Code review** - Project maintainers review code quality and design
|
|
349
|
+
3. **Testing** - Ensure changes don't break existing functionality
|
|
350
|
+
4. **Documentation** - Verify documentation is updated and accurate
|
|
351
|
+
|
|
352
|
+
## Types of Contributions
|
|
353
|
+
|
|
354
|
+
### 🐛 Bug Fixes
|
|
355
|
+
|
|
356
|
+
- SSL verification issues
|
|
357
|
+
- API endpoint errors
|
|
358
|
+
- Database connection problems
|
|
359
|
+
- Data validation failures
|
|
360
|
+
|
|
361
|
+
### ✨ New Features
|
|
362
|
+
|
|
363
|
+
Priority areas from `IMPLEMENTATIONS.md`:
|
|
364
|
+
|
|
365
|
+
- Windows installation guides
|
|
366
|
+
- Package installer improvements
|
|
367
|
+
- Enhanced tool prompts for LLM efficiency
|
|
368
|
+
- Additional database math tools
|
|
369
|
+
- Graph visualization tools
|
|
370
|
+
- Scheduled report generation
|
|
371
|
+
|
|
372
|
+
### 📚 Documentation
|
|
373
|
+
|
|
374
|
+
- API usage examples
|
|
375
|
+
- Installation guides for different platforms
|
|
376
|
+
- Integration documentation
|
|
377
|
+
- Code examples and tutorials
|
|
378
|
+
|
|
379
|
+
### 🧪 Testing
|
|
380
|
+
|
|
381
|
+
- Unit test coverage improvements
|
|
382
|
+
- Integration test development
|
|
383
|
+
- Performance testing
|
|
384
|
+
- API mocking improvements
|
|
385
|
+
|
|
386
|
+
### 🛠️ Infrastructure
|
|
387
|
+
|
|
388
|
+
- CI/CD pipeline improvements
|
|
389
|
+
- Development environment enhancements
|
|
390
|
+
- Performance optimizations
|
|
391
|
+
- Security improvements
|
|
392
|
+
|
|
393
|
+
## Future Development
|
|
394
|
+
|
|
395
|
+
### Current Roadmap
|
|
396
|
+
|
|
397
|
+
Based on `IMPLEMENTATIONS.md`, priority areas include:
|
|
398
|
+
|
|
399
|
+
1. **Package Management**: UV/Smithery installer for direct LLM client installation
|
|
400
|
+
2. **Platform Support**: Windows installation guides and setup scripts
|
|
401
|
+
3. **LLM Optimization**: Improved tool prompts to reduce unnecessary API calls
|
|
402
|
+
4. **Database Enhancements**: Math tools and visualization capabilities
|
|
403
|
+
5. **Automation**: Scheduled reporting and data update systems
|
|
404
|
+
|
|
405
|
+
### Architecture Considerations
|
|
406
|
+
|
|
407
|
+
- **Multi-agent systems**: Potential A2A + MCP integration
|
|
408
|
+
- **Performance**: Bulk operation optimization
|
|
409
|
+
- **Scalability**: Database connection pooling
|
|
410
|
+
- **Security**: SSL verification and input validation improvements
|
|
411
|
+
|
|
412
|
+
### Getting Involved
|
|
413
|
+
|
|
414
|
+
To contribute to future development:
|
|
415
|
+
|
|
416
|
+
1. **Check IMPLEMENTATIONS.md** for current priorities
|
|
417
|
+
2. **Join discussions** on GitHub issues
|
|
418
|
+
3. **Propose new features** through issue templates
|
|
419
|
+
4. **Review architecture** diagrams and provide feedback
|
|
420
|
+
|
|
421
|
+
## Questions or Need Help?
|
|
422
|
+
|
|
423
|
+
- **GitHub Issues**: For bug reports and feature requests
|
|
424
|
+
- **GitHub Discussions**: For general questions and brainstorming
|
|
425
|
+
- **Documentation**: Check the `docs/` directory for detailed guides
|
|
426
|
+
- **Code Examples**: See `docs/examples/` for usage patterns
|
|
427
|
+
|
|
428
|
+
## License
|
|
429
|
+
|
|
430
|
+
By contributing to this project, you agree that your contributions will be licensed under the MIT License.
|
|
431
|
+
|
|
432
|
+
---
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# IMPLEMENTATIONS.MD
|
|
2
|
+
|
|
3
|
+
# 🗺️ Roadmap & Ecosystem Alignment
|
|
4
|
+
*Updated Feb 2026 — informed by MCP Apps, Registry, and ecosystem best practices research*
|
|
5
|
+
|
|
6
|
+
## Tier 1: Foundational Fixes (Weekend Project)
|
|
7
|
+
|
|
8
|
+
- [ ] **Create `pyproject.toml`** — Replace `requirements.txt` + manual `uv pip install` with proper Python packaging. Define project name, version (semver), description, dependencies (`fastmcp`, `httpx`, `pydantic`), Python ≥3.10, entry point for the server, and metadata (author, homepage, repo URL, keywords: `mcp`, `statistics-canada`, `statcan`, `open-data`). This enables PyPI publishing and is required for registry listing.
|
|
9
|
+
- [ ] **Enable SSL verification** — SSL is currently disabled for development. Fix httpx SSL settings and document any proxy/corporate certificate workarounds.
|
|
10
|
+
- [ ] **Harden SQL input validation** — Database tools accept raw SQL with "basic" validation. Implement: whitelist of allowed SQL operations (SELECT only), parameterized queries to prevent injection, and query size/timeout limits.
|
|
11
|
+
- [ ] **Add a uv or smithery package installer** — Install packages to Claude or other LLM clients directly instead of having to adjust working directories *(carried from June 1, 2025)*
|
|
12
|
+
|
|
13
|
+
## Tier 2: Ecosystem Integration (One-Week Sprint)
|
|
14
|
+
|
|
15
|
+
- [ ] **Publish to PyPI** — With `pyproject.toml` in place, build and upload. Enables `pip install mcp-statcan` and `uvx mcp-statcan`. Update README with simplified installation.
|
|
16
|
+
- [ ] **Register on Official MCP Registry** — Use `mcp-publisher` CLI, generate `server.json` with reverse-DNS naming (`io.github.aryan-jhaveri/mcp-statcan`), reference PyPI package, publish. Currently missing from: Official Registry, Smithery, PulseMCP, Docker Catalog.
|
|
17
|
+
- [ ] **Submit to remaining directories** — PR to `punkpeye/awesome-mcp-servers` (syncs to Glama), register on Smithery.ai, submit to PulseMCP (`pulsemcp.com/submit`), consider Docker MCP Catalog.
|
|
18
|
+
- [ ] **Add GitHub Actions CI/CD** — Linting (ruff), type checking (mypy), tests on every push/PR. Release workflow publishes to PyPI on tagged releases. Registry publishing step using `mcp-publisher` with GitHub OIDC auth. (DON'T DO THIS YET — I need to brush up on CI/CD first)
|
|
19
|
+
- [ ] **Create a Dockerfile** — Slim Python base image for sandboxed deployment. Enables Docker MCP Catalog listing. (DON'T DO THIS YET — I need to brush up on Docker first)
|
|
20
|
+
- [ ] **Create setup/installation guides for Windows** *(carried from June 1, 2025)* (DON'T DO THIS YET — I need to test on a Windows virtual machine first)
|
|
21
|
+
|
|
22
|
+
## Tier 3: Quality & Completeness (Two-Week Sprint)
|
|
23
|
+
|
|
24
|
+
- [ ] **Write tests** — Unit tests per tool function (pytest), mock StatCan API responses. Integration tests via FastMCP in-memory client. Test edge cases: empty results, API timeouts, malformed responses, pagination boundaries. Measure **tool hit rate** (LLM correctly picks right tool for 20 natural-language queries). (Need to research and plan)
|
|
25
|
+
|
|
26
|
+
- [ ] **Complete StatCan WDS API coverage** — The Web Data Service provides **15 methods**; implement all of them. Missing ones likely include `getAllCubesListLite`, `getCubeMetadata`, `getBulkVectorDataByRange`, `getChangedCubeList`, `getChangedSeriesDataFromVector`, etc. Each tool needs clear name, detailed description (explain StatCan "cubes" and "vectors" for LLMs), and well-defined input schema.
|
|
27
|
+
(There are trade-offs between the WDS and SDMX tools; currently the LLM may be fetching and reading one data point at a time.)
|
|
28
|
+
|
|
29
|
+
- [ ] **Add MCP Resources & Prompts** — Currently only exposes tools. Add **resources** for: available StatCan subject categories, StatCan data model explainer (cubes, vectors, coordinates, reference periods), API rate limits/constraints. Add **prompt templates** for: "Find and analyze a StatCan time series", "Compare regional statistics across provinces", "Get the latest economic indicators".
|
|
30
|
+
(Need subject matter experts to review)
|
|
31
|
+
|
|
32
|
+
- [ ] **Implement cursor-based pagination** — For tools returning large result sets, implement the MCP spec's pagination pattern with opaque cursor tokens and server-determined page sizes. Prevents timeouts on large queries.
|
|
33
|
+
(Need to research, plan, and understand pagination and cursor tokens)
|
|
34
|
+
|
|
35
|
+
- [ ] Fix `get_bulk_vector` truncated output exceeding LLM context — better implementation: read heads of fetched data, or always route through db tools *(carried from Jan 7, 2026)*
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
- [ ] Fix `create_table_from_data` not filling DB — LLM needs additional tool call to manually insert data, causes errors and context token exhaustion *(carried from Jan 7, 2026)*
|
|
39
|
+
|
|
40
|
+
## Tier 4: Differentiation (Month-Long Effort)
|
|
41
|
+
|
|
42
|
+
- [ ] **Add MCP Apps support for data visualization** — Declare `ui://` resources that render interactive charts/tables. Time series → interactive Chart.js/Plotly chart in sandboxed iframe. Data tables → sortable/filterable HTML table. Use `ext-apps` SDK (`add-app-to-server` agent skill). Text fallback for non-supporting clients. Would make mcp-statcan one of the few data MCP servers with visual output.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
- [ ] **Support Streamable HTTP transport** — Add HTTP server mode alongside stdio for remote deployment. Unlocks hosting on Cloudflare Workers, Render, Railway. Consider deploying a free public instance for zero-setup access.
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
- [ ] **Add structured output schemas** — Define typed output schemas for each tool's response, enabling downstream tools and MCP Apps UIs to parse results programmatically.
|
|
49
|
+
- [ ] **Implement caching** — StatCan data updates at 8:30 AM ET on business days. Cache API responses with time-based invalidation aligned to this schedule. Reduces StatCan API load, improves response times.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
- [ ] **Look into SDMX implementation** — Allow Claude to create files or exact URIs for vector and metadata fetching; mix of REST and SDMX tools available *(carried from Jan 7, 2026)*
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
- [ ] **Maybe: Look into A2A + MCP** — (https://arxiv.org/pdf/2506.01804) to create an extended multi-agent system *(carried from June 3, 2025, only for curiosity)*
|
|
56
|
+
|
|
57
|
+
## Documentation Improvements
|
|
58
|
+
|
|
59
|
+
- [ ] **Badges section** — PyPI version, CI status, license, MCP registry link
|
|
60
|
+
- [ ] **Tool reference table** — Every tool with parameters, return types, example usage
|
|
61
|
+
- [ ] **CONTRIBUTING.md** with contribution guidelines
|
|
62
|
+
- [ ] **CHANGELOG.md** tracking versions
|
|
63
|
+
- [ ] **Multi-client config examples** — Claude Desktop, Claude Code, Cursor, VS Code Copilot, Windsurf (not just Claude Desktop)
|
|
64
|
+
- [ ] **StatCan explainer section** — What StatCan data is and why it's useful, for international users
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
# 📓 Development Log
|
|
69
|
+
|
|
70
|
+
## Jan 7, 2026
|
|
71
|
+
|
|
72
|
+
[x] Adjust and make more detailed tool prompts to prevent the LLM from making separate calls for finding data and then inputting to database.
|
|
73
|
+
|
|
74
|
+
[x] Need to add db specific math tools. Add additional graph tools if needed.
|
|
75
|
+
|
|
76
|
+
## January 2, 2026 — Refactor Data Retrieval Pipeline
|
|
77
|
+
|
|
78
|
+
[x] Identify issue with `get_bulk_vector_data_by_range` returning nested JSON incompatible with DB tools.
|
|
79
|
+
|
|
80
|
+
[x] **Priority** Shift strategy to **Flatten API Response**: Bulk Tool Flattening → Database Ingestion.
|
|
81
|
+
|
|
82
|
+
[x] Modify `get_bulk_vector_data_by_range` to return flat list of data points with `vectorId` injected.
|
|
83
|
+
|
|
84
|
+
[x] Ensure compatibility with `create_table_from_data` for seamless "Fetch → Store" workflow.
|
|
85
|
+
|
|
86
|
+
## Notes
|
|
87
|
+
- Potential use case: Create scheduled calls for the LLM to create weekly reports for specific data sets.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
# 🏗️ Server Architecture & Data Flow
|
|
92
|
+
*June 1, 2025*
|
|
93
|
+
|
|
94
|
+
```mermaid
|
|
95
|
+
flowchart TD
|
|
96
|
+
A[Claude/MCP Client] -->|MCP Protocol| B[FastMCP Server]
|
|
97
|
+
|
|
98
|
+
B --> C{Tool Type}
|
|
99
|
+
C -->|API Tools| D[Statistics Canada API]
|
|
100
|
+
C -->|DB Tools| M[Database Tools]
|
|
101
|
+
C -->|Metadata Tools| F[Code Sets & Classifications]
|
|
102
|
+
|
|
103
|
+
D --> G[Cube Tools]
|
|
104
|
+
D --> H[Vector Tools]
|
|
105
|
+
D --> I[Metadata Tools]
|
|
106
|
+
|
|
107
|
+
E[SQLite Database]
|
|
108
|
+
|
|
109
|
+
G -->|get_cube_metadata<br/>search_cubes_by_title<br/>get_data_from_cube| J[StatCan WDS API<br/>statcan.gc.ca/t1/wds/rest]
|
|
110
|
+
H -->|get_series_info_from_vector<br/>get_data_from_vectors<br/>get_bulk_vector_data| J
|
|
111
|
+
I -->|get_code_sets<br/>get_changed_cube_list| J
|
|
112
|
+
|
|
113
|
+
J -->|JSON Response| K[API Response Processing]
|
|
114
|
+
K -->|Flattened Data Points| L{Data Usage}
|
|
115
|
+
|
|
116
|
+
L -->|Return to Client| A
|
|
117
|
+
L -->|Store in DB| M[Database Tools]
|
|
118
|
+
|
|
119
|
+
M --> N[create_table_from_data]
|
|
120
|
+
M --> O[insert_data_into_table]
|
|
121
|
+
M --> P[query_database]
|
|
122
|
+
M --> Q[list_tables]
|
|
123
|
+
M --> R[get_table_schema]
|
|
124
|
+
|
|
125
|
+
N --> E
|
|
126
|
+
O --> E
|
|
127
|
+
P --> E
|
|
128
|
+
Q --> E
|
|
129
|
+
R --> E
|
|
130
|
+
|
|
131
|
+
E --> S[Dynamic Tables]
|
|
132
|
+
|
|
133
|
+
S -->|SQL Results| T[Formatted Response]
|
|
134
|
+
T -->|MCP Response| A
|
|
135
|
+
|
|
136
|
+
F -->|get_code_sets| J
|
|
137
|
+
|
|
138
|
+
style A fill:#210d70
|
|
139
|
+
style B fill:#70190d
|
|
140
|
+
style E fill:#700d49
|
|
141
|
+
style L fill:#450d70
|
|
142
|
+
style T fill:#35700d
|
|
143
|
+
style J fill:#700d1c
|
|
144
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Aryan Jhaveri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|