statwrapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statwrapper/__init__.py +31 -0
- statwrapper/api_clients/__init__.py +11 -0
- statwrapper/api_clients/dst_client.py +249 -0
- statwrapper/api_clients/eurostat_client.py +359 -0
- statwrapper/api_clients/pxweb2_client.py +170 -0
- statwrapper/api_clients/pxweb_client.py +244 -0
- statwrapper/base_api_client.py +79 -0
- statwrapper/exceptions.py +22 -0
- statwrapper/http.py +126 -0
- statwrapper/models.py +103 -0
- statwrapper/parsers.py +260 -0
- statwrapper/provider_registry.py +74 -0
- statwrapper/providers.json +662 -0
- statwrapper/statwrapper.py +103 -0
- statwrapper/utils.py +134 -0
- statwrapper-0.1.0.dist-info/METADATA +123 -0
- statwrapper-0.1.0.dist-info/RECORD +19 -0
- statwrapper-0.1.0.dist-info/WHEEL +5 -0
- statwrapper-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
import uuid
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .http import RateLimitedSession
|
|
10
|
+
from .models import DiscoveredDataset, Provider, ResolvedDatasetMetadata
|
|
11
|
+
from .provider_registry import create_wrapper, get_provider, load_providers
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class StatWrapper:
    """Entry point that looks up providers and delegates to their API wrappers.

    Attributes are populated in ``__init__``:

    * ``providers_path`` - the path argument exactly as given (may be ``None``).
    * ``providers`` - whatever ``load_providers(providers_path)`` returns.
    * ``provider_map`` - ``provider_code`` -> provider, built from ``providers``.
    * ``logger`` - the supplied logger, or this module's logger.
    * ``session`` - the supplied session, or a new ``RateLimitedSession``.
    """

    def __init__(
        self,
        *,
        providers_path: str | Path | None = None,
        session: RateLimitedSession | None = None,
        logger: logging.Logger | None = None,
    ) -> None:
        """Load the provider registry and prepare the request session.

        Args:
            providers_path: Forwarded unchanged to ``load_providers``.
            session: Object whose ``get_json`` / ``get_text`` / ``get_bytes``
                attributes are handed to the wrappers. A falsy value makes
                the constructor build a ``RateLimitedSession``.
            logger: Logger passed on to the wrappers; a falsy value selects
                ``logging.getLogger(__name__)``.
        """
        self.providers_path = providers_path
        self.providers = load_providers(providers_path)
        self.provider_map = {
            entry.provider_code: entry for entry in self.providers
        }
        self.logger = logger if logger else logging.getLogger(__name__)

        if not session:
            # Only built when no session was injected, so provider rate
            # limits are never inspected for caller-supplied sessions.
            per_host_rates = {}
            for entry in self.providers:
                host_key = entry.base_api_url or entry.provider_code
                # NOTE(review): the value is the reciprocal of rate_limit
                # (1.0 when rate_limit is not positive); its unit is defined
                # by RateLimitedSession - confirm against http.py.
                if entry.rate_limit > 0:
                    per_host_rates[host_key] = 1.0 / entry.rate_limit
                else:
                    per_host_rates[host_key] = 1.0
            session = RateLimitedSession(
                default_rate=1.0,
                host_rates=per_host_rates,
            )
        self.session = session

    def get_provider(self, provider_code: str) -> Provider:
        """Return the provider for ``provider_code`` via the registry lookup.

        Delegates to the module-level ``get_provider`` with the loaded
        provider list; any error it raises propagates unchanged.
        """
        return get_provider(provider_code, self.providers)

    def get_wrapper(self, provider_code: str, language: str):
        """Build an API wrapper for one provider and language.

        The wrapper is produced by ``create_wrapper`` and receives this
        instance's session request methods and logger.
        """
        return create_wrapper(
            self.get_provider(provider_code),
            language=language,
            json_request_handler=self.session.get_json,
            text_request_handler=self.session.get_text,
            bytes_request_handler=self.session.get_bytes,
            logger=self.logger,
        )

    async def discover_datasets(
        self,
        provider_code: str,
        language: str,
        *,
        task_id: uuid.UUID | None = None,
    ) -> list[DiscoveredDataset]:
        """Discover the datasets a provider exposes in ``language``.

        Args:
            provider_code: Registry code of the provider to query.
            language: Language passed to the provider wrapper.
            task_id: Identifier handed to the wrapper; a random UUID4 is
                generated when it is omitted.

        Returns:
            Whatever the wrapper's ``discover_datasets`` coroutine returns.
        """
        api_wrapper = self.get_wrapper(provider_code, language)
        if not task_id:
            task_id = uuid.uuid4()
        return await api_wrapper.discover_datasets(task_id)

    async def resolve_dataset_metadata(
        self,
        discovered: DiscoveredDataset,
        *,
        task_id: uuid.UUID | None = None,
    ) -> ResolvedDatasetMetadata:
        """Resolve full metadata for a previously discovered dataset.

        The wrapper is selected from ``discovered.provider_code`` and
        ``discovered.language``. ``task_id`` is forwarded as given (including
        ``None``); unlike ``discover_datasets`` no UUID is generated here.
        """
        api_wrapper = self.get_wrapper(
            discovered.provider_code,
            discovered.language,
        )
        metadata = await api_wrapper.resolve_dataset_metadata(
            discovered,
            task_id=task_id,
        )
        return metadata
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def discover_provider_datasets(
    provider_code: str,
    language: str,
    *,
    providers_path: str | Path | None = None,
) -> list[DiscoveredDataset]:
    """Discover a provider's datasets using a throwaway ``StatWrapper``.

    Args:
        provider_code: Registry code of the provider to query.
        language: Language passed to the provider wrapper.
        providers_path: Optional registry path handed to ``StatWrapper``.

    Returns:
        The result of ``StatWrapper.discover_datasets`` for the provider.
    """
    facade = StatWrapper(providers_path=providers_path)
    datasets = await facade.discover_datasets(provider_code, language)
    return datasets
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
async def resolve_dataset_metadata(
    discovered: DiscoveredDataset,
    *,
    providers_path: str | Path | None = None,
) -> ResolvedDatasetMetadata:
    """Resolve metadata for one discovered dataset using a throwaway ``StatWrapper``.

    Args:
        discovered: Dataset previously returned by a discovery call.
        providers_path: Optional registry path handed to ``StatWrapper``.

    Returns:
        The result of ``StatWrapper.resolve_dataset_metadata`` (called
        without a task id).
    """
    facade = StatWrapper(providers_path=providers_path)
    metadata = await facade.resolve_dataset_metadata(discovered)
    return metadata
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def get_datasets(
    provider_code: str,
    language: str,
    *,
    providers_path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Synchronously discover a provider's datasets and resolve their metadata.

    Datasets are resolved one after another (not concurrently) and each
    resolved object is returned as its instance ``__dict__``.

    Args:
        provider_code: Registry code of the provider to query.
        language: Language passed to the provider wrapper.
        providers_path: Optional registry path handed to ``StatWrapper``.

    Returns:
        One attribute dictionary per resolved dataset, in discovery order.

    Raises:
        RuntimeError: Raised by ``asyncio.run`` when called from a thread
            that already has a running event loop.
    """

    async def _collect() -> list[dict[str, Any]]:
        facade = StatWrapper(providers_path=providers_path)
        found = await facade.discover_datasets(provider_code, language)
        # Resolve everything first, then convert, so the conversion step
        # only starts once every request has completed.
        resolved_items = []
        for dataset in found:
            resolved_items.append(await facade.resolve_dataset_metadata(dataset))
        return [entry.__dict__ for entry in resolved_items]

    return asyncio.run(_collect())
|
statwrapper/utils.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# Regex fragments shared by the period-label patterns below.
# Year: 1500-2999, or exactly 3000.
_YEAR = r"(1[5-9][0-9]{2}|2[0-9]{3}|3000)"
# Month suffix: 1-12 with an optional leading zero, with or without an "M" marker.
_MONTH = r"M0?[1-9]|M1[0-2]|0?[1-9]|1[0-2]"
# Week suffix: 1-53 (leading zero optional for 1-9) behind a "W" or a "V" marker.
_WEEK_W = r"W0?[1-9]|W[1-4][0-9]|W5[0-3]"
_WEEK_V = r"V0?[1-9]|V[1-4][0-9]|V5[0-3]"

# Period-label patterns keyed by time-unit name. Insertion order matters:
# consumers that iterate this dict try the patterns top to bottom.
TIME_LABEL_PATTERNS: dict[str, re.Pattern[str]] = {
    "Annual": re.compile("^" + _YEAR + "$"),
    # Quarter marker is "Q" or "K" followed by 1-4.
    "Quarterly": re.compile("^" + _YEAR + "[QK][1-4]$"),
    "Monthly": re.compile("^" + _YEAR + "(" + _MONTH + ")$"),
    "Weekly": re.compile("^" + _YEAR + "(" + _WEEK_W + ")$"),
    # Accepts both "W" and "V" week markers but is labelled "Other".
    "Other": re.compile("^" + _YEAR + "(" + _WEEK_W + "|" + _WEEK_V + ")$"),
}

# Lower-case names that identify a time dimension.
TIME_ALTERNATIVES = {
    "time", "tid", "year", "quarter",
    "month", "week", "period", "time period",
}

# Lower-case names that identify a geographic dimension.
GEO_ALTERNATIVES = {
    "geo", "region", "country", "municipality",
    "county", "location", "area",
}

# Lower-case names that identify a metric / measure dimension.
METRIC_ALTERNATIVES = {
    "unit", "contents", "measure", "metric",
    "value", "content", "contentscode",
}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def determine_time_unit(first_period: str | None, last_period: str | None) -> str:
    """Classify a time dimension from its first and last period labels.

    Args:
        first_period: Label of the earliest period, or ``None``.
        last_period: Label of the latest period, or ``None``.

    Returns:
        The key of the first ``TIME_LABEL_PATTERNS`` entry (in dictionary
        order) whose pattern matches both labels, or ``"Other"`` when either
        label is missing/empty or no pattern matches both.
    """
    if not (first_period and last_period):
        return "Other"

    matching_units = (
        unit_name
        for unit_name, regex in TIME_LABEL_PATTERNS.items()
        if regex.match(first_period) and regex.match(last_period)
    )
    return next(matching_units, "Other")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def detect_role(code: str, label: str, is_time: bool = False) -> str | None:
    """Guess the role of a dimension from its code and label.

    Args:
        code: Dimension code; compared after stripping and lower-casing.
        label: Dimension label; compared after stripping and lower-casing.
        is_time: When true the dimension is reported as ``"time"`` without
            looking at ``code`` or ``label``.

    Returns:
        ``"metric"``, ``"geo"`` or ``"time"`` (checked in that order, so a
        metric match wins over geo, and geo over time), or ``None`` when
        neither the code nor the label is a known name.
    """
    if is_time:
        return "time"

    norm_code = code.strip().lower()
    norm_label = label.strip().lower()

    if norm_code == "contentscode":
        return "metric"

    # Order encodes precedence between the three vocabularies.
    role_vocabularies = (
        ("metric", METRIC_ALTERNATIVES),
        ("geo", GEO_ALTERNATIVES),
        ("time", TIME_ALTERNATIVES),
    )
    for role, vocabulary in role_vocabularies:
        if norm_code in vocabulary or norm_label in vocabulary:
            return role
    return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def parse_dt(value: str | datetime | None) -> datetime | None:
    """Convert a datetime or ISO 8601 string into a timezone-aware UTC datetime.

    Args:
        value: A ``datetime``, an ISO 8601 string, or ``None``. Strings are
            stripped first; a trailing UTC designator ``Z`` (or lower-case
            ``z``) is accepted.

    Returns:
        The value expressed in UTC. Aware inputs are converted to UTC; naive
        inputs are assumed to already be in UTC and are only tagged with it.
        ``None`` is returned for ``None``, blank or unparsable strings, and
        any other input type.
    """
    if isinstance(value, datetime):
        parsed = value
    elif isinstance(value, str):
        raw = value.strip()
        if not raw:
            return None
        # datetime.fromisoformat only understands the "Z" suffix from
        # Python 3.11 on, so spell it as an explicit offset. Only a trailing
        # designator is rewritten; a "Z" elsewhere is left for the parser to
        # reject.
        if raw.endswith(("Z", "z")):
            raw = raw[:-1] + "+00:00"
        try:
            parsed = datetime.fromisoformat(raw)
        except ValueError:
            return None
    else:
        return None

    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def normalize_note(value: Any) -> list[str] | None:
    """Normalize a raw note payload into a list of non-empty strings.

    Args:
        value: ``None``, a string, a dict carrying the note under ``"text"``,
            or a list whose items are strings, such dicts, or other scalars.

    Returns:
        The stripped, non-empty note texts in input order, or ``None`` when
        nothing usable remains (including for unsupported input types).
        Within a list, ``None`` items are skipped rather than rendered as the
        literal ``"None"``, and dict items contribute their ``"text"`` value
        exactly like a top-level dict does.
    """
    if value is None:
        return None
    if isinstance(value, str):
        cleaned = value.strip()
        return [cleaned] if cleaned else None
    if isinstance(value, dict):
        text = _note_text(value)
        return [text] if text else None
    if isinstance(value, list):
        notes = [text for item in value if (text := _note_text(item))]
        return notes or None
    return None


def _note_text(item: Any) -> str:
    """Return the stripped text of a single note item, or "" if it has none."""
    if item is None:
        return ""
    if isinstance(item, dict):
        # A missing or falsy "text" entry counts as no note.
        return str(item.get("text") or "").strip()
    return str(item).strip()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def normalize_contact(raw_contact: Any) -> dict[str, Any] | None:
    """Normalize a raw contact payload into a dictionary.

    Args:
        raw_contact: ``None``, a dict, a list, or any other scalar.

    Returns:
        * ``None`` for ``None`` or when no non-empty value remains.
        * The same dict object when a dict is given (returned as is).
        * ``{"contacts": [...]}`` with the dict items of a list when it
          contains at least one dict; non-dict items are dropped in that case.
        * ``{"values": [...]}`` with the stripped string form of the list
          items otherwise; ``None`` items are skipped instead of being
          rendered as the literal ``"None"``.
        * ``{"value": text}`` with the stripped string form of any other input.
    """
    if raw_contact is None:
        return None
    if isinstance(raw_contact, dict):
        return raw_contact
    if isinstance(raw_contact, list):
        contacts = [item for item in raw_contact if isinstance(item, dict)]
        if contacts:
            return {"contacts": contacts}
        values = []
        for item in raw_contact:
            if item is None:
                continue
            text = str(item).strip()
            if text:
                values.append(text)
        return {"values": values} if values else None
    value = str(raw_contact).strip()
    return {"value": value} if value else None
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def ensure_datetime(value: datetime | None, fallback: datetime) -> datetime:
    """Return ``value``, substituting ``fallback`` only when ``value`` is ``None``."""
    if value is None:
        return fallback
    return value
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: statwrapper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A completely dependency-free unified wrapper around common statistical API types
|
|
5
|
+
Author-email: Nordic Intel <info@nordicintel.com>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8.3.5; extra == "dev"
|
|
16
|
+
Requires-Dist: pytest-asyncio>=0.26.0; extra == "dev"
|
|
17
|
+
Requires-Dist: build>=1.2.2; extra == "dev"
|
|
18
|
+
Requires-Dist: ruff>=0.11.0; extra == "dev"
|
|
19
|
+
|
|
20
|
+
# statwrapper
|
|
21
|
+
|
|
22
|
+
`statwrapper` is a dependency-free Python library that provides a unified interface for common statistical API families. It currently ships wrapper support for:
|
|
23
|
+
|
|
24
|
+
- `pxweb`
|
|
25
|
+
- `pxweb2`
|
|
26
|
+
- `dst`
|
|
27
|
+
- `eurostat`
|
|
28
|
+
|
|
29
|
+
The package standardizes three operations across providers:
|
|
30
|
+
|
|
31
|
+
- health checks
|
|
32
|
+
- dataset discovery
|
|
33
|
+
- dataset metadata resolution
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install statwrapper
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import asyncio
|
|
45
|
+
|
|
46
|
+
from statwrapper import StatWrapper
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def main() -> None:
|
|
50
|
+
wrapper = StatWrapper()
|
|
51
|
+
|
|
52
|
+
discovered = await wrapper.discover_datasets("scb", "en")
|
|
53
|
+
first = discovered[0]
|
|
54
|
+
|
|
55
|
+
metadata = await wrapper.resolve_dataset_metadata(first)
|
|
56
|
+
print(metadata.label)
|
|
57
|
+
print(metadata.dimension_ids)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
asyncio.run(main())
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Public API
|
|
64
|
+
|
|
65
|
+
### `StatWrapper`
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from statwrapper import StatWrapper
|
|
69
|
+
|
|
70
|
+
wrapper = StatWrapper()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Methods:
|
|
74
|
+
|
|
75
|
+
- `await wrapper.discover_datasets(provider_code, language, task_id=None)`
|
|
76
|
+
- `await wrapper.resolve_dataset_metadata(discovered, task_id=None)`
|
|
77
|
+
- `wrapper.get_provider(provider_code)`
|
|
78
|
+
- `wrapper.get_wrapper(provider_code, language)`
|
|
79
|
+
|
|
80
|
+
### Convenience Functions
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from statwrapper import discover_provider_datasets, get_datasets
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
- `await discover_provider_datasets(provider_code, language)`
|
|
87
|
+
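- `get_datasets(provider_code, language)` — synchronous; it runs discovery and metadata resolution through `asyncio.run`, so call it only from code that is not already running inside an event loop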
|
|
88
|
+
|
|
89
|
+
## Request Layer
|
|
90
|
+
|
|
91
|
+
The default request layer is a stdlib-backed async helper:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from statwrapper import RateLimitedSession
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
`StatWrapper` uses `RateLimitedSession` automatically, but you can inject your own session object if it exposes:
|
|
98
|
+
|
|
99
|
+
- `async def get_json(url, **kwargs)`
|
|
100
|
+
- `async def get_text(url, **kwargs)`
|
|
101
|
+
- `async def get_bytes(url, **kwargs)`
|
|
102
|
+
|
|
103
|
+
## Provider Registry
|
|
104
|
+
|
|
105
|
+
Provider metadata is loaded from `PROVIDERS.json`. During development, the package reads the repository file. In built distributions, the same data is bundled inside the package as `statwrapper/providers.json`.
|
|
106
|
+
|
|
107
|
+
## Development
|
|
108
|
+
|
|
109
|
+
Run tests:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
python -m pytest tests/unit tests/integration -q
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Build locally:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
python -m build
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Publishing
|
|
122
|
+
|
|
123
|
+
PyPI publishing is handled by GitHub Actions through `.github/workflows/pypi_publish.yml` and the repository secret `PYPI_API_TOKEN`.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
statwrapper/__init__.py,sha256=CdE6EEri0RV7rS3j7P0UqG--gA0zh2_xKD84WI8ua6w,717
|
|
2
|
+
statwrapper/base_api_client.py,sha256=yRdQFqVivspndudmqV3BObdKYsOLu3cO9Obt7UAgcns,2753
|
|
3
|
+
statwrapper/exceptions.py,sha256=c4RQ64Y09qy0iSMuEmN44IlQ6ovGs3EvL5oru_QDAzE,696
|
|
4
|
+
statwrapper/http.py,sha256=2clxVis_pe_OztivWztEkQEriBG4C0PdNp0YZosCVMM,3992
|
|
5
|
+
statwrapper/models.py,sha256=XCmzxoTL63aQeYNKVgfY5TT9rKVMVRkdEpWk8DYg8uw,3043
|
|
6
|
+
statwrapper/parsers.py,sha256=UT18AsV7c4iGhOieTlh6pA-_eTA6RgHmeCUeo0vC-s4,9098
|
|
7
|
+
statwrapper/provider_registry.py,sha256=mszPwnLzDJZgy5IEa5Bt_AvMJqEtfrIBx5CHot4JlF4,2129
|
|
8
|
+
statwrapper/providers.json,sha256=szqdqRg0KmHxzoIZn3GC0391FksaeSgTJP0QNOMDC6M,17391
|
|
9
|
+
statwrapper/statwrapper.py,sha256=lDmpdSPyIsKYWsUAB81H10o0J_ay_1zFwOHa2ZF2pdk,3426
|
|
10
|
+
statwrapper/utils.py,sha256=2TcyI4wvN2G9xOkBcYv2xd4UPzfgh1m-1zWA4HAkmS8,3867
|
|
11
|
+
statwrapper/api_clients/__init__.py,sha256=E6yujDDgv8nRfrjquS_isWQaHa55Pw-zRmYX5AV1ajE,249
|
|
12
|
+
statwrapper/api_clients/dst_client.py,sha256=iJj3OHWbNYpHPRp3CG5dAG_BGLfBUyot3o-9aWlJkzA,11368
|
|
13
|
+
statwrapper/api_clients/eurostat_client.py,sha256=7O6-e5HVoGhPaxbm9xVkgJNPsqhyKs53wYcSjt3Z2Tg,15164
|
|
14
|
+
statwrapper/api_clients/pxweb2_client.py,sha256=eIK4Ebia8uxlombk7jmbf7_NGz-D9TF2VaBllDdacB4,7256
|
|
15
|
+
statwrapper/api_clients/pxweb_client.py,sha256=J1vdvJBaakEF8I8Qn5OoLaPbccnemWfGfq9oheJEhs0,10522
|
|
16
|
+
statwrapper-0.1.0.dist-info/METADATA,sha256=BHruDjBAveNPcjB2tfbmXEGw9VZ2B9puqkCA2wg_Z4c,2928
|
|
17
|
+
statwrapper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
18
|
+
statwrapper-0.1.0.dist-info/top_level.txt,sha256=8UiAVmS9MtJZrYmiJgBWYpgoCQ1jvW-Y8YN9E4Pm96E,12
|
|
19
|
+
statwrapper-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
statwrapper
|