statwrapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ import uuid
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from .http import RateLimitedSession
10
+ from .models import DiscoveredDataset, Provider, ResolvedDatasetMetadata
11
+ from .provider_registry import create_wrapper, get_provider, load_providers
12
+
13
+
14
class StatWrapper:
    """Entry point that pairs the provider registry with one shared session.

    The constructor loads the provider list, indexes it by ``provider_code``
    and, unless a session is injected, creates a ``RateLimitedSession`` with
    one rate entry per provider. Provider-specific wrappers are built on
    demand and all share that session's ``get_json`` / ``get_text`` /
    ``get_bytes`` callables.
    """

    def __init__(
        self,
        *,
        providers_path: str | Path | None = None,
        session: RateLimitedSession | None = None,
        logger: logging.Logger | None = None,
    ) -> None:
        """Load providers and prepare the logger and request session.

        Args:
            providers_path: Forwarded unchanged to ``load_providers``.
            session: Session to use instead of the default one.
            logger: Logger handed to created wrappers; defaults to this
                module's logger.
        """
        self.providers_path = providers_path
        self.providers = load_providers(providers_path)
        self.provider_map = {entry.provider_code: entry for entry in self.providers}

        if not logger:
            logger = logging.getLogger(__name__)
        self.logger = logger

        if not session:
            # One entry per provider, keyed by its base URL when set and by
            # its code otherwise. The value is the reciprocal of a positive
            # ``rate_limit`` and 1.0 for anything else.
            # NOTE(review): the unit of these rates is defined by
            # RateLimitedSession, which is not visible here -- confirm.
            host_rates = {}
            for entry in self.providers:
                key = entry.base_api_url or entry.provider_code
                if entry.rate_limit > 0:
                    host_rates[key] = 1.0 / entry.rate_limit
                else:
                    host_rates[key] = 1.0
            session = RateLimitedSession(default_rate=1.0, host_rates=host_rates)
        self.session = session

    def get_provider(self, provider_code: str) -> Provider:
        """Look up *provider_code* among the loaded providers."""
        # Inside a method this name resolves to the registry function
        # imported at module level, not to this method.
        return get_provider(provider_code, self.providers)

    def get_wrapper(self, provider_code: str, language: str):
        """Create the wrapper for *provider_code* bound to this session."""
        handlers = {
            "json_request_handler": self.session.get_json,
            "text_request_handler": self.session.get_text,
            "bytes_request_handler": self.session.get_bytes,
        }
        return create_wrapper(
            self.get_provider(provider_code),
            language=language,
            logger=self.logger,
            **handlers,
        )

    async def discover_datasets(
        self,
        provider_code: str,
        language: str,
        *,
        task_id: uuid.UUID | None = None,
    ) -> list[DiscoveredDataset]:
        """Discover the provider's datasets for *language*.

        A random UUID is generated when no *task_id* is supplied.
        """
        if not task_id:
            task_id = uuid.uuid4()
        client = self.get_wrapper(provider_code, language)
        return await client.discover_datasets(task_id)

    async def resolve_dataset_metadata(
        self,
        discovered: DiscoveredDataset,
        *,
        task_id: uuid.UUID | None = None,
    ) -> ResolvedDatasetMetadata:
        """Resolve metadata for *discovered* using its own provider and language.

        Unlike ``discover_datasets``, *task_id* is passed through as given,
        including ``None``.
        """
        client = self.get_wrapper(discovered.provider_code, discovered.language)
        return await client.resolve_dataset_metadata(discovered, task_id=task_id)
70
+
71
+
72
async def discover_provider_datasets(
    provider_code: str,
    language: str,
    *,
    providers_path: str | Path | None = None,
) -> list[DiscoveredDataset]:
    """Discover datasets for one provider with a throwaway ``StatWrapper``.

    Args:
        provider_code: Code of the provider to query.
        language: Language passed on to the discovery call.
        providers_path: Forwarded to the ``StatWrapper`` constructor.

    Returns:
        Whatever ``StatWrapper.discover_datasets`` returns.
    """
    client = StatWrapper(providers_path=providers_path)
    datasets = await client.discover_datasets(provider_code, language)
    return datasets
80
+
81
+
82
async def resolve_dataset_metadata(
    discovered: DiscoveredDataset,
    *,
    providers_path: str | Path | None = None,
) -> ResolvedDatasetMetadata:
    """Resolve metadata for one dataset with a throwaway ``StatWrapper``.

    Args:
        discovered: Dataset previously returned by a discovery call.
        providers_path: Forwarded to the ``StatWrapper`` constructor.

    Returns:
        Whatever ``StatWrapper.resolve_dataset_metadata`` returns.
    """
    client = StatWrapper(providers_path=providers_path)
    metadata = await client.resolve_dataset_metadata(discovered)
    return metadata
89
+
90
+
91
def get_datasets(
    provider_code: str,
    language: str,
    *,
    providers_path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Synchronously discover a provider's datasets and resolve each one.

    Datasets are resolved one after another, in discovery order, and each
    resolved object is returned as its instance ``__dict__``.

    The work runs under ``asyncio.run``, which cannot be called while
    another event loop is running in the same thread; async callers should
    use ``StatWrapper`` directly.
    """

    async def _collect() -> list[dict[str, Any]]:
        client = StatWrapper(providers_path=providers_path)
        found = await client.discover_datasets(provider_code, language)
        resolved = []
        for dataset in found:
            resolved.append(await client.resolve_dataset_metadata(dataset))
        # Conversion happens only after every dataset has been resolved.
        return [metadata.__dict__ for metadata in resolved]

    return asyncio.run(_collect())
statwrapper/utils.py ADDED
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from datetime import datetime, timezone
5
+ from typing import Any
6
+
7
# Anchored regexes that classify a period label by its textual shape.
# Every pattern starts with a four-digit year in the range 1500-3000.
# Insertion order is the order in which determine_time_unit tries them.
TIME_LABEL_PATTERNS: dict[str, re.Pattern[str]] = {
    unit: re.compile(expression)
    for unit, expression in (
        # Year only, e.g. "2024".
        ("Annual", r"^(1[5-9][0-9]{2}|2[0-9]{3}|3000)$"),
        # Year + "Q" or "K" + quarter 1-4, e.g. "2024Q1", "2024K3".
        ("Quarterly", r"^(1[5-9][0-9]{2}|2[0-9]{3}|3000)[QK][1-4]$"),
        # Year + optional "M" + month 1-12 (leading zero optional).
        (
            "Monthly",
            r"^(1[5-9][0-9]{2}|2[0-9]{3}|3000)(M0?[1-9]|M1[0-2]|0?[1-9]|1[0-2])$",
        ),
        # Year + "W" + week 1-53 (leading zero optional).
        (
            "Weekly",
            r"^(1[5-9][0-9]{2}|2[0-9]{3}|3000)(W0?[1-9]|W[1-4][0-9]|W5[0-3])$",
        ),
        # Year + "W" or "V" + week 1-53; only the "V" form is not already
        # covered by "Weekly".
        (
            "Other",
            r"^(1[5-9][0-9]{2}|2[0-9]{3}|3000)(W0?[1-9]|W[1-4][0-9]|W5[0-3]|V0?[1-9]|V[1-4][0-9]|V5[0-3])$",
        ),
    )
}
20
+
21
# Lower-case names that detect_role compares against a normalized
# dimension code or label.

# Names treated as the time dimension.
TIME_ALTERNATIVES = {
    "time", "tid", "year", "quarter",
    "month", "week", "period", "time period",
}

# Names treated as the geographic dimension.
GEO_ALTERNATIVES = {
    "geo", "region", "country", "municipality",
    "county", "location", "area",
}

# Names treated as the metric dimension.
METRIC_ALTERNATIVES = {
    "unit", "contents", "measure", "metric",
    "value", "content", "contentscode",
}
49
+
50
+
51
def determine_time_unit(first_period: str | None, last_period: str | None) -> str:
    """Name the time unit shared by the first and last period labels.

    Returns the key of the first ``TIME_LABEL_PATTERNS`` entry whose regex
    matches both labels. Returns ``"Other"`` when either label is missing
    or empty, or when no single pattern matches both.
    """
    if not (first_period and last_period):
        return "Other"
    endpoints = (first_period, last_period)
    return next(
        (
            unit
            for unit, regex in TIME_LABEL_PATTERNS.items()
            if all(regex.match(label) for label in endpoints)
        ),
        "Other",
    )
58
+
59
+
60
def detect_role(code: str, label: str, is_time: bool = False) -> str | None:
    """Classify a dimension as ``"metric"``, ``"geo"`` or ``"time"``.

    Args:
        code: Dimension code; compared after stripping and lower-casing.
        label: Dimension label; compared after stripping and lower-casing.
        is_time: When true the result is ``"time"`` and nothing is inspected.

    Returns:
        The first role, tried in the order metric, geo, time, whose name
        set contains the code or the label; ``None`` when none does.
    """
    if is_time:
        return "time"

    code_key = code.strip().lower()
    label_key = label.strip().lower()

    # Kept as an explicit test so the result does not depend on the
    # contents of METRIC_ALTERNATIVES.
    if code_key == "contentscode":
        return "metric"

    lookup_order = (
        ("metric", METRIC_ALTERNATIVES),
        ("geo", GEO_ALTERNATIVES),
        ("time", TIME_ALTERNATIVES),
    )
    for role, names in lookup_order:
        if code_key in names or label_key in names:
            return role
    return None
75
+
76
+
77
def parse_dt(value: str | datetime | None) -> datetime | None:
    """Coerce *value* to a timezone-aware ``datetime`` in UTC.

    Args:
        value: A ``datetime``, an ISO 8601 string, or ``None``.

    Returns:
        The UTC ``datetime``. Naive inputs are assumed to already be in UTC
        and are only tagged with the UTC zone; aware inputs are converted.
        ``None`` is returned for ``None``, blank or unparseable strings,
        and any other input type.
    """
    if isinstance(value, datetime):
        parsed = value
    elif isinstance(value, str):
        raw = value.strip()
        if not raw:
            return None
        # Only a trailing designator means UTC. It is rewritten to an
        # explicit offset because ``fromisoformat`` on Python 3.10 rejects
        # "Z", and the lowercase form is rejected on newer versions too.
        if raw[-1] in "Zz":
            raw = f"{raw[:-1]}+00:00"
        try:
            parsed = datetime.fromisoformat(raw)
        except ValueError:
            return None
    else:
        return None

    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
101
+
102
+
103
def normalize_note(value: Any) -> list[str] | None:
    """Normalize a raw note payload into a list of non-empty strings.

    Args:
        value: ``None``, a string, a list of items, or a dict that carries
            the note under the ``"text"`` key.

    Returns:
        The stripped, non-empty notes, or ``None`` when nothing usable
        remains or the input type is not supported.
    """
    if value is None:
        return None
    if isinstance(value, str):
        cleaned = value.strip()
        return [cleaned] if cleaned else None
    if isinstance(value, list):
        # Skip None entries so they do not turn into the literal note
        # "None"; every other item is stringified exactly once.
        notes = [
            text
            for item in value
            if item is not None and (text := str(item).strip())
        ]
        return notes or None
    if isinstance(value, dict):
        text = str(value.get("text") or "").strip()
        return [text] if text else None
    return None
116
+
117
+
118
def normalize_contact(raw_contact: Any) -> dict[str, Any] | None:
    """Normalize a raw contact payload into a dict.

    Args:
        raw_contact: ``None``, a dict, a list, or any other value.

    Returns:
        * a dict input unchanged (the same object);
        * ``{"contacts": [...]}`` with the dict items of a list that
          contains at least one dict (non-dict items are dropped);
        * ``{"values": [...]}`` with the stripped, non-empty string forms
          of a list that contains no dicts;
        * ``{"value": text}`` for any other value with a non-empty string
          form;
        * ``None`` when nothing usable remains.
    """
    if raw_contact is None:
        return None
    if isinstance(raw_contact, dict):
        return raw_contact
    if isinstance(raw_contact, list):
        contacts = [item for item in raw_contact if isinstance(item, dict)]
        if contacts:
            return {"contacts": contacts}
        # Skip None entries so they do not turn into the literal value
        # "None"; every other item is stringified exactly once.
        values = [
            text
            for item in raw_contact
            if item is not None and (text := str(item).strip())
        ]
        return {"values": values} if values else None
    value = str(raw_contact).strip()
    return {"value": value} if value else None
131
+
132
+
133
def ensure_datetime(value: datetime | None, fallback: datetime) -> datetime:
    """Return *value*, or *fallback* when *value* is ``None``."""
    if value is None:
        return fallback
    return value
@@ -0,0 +1,123 @@
1
+ Metadata-Version: 2.4
2
+ Name: statwrapper
3
+ Version: 0.1.0
4
+ Summary: A completely dependency-free unified wrapper around common statistical api types
5
+ Author-email: Nordic Intel <info@nordicintel.com>
6
+ License-Expression: Apache-2.0
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8.3.5; extra == "dev"
16
+ Requires-Dist: pytest-asyncio>=0.26.0; extra == "dev"
17
+ Requires-Dist: build>=1.2.2; extra == "dev"
18
+ Requires-Dist: ruff>=0.11.0; extra == "dev"
19
+
20
+ # statwrapper
21
+
22
+ `statwrapper` is a dependency-free Python library that provides a unified interface for common statistical API families. It currently ships wrapper support for:
23
+
24
+ - `pxweb`
25
+ - `pxweb2`
26
+ - `dst`
27
+ - `eurostat`
28
+
29
+ The package standardizes three operations across providers:
30
+
31
+ - health checks
32
+ - dataset discovery
33
+ - dataset metadata resolution
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ pip install statwrapper
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ```python
44
+ import asyncio
45
+
46
+ from statwrapper import StatWrapper
47
+
48
+
49
+ async def main() -> None:
50
+ wrapper = StatWrapper()
51
+
52
+ discovered = await wrapper.discover_datasets("scb", "en")
53
+ first = discovered[0]
54
+
55
+ metadata = await wrapper.resolve_dataset_metadata(first)
56
+ print(metadata.label)
57
+ print(metadata.dimension_ids)
58
+
59
+
60
+ asyncio.run(main())
61
+ ```
62
+
63
+ ## Public API
64
+
65
+ ### `StatWrapper`
66
+
67
+ ```python
68
+ from statwrapper import StatWrapper
69
+
70
+ wrapper = StatWrapper()
71
+ ```
72
+
73
+ Methods:
74
+
75
+ - `await wrapper.discover_datasets(provider_code, language, task_id=None)`
76
+ - `await wrapper.resolve_dataset_metadata(discovered, task_id=None)`
77
+ - `wrapper.get_provider(provider_code)`
78
+ - `wrapper.get_wrapper(provider_code, language)`
79
+
80
+ ### Convenience Functions
81
+
82
+ ```python
83
+ from statwrapper import discover_provider_datasets, get_datasets
84
+ ```
85
+
86
+ - `await discover_provider_datasets(provider_code, language)`
87
+ - `get_datasets(provider_code, language)`
88
+
89
+ ## Request Layer
90
+
91
+ The default request layer is a stdlib-backed async helper:
92
+
93
+ ```python
94
+ from statwrapper import RateLimitedSession
95
+ ```
96
+
97
+ `StatWrapper` uses `RateLimitedSession` automatically, but you can inject your own session object if it exposes:
98
+
99
+ - `async def get_json(url, **kwargs)`
100
+ - `async def get_text(url, **kwargs)`
101
+ - `async def get_bytes(url, **kwargs)`
102
+
103
+ ## Provider Registry
104
+
105
+ Provider metadata is loaded from `PROVIDERS.json`. During development, the package reads the repository file; in built distributions, the same data is bundled inside the package as `statwrapper/providers.json`.
106
+
107
+ ## Development
108
+
109
+ Run tests:
110
+
111
+ ```bash
112
+ python -m pytest tests/unit tests/integration -q
113
+ ```
114
+
115
+ Build locally:
116
+
117
+ ```bash
118
+ python -m build
119
+ ```
120
+
121
+ ## Publishing
122
+
123
+ PyPI publishing is handled by GitHub Actions through `.github/workflows/pypi_publish.yml` and the repository secret `PYPI_API_TOKEN`.
@@ -0,0 +1,19 @@
1
+ statwrapper/__init__.py,sha256=CdE6EEri0RV7rS3j7P0UqG--gA0zh2_xKD84WI8ua6w,717
2
+ statwrapper/base_api_client.py,sha256=yRdQFqVivspndudmqV3BObdKYsOLu3cO9Obt7UAgcns,2753
3
+ statwrapper/exceptions.py,sha256=c4RQ64Y09qy0iSMuEmN44IlQ6ovGs3EvL5oru_QDAzE,696
4
+ statwrapper/http.py,sha256=2clxVis_pe_OztivWztEkQEriBG4C0PdNp0YZosCVMM,3992
5
+ statwrapper/models.py,sha256=XCmzxoTL63aQeYNKVgfY5TT9rKVMVRkdEpWk8DYg8uw,3043
6
+ statwrapper/parsers.py,sha256=UT18AsV7c4iGhOieTlh6pA-_eTA6RgHmeCUeo0vC-s4,9098
7
+ statwrapper/provider_registry.py,sha256=mszPwnLzDJZgy5IEa5Bt_AvMJqEtfrIBx5CHot4JlF4,2129
8
+ statwrapper/providers.json,sha256=szqdqRg0KmHxzoIZn3GC0391FksaeSgTJP0QNOMDC6M,17391
9
+ statwrapper/statwrapper.py,sha256=lDmpdSPyIsKYWsUAB81H10o0J_ay_1zFwOHa2ZF2pdk,3426
10
+ statwrapper/utils.py,sha256=2TcyI4wvN2G9xOkBcYv2xd4UPzfgh1m-1zWA4HAkmS8,3867
11
+ statwrapper/api_clients/__init__.py,sha256=E6yujDDgv8nRfrjquS_isWQaHa55Pw-zRmYX5AV1ajE,249
12
+ statwrapper/api_clients/dst_client.py,sha256=iJj3OHWbNYpHPRp3CG5dAG_BGLfBUyot3o-9aWlJkzA,11368
13
+ statwrapper/api_clients/eurostat_client.py,sha256=7O6-e5HVoGhPaxbm9xVkgJNPsqhyKs53wYcSjt3Z2Tg,15164
14
+ statwrapper/api_clients/pxweb2_client.py,sha256=eIK4Ebia8uxlombk7jmbf7_NGz-D9TF2VaBllDdacB4,7256
15
+ statwrapper/api_clients/pxweb_client.py,sha256=J1vdvJBaakEF8I8Qn5OoLaPbccnemWfGfq9oheJEhs0,10522
16
+ statwrapper-0.1.0.dist-info/METADATA,sha256=BHruDjBAveNPcjB2tfbmXEGw9VZ2B9puqkCA2wg_Z4c,2928
17
+ statwrapper-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
18
+ statwrapper-0.1.0.dist-info/top_level.txt,sha256=8UiAVmS9MtJZrYmiJgBWYpgoCQ1jvW-Y8YN9E4Pm96E,12
19
+ statwrapper-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ statwrapper