the-datagarden 0.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- the_datagarden/__init__.py +8 -0
- the_datagarden/abc/__init__.py +3 -0
- the_datagarden/abc/api.py +19 -0
- the_datagarden/abc/authentication.py +42 -0
- the_datagarden/api/__init__.py +5 -0
- the_datagarden/api/authentication/__init__.py +112 -0
- the_datagarden/api/authentication/credentials/__init__.py +120 -0
- the_datagarden/api/authentication/environment/__init__.py +13 -0
- the_datagarden/api/authentication/settings.py +54 -0
- the_datagarden/api/base/__init__.py +215 -0
- the_datagarden/api/regions/__init__.py +4 -0
- the_datagarden/api/regions/base/__init__.py +108 -0
- the_datagarden/api/regions/base/settings.py +19 -0
- the_datagarden/api/regions/continent.py +9 -0
- the_datagarden/api/regions/country.py +9 -0
- the_datagarden/models/__init__.py +9 -0
- the_datagarden/models/geojson.py +179 -0
- the_datagarden/models/regional_data.py +411 -0
- the_datagarden/version.py +1 -0
- the_datagarden-1.2.1.dist-info/METADATA +137 -0
- the_datagarden-1.2.1.dist-info/RECORD +25 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/WHEEL +1 -1
- the_datagarden-0.1.0.dist-info/METADATA +0 -18
- the_datagarden-0.1.0.dist-info/RECORD +0 -7
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/entry_points.txt +0 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from enum import StrEnum
|
3
|
+
from typing import Literal
|
4
|
+
|
5
|
+
from pydantic import BaseModel
|
6
|
+
|
7
|
+
from the_datagarden.api.authentication.settings import STATISTICS_URL_EXTENSION
|
8
|
+
from the_datagarden.api.base import BaseApi
|
9
|
+
from the_datagarden.models import TheDataGardenRegionalDataModel, TheDataGardenRegionGeoJSONModel
|
10
|
+
|
11
|
+
from .settings import ResponseKeys
|
12
|
+
|
13
|
+
|
14
|
+
class PeriodTypes:
    """Choice class with the period type codes used in most data classes.

    Every attribute holds the one-letter code of a period type.
    """

    YEAR, QUARTER, MONTH = "Y", "Q", "M"
    WEEK, DAY, HOUR = "W", "D", "H"
|
23
|
+
|
24
|
+
|
25
|
+
# Type alias that limits a period type to the one-letter codes
# "Y", "Q", "M", "W", "D" and "H".
PeriodType = Literal[
    "Y",
    "Q",
    "M",
    "W",
    "D",
    "H",
]
|
26
|
+
|
27
|
+
|
28
|
+
class RegionParams(BaseModel):
    """Pydantic model with the optional parameters of a regional data request.

    NOTE(review): the consumer of this model is not visible in this module;
    the per-field notes below are read from the names and types only and
    should be confirmed against the request code.
    """

    # Presumably the names of the data models to select; None when not set.
    models: list[str] | None = None
    # Presumably the data sources to select; None when not set.
    source: list[str] | None = None
    # One of the PeriodType codes, "Y" unless specified otherwise.
    period_type: PeriodType = "Y"
    # Presumably the bounds of the requested period; None when not set.
    period_from: datetime | None = None
    period_to: datetime | None = None
    # Presumably the type of region to select; None when not set.
    region_type: str | None = None
    # Defaults to 0 -- presumably the number of descendant levels to include.
    descendant_level: int = 0
|
36
|
+
|
37
|
+
|
38
|
+
class Region:
|
39
|
+
"""
|
40
|
+
A region in The Data Garden.
|
41
|
+
"""
|
42
|
+
|
43
|
+
REGION_STATS_MODEL: type[BaseModel]
|
44
|
+
_region_stats: BaseModel | None = None
|
45
|
+
|
46
|
+
KEYS: type[StrEnum]
|
47
|
+
|
48
|
+
def __repr__(self):
|
49
|
+
return f"{self.__class__.__name__} : {self._name}"
|
50
|
+
|
51
|
+
def __init__(self, url: str, api: BaseApi, name: str, continent: str | None = None):
|
52
|
+
self._region_url = url
|
53
|
+
self._api = api
|
54
|
+
self._available_models: dict = {}
|
55
|
+
self._model_data_storage: dict[str, TheDataGardenRegionalDataModel] = {}
|
56
|
+
self._geojsons = TheDataGardenRegionGeoJSONModel(api=api, region_url=url)
|
57
|
+
self._name = name
|
58
|
+
self._continent = continent
|
59
|
+
|
60
|
+
def __getattr__(self, attr: str):
|
61
|
+
if attr in self.available_model_names:
|
62
|
+
return self._model_data_from_storage(model_name=attr)
|
63
|
+
if attr == "geojsons":
|
64
|
+
return self._geojsons
|
65
|
+
|
66
|
+
raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")
|
67
|
+
|
68
|
+
def _model_data_from_storage(self, model_name: str) -> TheDataGardenRegionalDataModel | None:
|
69
|
+
stored_model_data = self._model_data_storage.get(model_name, None)
|
70
|
+
if not stored_model_data:
|
71
|
+
self._model_data_storage[model_name] = TheDataGardenRegionalDataModel(
|
72
|
+
model_name=model_name, api=self._api, region_url=self._region_url, meta_data=self.meta_data
|
73
|
+
)
|
74
|
+
return self._model_data_storage[model_name]
|
75
|
+
|
76
|
+
return stored_model_data
|
77
|
+
|
78
|
+
@property
|
79
|
+
def meta_data(self) -> BaseModel | None:
|
80
|
+
"""
|
81
|
+
Get the region statistics info from the API.
|
82
|
+
"""
|
83
|
+
if not self._region_stats:
|
84
|
+
region_stats_resp = self._api.retrieve_from_api(
|
85
|
+
url_extension=self._region_url + STATISTICS_URL_EXTENSION,
|
86
|
+
)
|
87
|
+
if region_stats_resp and region_stats_resp.status_code == 200:
|
88
|
+
region_stats_resp_json = region_stats_resp.json().get(self._key(ResponseKeys.STATISTICS), {})
|
89
|
+
self._region_stats = self.REGION_STATS_MODEL(
|
90
|
+
region_stats_resp_json if isinstance(region_stats_resp_json, dict) else {}
|
91
|
+
)
|
92
|
+
|
93
|
+
return self._region_stats
|
94
|
+
|
95
|
+
@property
|
96
|
+
def region_types(self) -> list[str]:
|
97
|
+
if not self.meta_data:
|
98
|
+
return []
|
99
|
+
return self.meta_data.region_types
|
100
|
+
|
101
|
+
@property
|
102
|
+
def available_model_names(self) -> list[str]:
|
103
|
+
if not self.meta_data:
|
104
|
+
return []
|
105
|
+
return self.meta_data.regional_data_models
|
106
|
+
|
107
|
+
def _key(self, key: str) -> str:
|
108
|
+
return getattr(self.KEYS, key)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from enum import StrEnum
|
2
|
+
|
3
|
+
|
4
|
+
class ResponseKeys(StrEnum):
|
5
|
+
AVAILABLE_MODELS = "AVAILABLE_MODELS"
|
6
|
+
STATISTICS = "STATISTICS"
|
7
|
+
|
8
|
+
|
9
|
+
class RegionKeys(StrEnum): ...
|
10
|
+
|
11
|
+
|
12
|
+
class ContinentKeys(RegionKeys):
    """Key strings for continents.

    The member names are the same as those of ResponseKeys, so a generic key
    can be translated by looking up the member with the same name.
    """

    AVAILABLE_MODELS = "available_data_on_continent_level"
    STATISTICS = "statistics"
|
15
|
+
|
16
|
+
|
17
|
+
class CountryKeys(RegionKeys):
    """Key strings for countries.

    The member names are the same as those of ResponseKeys, so a generic key
    can be translated by looking up the member with the same name.
    """

    AVAILABLE_MODELS = "models_per_region_level"
    STATISTICS = "statistics"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
from .geojson import RegionGeoJSONDataRecord, TheDataGardenRegionGeoJSONModel
|
2
|
+
from .regional_data import RegionalDataRecord, TheDataGardenRegionalDataModel
|
3
|
+
|
4
|
+
# Names exported by a star-import of this package: the record class and the
# model class of the regional data module and of the geojson module.
__all__ = [
    "RegionalDataRecord",
    "TheDataGardenRegionalDataModel",
    "RegionGeoJSONDataRecord",
    "TheDataGardenRegionGeoJSONModel",
]
|
@@ -0,0 +1,179 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import polars as pl
|
5
|
+
from pydantic import BaseModel
|
6
|
+
|
7
|
+
from the_datagarden.api.base import BaseApi
|
8
|
+
|
9
|
+
# Record attributes whose values together identify one GeoJSON record; they
# are the input of RegionGeoJSONDataRecord.record_hash.
GEJSON_UNIQUE_FIELDS = [
    "region_type", "iso_cc_2",
    "local_region_code", "local_region_code_type",
    "region_level",
]
|
16
|
+
|
17
|
+
|
18
|
+
class Properties(BaseModel):
    """The ``properties`` part of a GeoJSON feature; every field is required."""

    # Name of the region.
    name: str
    # Level of the region.
    region_level: int
    # Type of the region.
    region_type: str
    # Presumably the two-letter ISO country code -- TODO confirm.
    iso_cc_2: str
    # Local code of the region and the type of that code.
    local_region_code: str
    local_region_code_type: str
|
25
|
+
|
26
|
+
|
27
|
+
class Geometry(BaseModel):
    """The ``geometry`` part of a GeoJSON feature."""

    # Geometry type as a plain string; not restricted to specific values here.
    type: str
    # Coordinates as an untyped list; the nesting is not validated here.
    coordinates: list
|
30
|
+
|
31
|
+
|
32
|
+
class Feature(BaseModel):
    """A GeoJSON feature made up of region properties and a geometry."""

    # "Feature" unless the input specifies another value.
    type: str = "Feature"
    # Both parts are required.
    properties: Properties
    geometry: Geometry
|
36
|
+
|
37
|
+
|
38
|
+
class RegionGeoJSONDataRecord(BaseModel):
    """Record holding one GeoJSON feature together with its region attributes.

    Only ``feature`` is required; the region attributes default to None and
    ``region_level`` defaults to 0.
    """

    name: str | None = None
    region_type: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    region_level: int = 0
    feature: Feature

    def record_hash(self) -> str:
        """Return the hash of the identifying fields of the record as a string.

        The values of the attributes named in ``GEJSON_UNIQUE_FIELDS`` are
        converted to strings, ordered by attribute name and joined with dots;
        the built-in ``hash`` of that text is returned as a string. String
        hashes are salted per interpreter process by default, so the value is
        only meant to be compared within one run.
        """
        identifying_values = (
            str(getattr(self, field_name)) for field_name in sorted(GEJSON_UNIQUE_FIELDS)
        )
        return str(hash(".".join(identifying_values)))

    def __str__(self):
        """Return a short description with name, region type and local region code."""
        description = f"RegionGeoJSONDataRecord: {self.name} ({self.region_type} for {self.local_region_code})"
        return description
|
53
|
+
|
54
|
+
|
55
|
+
class TheDataGardenRegionGeoJSONModel:
    """
    Model to hold response data from The Data Garden API Region GeoJSON endpoint.

    The model holds one ``RegionGeoJSONDataRecord`` per GeoJSON feature of the
    region. The records are keyed by their record hash, so a feature that is
    retrieved more than once is stored only once.

    Calling the model with a region level retrieves the features for that
    level (all pages) and adds them to the model. The model can be iterated
    and supports ``len()``.

    The data can be converted to Polars and Pandas dataframes by the following
    methods:
    - to_polars() -> pl.DataFrame
    - to_pandas() -> pd.DataFrame
    """

    def __init__(self, api: "BaseApi", region_url: str):
        """
        Args:
            api: API object used to retrieve the GeoJSON data.
            region_url: url extension of the region; ``"geojson/"`` is
                appended to it for the requests.
        """
        self._api: BaseApi = api
        self._region_url: str = region_url
        # Region levels for which the features have already been stored.
        self._levels_requested: list[int] = []
        self._geojson_records: dict[str, RegionGeoJSONDataRecord] = {}

    def __str__(self):
        return f"TheDataGardenRegionGeoJSONModel : GeoJSON : (count={len(self._geojson_records)})"

    def __repr__(self):
        return self.__str__()

    def __call__(self, region_level: int = 0) -> "TheDataGardenRegionGeoJSONModel":
        """
        Make sure the features for ``region_level`` are loaded and return self.

        A level is requested from the API only as long as no data has been
        stored for it; a retrieval that yields nothing is tried again on the
        next call.
        """
        if region_level not in self._levels_requested:
            features = self.geojson_paginated_data_from_api(region_level=region_level)
            if features:
                self.set_items(features)
                self._levels_requested.append(region_level)
        return self

    def _response_has_next_page(self, model_data_resp: dict) -> bool:
        """Return True when the response has pagination info with a next page."""
        pagination = model_data_resp.get("pagination", None)
        if not pagination:
            return False
        return pagination.get("next_page", None) is not None

    def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
        """
        Remove the pagination info from the response and return the payload
        for requesting the next page, or None when there is no next page.
        """
        # Popping is deliberate: once consumed, the response no longer claims
        # to have a next page unless a new page restores the pagination info.
        pagination = model_data_resp.pop("pagination", None)
        if not pagination:
            return None
        next_page = pagination.get("next_page", None)
        if not next_page:
            return None
        return {"page": next_page}

    def geojson_paginated_data_from_api(self, region_level: int):
        """
        Retrieve all pages of GeoJSON data for ``region_level``.

        The features of the follow-up pages are appended to the features of
        the first response, which is returned. When a follow-up request gives
        no response, the data collected so far is returned.

        Returns:
            The first response with the merged features, or the falsy result
            of the first request when that gave no data.
        """
        geojson_data_resp = self.geojson_data_from_api(region_level=region_level)
        if not geojson_data_resp:
            return geojson_data_resp

        while self._response_has_next_page(geojson_data_resp):
            next_page_pagination = self._next_page_pagination(geojson_data_resp)
            if not next_page_pagination:
                break
            next_page_resp = self.geojson_data_from_api(
                pagination=next_page_pagination, region_level=region_level
            )
            if not next_page_resp:
                break
            # A page is not guaranteed to carry both keys (the last page may
            # come without pagination info), so read them without indexing.
            geojson_data_resp.setdefault("features", []).extend(next_page_resp.get("features") or [])
            geojson_data_resp["pagination"] = next_page_resp.get("pagination")

        return geojson_data_resp

    def geojson_data_from_api(
        self, region_level: int, pagination: dict[str, str] | None = None
    ) -> dict | None:
        """
        Request one page of GeoJSON data with a POST to the geojson endpoint.

        Returns:
            The decoded JSON of the response, or None when the API returned a
            falsy response.
        """
        payload: dict[str, Any] = {"region_level": region_level}
        if pagination:
            payload = payload | {"pagination": pagination}
        geojson_data_resp = self._api.retrieve_from_api(
            url_extension=self._region_url + "geojson/",
            method="POST",
            payload=payload,
        )
        if geojson_data_resp:
            return geojson_data_resp.json()
        return None

    def set_items(self, data: dict):
        """
        Store a record for every feature in ``data["features"]``.

        A record with the same record hash as an existing one replaces it.
        Data without features leaves the model unchanged.
        """
        for raw_feature in data.get("features") or []:
            feature = Feature(**raw_feature)
            properties = feature.properties
            data_record = RegionGeoJSONDataRecord(
                name=properties.name,
                region_type=properties.region_type,
                iso_cc_2=properties.iso_cc_2,
                local_region_code=properties.local_region_code,
                local_region_code_type=properties.local_region_code_type,
                region_level=properties.region_level,
                feature=feature,
            )
            self._geojson_records[data_record.record_hash()] = data_record

    def to_polars(self) -> pl.DataFrame:
        """
        Convert the data to a polars dataframe built from the dumped records
        """
        converted_records = [record.model_dump() for record in self._geojson_records.values()]
        return pl.from_records(converted_records)

    def to_pandas(self) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe
        """
        return self.to_polars().to_pandas()

    def __iter__(self):
        """Makes the class iterable over the values in _geojson_records"""
        return iter(self._geojson_records.values())

    def __len__(self):
        """Returns the number of records"""
        return len(self._geojson_records)

    @property
    def geojson_records(self) -> list[RegionGeoJSONDataRecord]:
        """The stored records as a new list."""
        return list(self._geojson_records.values())
|