the-datagarden 0.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- the_datagarden/__init__.py +8 -0
- the_datagarden/abc/__init__.py +3 -0
- the_datagarden/abc/api.py +19 -0
- the_datagarden/abc/authentication.py +42 -0
- the_datagarden/api/__init__.py +5 -0
- the_datagarden/api/authentication/__init__.py +112 -0
- the_datagarden/api/authentication/credentials/__init__.py +120 -0
- the_datagarden/api/authentication/environment/__init__.py +13 -0
- the_datagarden/api/authentication/settings.py +54 -0
- the_datagarden/api/base/__init__.py +215 -0
- the_datagarden/api/regions/__init__.py +4 -0
- the_datagarden/api/regions/base/__init__.py +108 -0
- the_datagarden/api/regions/base/settings.py +19 -0
- the_datagarden/api/regions/continent.py +9 -0
- the_datagarden/api/regions/country.py +9 -0
- the_datagarden/models/__init__.py +9 -0
- the_datagarden/models/geojson.py +179 -0
- the_datagarden/models/regional_data.py +411 -0
- the_datagarden/version.py +1 -0
- the_datagarden-1.2.1.dist-info/METADATA +137 -0
- the_datagarden-1.2.1.dist-info/RECORD +25 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/WHEEL +1 -1
- the_datagarden-0.1.0.dist-info/METADATA +0 -18
- the_datagarden-0.1.0.dist-info/RECORD +0 -7
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/entry_points.txt +0 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from enum import StrEnum
|
3
|
+
from typing import Literal
|
4
|
+
|
5
|
+
from pydantic import BaseModel
|
6
|
+
|
7
|
+
from the_datagarden.api.authentication.settings import STATISTICS_URL_EXTENSION
|
8
|
+
from the_datagarden.api.base import BaseApi
|
9
|
+
from the_datagarden.models import TheDataGardenRegionalDataModel, TheDataGardenRegionGeoJSONModel
|
10
|
+
|
11
|
+
from .settings import ResponseKeys
|
12
|
+
|
13
|
+
|
14
|
+
class PeriodTypes:
    """Single-letter period type codes used in most data classes."""

    # Calendar-based granularities, from coarsest to finest.
    YEAR = "Y"
    QUARTER = "Q"
    MONTH = "M"
    WEEK = "W"
    DAY = "D"
    HOUR = "H"
|
23
|
+
|
24
|
+
|
25
|
+
# Type alias limiting a period type to the same single-letter codes that are
# defined as constants on PeriodTypes.
PeriodType = Literal["Y", "Q", "M", "W", "D", "H"]
|
26
|
+
|
27
|
+
|
28
|
+
class RegionParams(BaseModel):
    """
    Pydantic model grouping the options for a region data query.

    Every field has a default, so the model can be created without arguments.
    """

    # Optional name filters; None means no value was supplied.
    models: list[str] | None = None
    source: list[str] | None = None

    # Period selection; the period type defaults to "Y" (PeriodTypes.YEAR).
    period_type: PeriodType = "Y"
    period_from: datetime | None = None
    period_to: datetime | None = None

    # Region selection.
    region_type: str | None = None
    descendant_level: int = 0
|
36
|
+
|
37
|
+
|
38
|
+
class Region:
    """
    A region in The Data Garden.

    A region lazily retrieves its statistics (``meta_data``) from the API and
    exposes each regional data model listed in those statistics as a dynamic
    attribute. The GeoJSON model of the region is available as ``geojsons``.
    """

    # Model class used to wrap the statistics payload. Only declared here, so
    # it has to be assigned elsewhere (presumably by subclasses).
    REGION_STATS_MODEL: type[BaseModel]
    # Cached statistics; stays None until the first successful retrieval.
    _region_stats: BaseModel | None = None

    # Enum translating generic ResponseKeys names into the response keys of
    # the concrete region type. Only declared here as well.
    KEYS: type[StrEnum]

    def __repr__(self):
        return f"{self.__class__.__name__} : {self._name}"

    def __init__(self, url: str, api: BaseApi, name: str, continent: str | None = None):
        """
        Store the region endpoint and API client and create empty caches.

        Args:
            url: URL extension of the region endpoint.
            api: API client used for all requests made by this region.
            name: Name of the region (used in ``repr``).
            continent: Optional continent name for the region.
        """
        self._region_url = url
        self._api = api
        self._available_models: dict = {}
        self._model_data_storage: dict[str, TheDataGardenRegionalDataModel] = {}
        self._geojsons = TheDataGardenRegionGeoJSONModel(api=api, region_url=url)
        self._name = name
        self._continent = continent

    def __getattr__(self, attr: str):
        """
        Resolve regional data model names and ``geojsons`` as attributes.

        Python calls this only when normal attribute lookup fails.

        Raises:
            AttributeError: if ``attr`` is neither an available model name
                nor ``geojsons``.
        """
        # Private names and the declared-but-unassigned class attributes are
        # never dynamic. Resolving them through ``available_model_names``
        # would read ``self._api`` / ``self.REGION_STATS_MODEL`` / ``self.KEYS``
        # again and, when those are missing (e.g. during copy/unpickling or
        # on a subclass that did not assign them), recurse without bound.
        if attr.startswith("_") or attr in ("REGION_STATS_MODEL", "KEYS"):
            raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")

        if attr in self.available_model_names:
            return self._model_data_from_storage(model_name=attr)
        if attr == "geojsons":
            return self._geojsons

        raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")

    def _model_data_from_storage(self, model_name: str) -> TheDataGardenRegionalDataModel | None:
        """
        Return the cached data model for ``model_name``, creating it on first use.
        """
        stored_model_data = self._model_data_storage.get(model_name)
        # Compare against None rather than relying on truthiness: the sibling
        # GeoJSON model defines ``__len__``; if the regional data model does
        # too, an existing but still empty model would be falsy and would be
        # replaced by a new instance on every access.
        if stored_model_data is None:
            stored_model_data = TheDataGardenRegionalDataModel(
                model_name=model_name, api=self._api, region_url=self._region_url, meta_data=self.meta_data
            )
            self._model_data_storage[model_name] = stored_model_data

        return stored_model_data

    @property
    def meta_data(self) -> BaseModel | None:
        """
        Get the region statistics info from the API.

        The result is cached after the first response with status code 200.
        Returns None while no such response has been received.
        """
        if self._region_stats is None:
            region_stats_resp = self._api.retrieve_from_api(
                url_extension=self._region_url + STATISTICS_URL_EXTENSION,
            )
            if region_stats_resp and region_stats_resp.status_code == 200:
                region_stats_resp_json = region_stats_resp.json().get(self._key(ResponseKeys.STATISTICS), {})
                # NOTE(review): the payload is passed positionally. A plain
                # pydantic BaseModel accepts keyword arguments only, so this
                # presumably relies on REGION_STATS_MODEL accepting a dict;
                # confirm against the concrete model classes.
                self._region_stats = self.REGION_STATS_MODEL(
                    region_stats_resp_json if isinstance(region_stats_resp_json, dict) else {}
                )

        return self._region_stats

    @property
    def region_types(self) -> list[str]:
        """Region types listed in the statistics, or [] when statistics are unavailable."""
        if not self.meta_data:
            return []
        return self.meta_data.region_types

    @property
    def available_model_names(self) -> list[str]:
        """Regional data model names listed in the statistics, or [] when unavailable."""
        if not self.meta_data:
            return []
        return self.meta_data.regional_data_models

    def _key(self, key: str) -> str:
        """Look up the member of ``KEYS`` named ``key``."""
        return getattr(self.KEYS, key)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
from enum import StrEnum
|
2
|
+
|
3
|
+
|
4
|
+
class ResponseKeys(StrEnum):
|
5
|
+
AVAILABLE_MODELS = "AVAILABLE_MODELS"
|
6
|
+
STATISTICS = "STATISTICS"
|
7
|
+
|
8
|
+
|
9
|
+
class RegionKeys(StrEnum): ...
|
10
|
+
|
11
|
+
|
12
|
+
class ContinentKeys(RegionKeys):
    """Key strings for continents; member names match those of ResponseKeys."""

    AVAILABLE_MODELS = "available_data_on_continent_level"
    STATISTICS = "statistics"
|
15
|
+
|
16
|
+
|
17
|
+
class CountryKeys(RegionKeys):
    """Key strings for countries; member names match those of ResponseKeys."""

    AVAILABLE_MODELS = "models_per_region_level"
    STATISTICS = "statistics"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
from .geojson import RegionGeoJSONDataRecord, TheDataGardenRegionGeoJSONModel
|
2
|
+
from .regional_data import RegionalDataRecord, TheDataGardenRegionalDataModel
|
3
|
+
|
4
|
+
# Names exported by this package: the record and model classes imported above.
__all__ = [
    "RegionalDataRecord",
    "TheDataGardenRegionalDataModel",
    "RegionGeoJSONDataRecord",
    "TheDataGardenRegionGeoJSONModel",
]
|
@@ -0,0 +1,179 @@
|
|
1
|
+
from typing import Any
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import polars as pl
|
5
|
+
from pydantic import BaseModel
|
6
|
+
|
7
|
+
from the_datagarden.api.base import BaseApi
|
8
|
+
|
9
|
+
# Record fields whose combined values are used to build a record's hash.
GEJSON_UNIQUE_FIELDS = [
    "region_type",
    "iso_cc_2",
    "local_region_code",
    "local_region_code_type",
    "region_level",
]
|
16
|
+
|
17
|
+
|
18
|
+
class Properties(BaseModel):
    """Properties of a GeoJSON feature; every field is required."""

    name: str
    region_level: int
    region_type: str
    iso_cc_2: str
    local_region_code: str
    local_region_code_type: str
|
25
|
+
|
26
|
+
|
27
|
+
class Geometry(BaseModel):
    """Geometry of a GeoJSON feature: a type name and an untyped coordinate list."""

    type: str
    coordinates: list
|
30
|
+
|
31
|
+
|
32
|
+
class Feature(BaseModel):
    """A GeoJSON feature combining properties and geometry."""

    # Defaults to "Feature" when the input does not provide a type.
    type: str = "Feature"
    properties: Properties
    geometry: Geometry
|
36
|
+
|
37
|
+
|
38
|
+
class RegionGeoJSONDataRecord(BaseModel):
    """
    A single GeoJSON record: identifying fields plus the feature itself.

    Only ``feature`` is required; the identifying fields default to None
    (``region_level`` defaults to 0).
    """

    name: str | None = None
    region_type: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    region_level: int = 0
    feature: Feature

    def record_hash(self) -> str:
        """
        Return a key identifying this record by its GEJSON_UNIQUE_FIELDS values.

        The key is a SHA-256 hex digest, so it is identical across processes.
        The builtin ``hash()`` is salted per interpreter run for strings and
        would yield a different value on every run.
        """
        import hashlib

        # Join with a control character instead of "." so that field values
        # which themselves contain dots cannot make two different records
        # produce the same identity string.
        identity = "\x1f".join(str(getattr(self, key)) for key in sorted(GEJSON_UNIQUE_FIELDS))
        return hashlib.sha256(identity.encode("utf-8")).hexdigest()

    def __str__(self):
        return f"RegionGeoJSONDataRecord: {self.name} ({self.region_type} for {self.local_region_code})"
|
53
|
+
|
54
|
+
|
55
|
+
class TheDataGardenRegionGeoJSONModel:
    """
    Model to hold response data from the The Data Garden API Region GeoJSON endpoint.

    The model holds one RegionGeoJSONDataRecord per GeoJSON feature of the
    region. Calling the instance with a ``region_level`` retrieves the
    features for that level from the API (following pagination) and stores
    them; a level that was retrieved successfully is not requested again.

    The data can be converted to dataframes with:
    - to_polars() -> pl.DataFrame
    - to_pandas() -> pd.DataFrame

    The instance is iterable over its records and supports ``len()``.
    """

    def __init__(self, api: "BaseApi", region_url: str):
        self._api: BaseApi = api
        self._region_url: str = region_url
        # Region levels for which features were already retrieved and stored.
        self._levels_requested: list[int] = []
        # Stored records keyed by RegionGeoJSONDataRecord.record_hash().
        self._geojson_records: dict[str, RegionGeoJSONDataRecord] = {}

    def __str__(self):
        return f"TheDataGardenRegionGeoJSONModel : GeoJSON : (count={len(self._geojson_records)})"

    def __repr__(self):
        return self.__str__()

    def __call__(self, region_level: int = 0) -> "TheDataGardenRegionGeoJSONModel":
        """
        Make sure the records for ``region_level`` are loaded and return self.

        The level is only marked as requested when the API returned data, so
        a failed retrieval is attempted again on the next call.
        """
        if region_level not in self._levels_requested:
            features = self.geojson_paginated_data_from_api(region_level=region_level)
            if features:
                self.set_items(features)
                self._levels_requested.append(region_level)
        return self

    def _response_has_next_page(self, model_data_resp: dict) -> bool:
        """Return True when the response's pagination holds a non-None ``next_page``."""
        pagination = model_data_resp.get("pagination", None)
        if not pagination:
            return False
        return pagination.get("next_page", None) is not None

    def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
        """
        Remove the pagination info from the response and return the payload
        for the next page, or None when there is no next page.
        """
        # pop() consumes the pagination entry, so the paging loop ends unless
        # a following page puts a fresh entry back into the response.
        pagination = model_data_resp.pop("pagination", None)
        if not pagination:
            return None
        next_page = pagination.get("next_page", None)
        if not next_page:
            return None
        return {"page": next_page}

    def geojson_paginated_data_from_api(self, region_level: int) -> dict | None:
        """
        Retrieve all pages for ``region_level`` merged into the first response.

        Returns the first response with the features of all following pages
        appended, or None when the first request returned no data.
        """
        geojson_data_resp = self.geojson_data_from_api(region_level=region_level)
        while geojson_data_resp and self._response_has_next_page(geojson_data_resp):
            next_page_pagination = self._next_page_pagination(geojson_data_resp)
            if next_page_pagination:
                next_page_resp = self.geojson_data_from_api(
                    pagination=next_page_pagination, region_level=region_level
                )
                if next_page_resp:
                    geojson_data_resp["features"].extend(next_page_resp.get("features", []))
                    # A page without pagination info is treated as the last
                    # page, consistent with _response_has_next_page, instead
                    # of raising KeyError.
                    following_pagination = next_page_resp.get("pagination")
                    if following_pagination is not None:
                        geojson_data_resp["pagination"] = following_pagination

        return geojson_data_resp

    def geojson_data_from_api(
        self, region_level: int, pagination: dict[str, str] | None = None
    ) -> dict | None:
        """
        POST a single GeoJSON request and return the decoded JSON body.

        Returns None when the API call yields a falsy response.
        """
        payload: dict[str, Any] = {"region_level": region_level}
        if pagination:
            payload = payload | {"pagination": pagination}
        geojson_data_resp = self._api.retrieve_from_api(
            url_extension=self._region_url + "geojson/",
            method="POST",
            payload=payload,
        )
        if geojson_data_resp:
            return geojson_data_resp.json()
        return None

    def set_items(self, data: dict):
        """
        Store one record per entry of ``data["features"]``.

        A record with the same hash as an already stored record replaces it.
        """
        for raw_feature in data["features"]:
            feature = Feature(**raw_feature)
            data_record = RegionGeoJSONDataRecord(
                name=feature.properties.name,
                region_type=feature.properties.region_type,
                iso_cc_2=feature.properties.iso_cc_2,
                local_region_code=feature.properties.local_region_code,
                local_region_code_type=feature.properties.local_region_code_type,
                region_level=feature.properties.region_level,
                feature=feature,
            )
            self._geojson_records[data_record.record_hash()] = data_record

    def to_polars(self) -> pl.DataFrame:
        """
        Convert the stored records to a polars dataframe, one row per record
        built from the record's ``model_dump()``.
        """
        converted_records = [record.model_dump() for record in self._geojson_records.values()]
        return pl.from_records(converted_records)

    def to_pandas(self) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe
        """
        return self.to_polars().to_pandas()

    def __iter__(self):
        """Makes the class iterable over the values in _geojson_records"""
        return iter(self._geojson_records.values())

    def __len__(self):
        """Returns the number of records"""
        return len(self._geojson_records)

    @property
    def geojson_records(self) -> list[RegionGeoJSONDataRecord]:
        """The stored records as a new list."""
        return list(self._geojson_records.values())
|