the-datagarden 0.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,108 @@
1
+ from datetime import datetime
2
+ from enum import StrEnum
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.authentication.settings import STATISTICS_URL_EXTENSION
8
+ from the_datagarden.api.base import BaseApi
9
+ from the_datagarden.models import TheDataGardenRegionalDataModel, TheDataGardenRegionGeoJSONModel
10
+
11
+ from .settings import ResponseKeys
12
+
13
+
14
class PeriodTypes:
    """Choice class for periodtype used in most data classes.

    Each attribute is the one-letter code for a period granularity.
    """

    YEAR = "Y"
    QUARTER = "Q"
    MONTH = "M"
    WEEK = "W"
    DAY = "D"
    HOUR = "H"
23
+
24
+
25
# Type alias limiting a period type to the one-letter codes listed on PeriodTypes.
PeriodType = Literal["Y", "Q", "M", "W", "D", "H"]
26
+
27
+
28
class RegionParams(BaseModel):
    """
    Pydantic model grouping the optional filters of a regional data request.

    NOTE(review): nothing in this module instantiates the model, so the
    per-field remarks below are inferred from names and types only -- confirm
    against the code that builds the request.
    """

    models: list[str] | None = None  # presumably names of the data models to include
    source: list[str] | None = None  # presumably names of the data sources to include
    period_type: PeriodType = "Y"  # defaults to the yearly code
    period_from: datetime | None = None
    period_to: datetime | None = None
    region_type: str | None = None
    descendant_level: int = 0
36
+
37
+
38
+ class Region:
39
+ """
40
+ A region in The Data Garden.
41
+ """
42
+
43
+ REGION_STATS_MODEL: type[BaseModel]
44
+ _region_stats: BaseModel | None = None
45
+
46
+ KEYS: type[StrEnum]
47
+
48
+ def __repr__(self):
49
+ return f"{self.__class__.__name__} : {self._name}"
50
+
51
+ def __init__(self, url: str, api: BaseApi, name: str, continent: str | None = None):
52
+ self._region_url = url
53
+ self._api = api
54
+ self._available_models: dict = {}
55
+ self._model_data_storage: dict[str, TheDataGardenRegionalDataModel] = {}
56
+ self._geojsons = TheDataGardenRegionGeoJSONModel(api=api, region_url=url)
57
+ self._name = name
58
+ self._continent = continent
59
+
60
+ def __getattr__(self, attr: str):
61
+ if attr in self.available_model_names:
62
+ return self._model_data_from_storage(model_name=attr)
63
+ if attr == "geojsons":
64
+ return self._geojsons
65
+
66
+ raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")
67
+
68
+ def _model_data_from_storage(self, model_name: str) -> TheDataGardenRegionalDataModel | None:
69
+ stored_model_data = self._model_data_storage.get(model_name, None)
70
+ if not stored_model_data:
71
+ self._model_data_storage[model_name] = TheDataGardenRegionalDataModel(
72
+ model_name=model_name, api=self._api, region_url=self._region_url, meta_data=self.meta_data
73
+ )
74
+ return self._model_data_storage[model_name]
75
+
76
+ return stored_model_data
77
+
78
+ @property
79
+ def meta_data(self) -> BaseModel | None:
80
+ """
81
+ Get the region statistics info from the API.
82
+ """
83
+ if not self._region_stats:
84
+ region_stats_resp = self._api.retrieve_from_api(
85
+ url_extension=self._region_url + STATISTICS_URL_EXTENSION,
86
+ )
87
+ if region_stats_resp and region_stats_resp.status_code == 200:
88
+ region_stats_resp_json = region_stats_resp.json().get(self._key(ResponseKeys.STATISTICS), {})
89
+ self._region_stats = self.REGION_STATS_MODEL(
90
+ region_stats_resp_json if isinstance(region_stats_resp_json, dict) else {}
91
+ )
92
+
93
+ return self._region_stats
94
+
95
+ @property
96
+ def region_types(self) -> list[str]:
97
+ if not self.meta_data:
98
+ return []
99
+ return self.meta_data.region_types
100
+
101
+ @property
102
+ def available_model_names(self) -> list[str]:
103
+ if not self.meta_data:
104
+ return []
105
+ return self.meta_data.regional_data_models
106
+
107
+ def _key(self, key: str) -> str:
108
+ return getattr(self.KEYS, key)
@@ -0,0 +1,19 @@
1
+ from enum import StrEnum
2
+
3
+
4
class ResponseKeys(StrEnum):
    """
    Logical names of sections in an API response.

    Each value equals the member name that the region-specific key enums
    (for example ContinentKeys and CountryKeys) use for the concrete key.
    """

    AVAILABLE_MODELS = "AVAILABLE_MODELS"
    STATISTICS = "STATISTICS"
7
+
8
+
9
+ class RegionKeys(StrEnum): ...
10
+
11
+
12
class ContinentKeys(RegionKeys):
    """Concrete response keys used for continent responses."""

    AVAILABLE_MODELS = "available_data_on_continent_level"
    STATISTICS = "statistics"
15
+
16
+
17
class CountryKeys(RegionKeys):
    """Concrete response keys used for country responses."""

    AVAILABLE_MODELS = "models_per_region_level"
    STATISTICS = "statistics"
@@ -0,0 +1,9 @@
1
+ from datagarden_models import ContinentStats
2
+
3
+ from .base import Region
4
+ from .base.settings import ContinentKeys
5
+
6
+
7
class Continent(Region):
    """Region specialisation that uses the continent response keys and the ContinentStats model."""

    KEYS = ContinentKeys
    REGION_STATS_MODEL = ContinentStats
@@ -0,0 +1,9 @@
1
+ from datagarden_models import CountryStats
2
+
3
+ from .base import Region
4
+ from .base.settings import CountryKeys
5
+
6
+
7
class Country(Region):
    """Region specialisation that uses the country response keys and the CountryStats model."""

    KEYS = CountryKeys
    REGION_STATS_MODEL = CountryStats
@@ -0,0 +1,9 @@
1
+ from .geojson import RegionGeoJSONDataRecord, TheDataGardenRegionGeoJSONModel
2
+ from .regional_data import RegionalDataRecord, TheDataGardenRegionalDataModel
3
+
4
# Names re-exported as the public API of this package.
__all__ = [
    "RegionalDataRecord",
    "TheDataGardenRegionalDataModel",
    "RegionGeoJSONDataRecord",
    "TheDataGardenRegionGeoJSONModel",
]
@@ -0,0 +1,179 @@
1
+ from typing import Any
2
+
3
+ import pandas as pd
4
+ import polars as pl
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.base import BaseApi
8
+
9
# Record fields that together identify a GeoJSON record; their values are
# combined by RegionGeoJSONDataRecord.record_hash() to build the record key.
GEJSON_UNIQUE_FIELDS = [
    "region_type",
    "iso_cc_2",
    "local_region_code",
    "local_region_code_type",
    "region_level",
]
16
+
17
+
18
class Properties(BaseModel):
    """Properties section of a GeoJSON feature; all fields are required."""

    name: str
    region_level: int
    region_type: str
    iso_cc_2: str  # presumably the ISO 3166-1 alpha-2 country code -- confirm
    local_region_code: str
    local_region_code_type: str
25
+
26
+
27
class Geometry(BaseModel):
    """Geometry section of a GeoJSON feature."""

    type: str
    # Untyped list; the nesting presumably depends on the geometry type -- confirm.
    coordinates: list
30
+
31
+
32
class Feature(BaseModel):
    """A single GeoJSON feature: its properties plus its geometry."""

    type: str = "Feature"
    properties: Properties
    geometry: Geometry
36
+
37
+
38
class RegionGeoJSONDataRecord(BaseModel):
    """
    One GeoJSON record for a region: the feature itself plus the identifying
    values of its properties copied to top-level fields.
    """

    name: str | None = None
    region_type: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    region_level: int = 0
    feature: Feature

    def record_hash(self) -> str:
        """
        Return a key built from the values of the GEJSON_UNIQUE_FIELDS.

        The values are stringified in alphabetical field order, joined with
        ".", and passed through the builtin ``hash``. Python salts ``str``
        hashes per interpreter process, so the result is only comparable
        within a single run.
        """
        field_values = (str(getattr(self, field_name)) for field_name in sorted(GEJSON_UNIQUE_FIELDS))
        return str(hash(".".join(field_values)))

    def __str__(self):
        return f"RegionGeoJSONDataRecord: {self.name} ({self.region_type} for {self.local_region_code})"
53
+
54
+
55
class TheDataGardenRegionGeoJSONModel:
    """
    Model to hold response data from the The Data Garden API Region GeoJSON endpoint.

    Calling the instance with a ``region_level`` retrieves all pages of GeoJSON
    features for that level (once per level) and stores one
    ``RegionGeoJSONDataRecord`` per feature, keyed by its record hash.

    The stored records can be converted to dataframes with:
        - to_polars() -> pl.DataFrame
        - to_pandas() -> pd.DataFrame

    The model is iterable over its records and ``len()`` returns the number of
    records.
    """

    def __init__(self, api: "BaseApi", region_url: str):
        """
        Args:
            api: API client used to retrieve the GeoJSON data.
            region_url: URL extension of the region; ``"geojson/"`` is appended to it.
        """
        self._api: BaseApi = api
        self._region_url: str = region_url
        self._levels_requested: list[int] = []
        self._geojson_records: dict[str, RegionGeoJSONDataRecord] = {}

    def __str__(self):
        return f"TheDataGardenRegionGeoJSONModel : GeoJSON : (count={len(self._geojson_records)})"

    def __repr__(self):
        return self.__str__()

    def __call__(self, region_level: int = 0) -> "TheDataGardenRegionGeoJSONModel":
        """
        Make sure the GeoJSON data for ``region_level`` is loaded and return ``self``.

        A level is only marked as requested when data was received, so a failed
        retrieval is retried on the next call.
        """
        if region_level not in self._levels_requested:
            features = self.geojson_paginated_data_from_api(region_level=region_level)
            if features:
                self.set_items(features)
                self._levels_requested.append(region_level)
        return self

    def _response_has_next_page(self, model_data_resp: dict) -> bool:
        """Return True when the response carries pagination info with a ``next_page`` that is not None."""
        pagination = model_data_resp.get("pagination", None)
        if not pagination:
            return False
        return pagination.get("next_page", None) is not None

    def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
        """
        Remove the pagination info from the response and return the pagination
        payload for the next page, or None when there is no next page.
        """
        pagination = model_data_resp.pop("pagination", None)
        if not pagination:
            return None
        next_page = pagination.get("next_page", None)
        if not next_page:
            return None
        return {"page": next_page}

    def geojson_paginated_data_from_api(self, region_level: int):
        """
        Retrieve all pages for ``region_level`` and merge their features into
        the first response.

        Returns:
            The first response dict with the features of all retrieved pages
            appended, or None when the first request yields nothing. When a
            follow-up page cannot be retrieved, the data collected so far is
            returned.
        """
        geojson_data_resp = self.geojson_data_from_api(region_level=region_level)
        while geojson_data_resp and self._response_has_next_page(geojson_data_resp):
            next_page_pagination = self._next_page_pagination(geojson_data_resp)
            if not next_page_pagination:
                break
            next_page_resp = self.geojson_data_from_api(
                pagination=next_page_pagination, region_level=region_level
            )
            if not next_page_resp:
                # NOTE(review): the caller still stores the partial result and
                # marks the level as requested, so the missing pages are not
                # fetched again -- confirm this is intended.
                break
            # A follow-up page may omit "features" or "pagination" (for
            # example on the last page); treat both as absent instead of
            # raising KeyError.
            geojson_data_resp["features"].extend(next_page_resp.get("features") or [])
            geojson_data_resp["pagination"] = next_page_resp.get("pagination")

        return geojson_data_resp

    def geojson_data_from_api(
        self, region_level: int, pagination: dict[str, str] | None = None
    ) -> dict | None:
        """
        POST a single GeoJSON request for ``region_level``.

        Args:
            region_level: Region level to request.
            pagination: Optional pagination payload, e.g. ``{"page": 2}``.

        Returns:
            The decoded JSON body, or None when the API client returns a falsy response.
        """
        payload: dict[str, Any] = {"region_level": region_level}
        if pagination:
            payload["pagination"] = pagination
        geojson_data_resp = self._api.retrieve_from_api(
            url_extension=self._region_url + "geojson/",
            method="POST",
            payload=payload,
        )
        if geojson_data_resp:
            return geojson_data_resp.json()
        return None

    def set_items(self, data: dict):
        """
        Store one record per feature in ``data["features"]``.

        A record with the same hash as an already stored record replaces it.
        """
        for raw_feature in data["features"]:
            feature = Feature(**raw_feature)
            properties = feature.properties
            data_record = RegionGeoJSONDataRecord(
                name=properties.name,
                region_type=properties.region_type,
                iso_cc_2=properties.iso_cc_2,
                local_region_code=properties.local_region_code,
                local_region_code_type=properties.local_region_code_type,
                region_level=properties.region_level,
                feature=feature,
            )
            self._geojson_records[data_record.record_hash()] = data_record

    def to_polars(self) -> pl.DataFrame:
        """
        Convert the stored records to a polars dataframe, one row per record.
        """
        converted_records = [record.model_dump() for record in self._geojson_records.values()]
        return pl.from_records(converted_records)

    def to_pandas(self) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe
        """
        return self.to_polars().to_pandas()

    def __iter__(self):
        """Makes the class iterable over the stored GeoJSON records"""
        return iter(self._geojson_records.values())

    def __len__(self):
        """Returns the number of records"""
        return len(self._geojson_records)

    @property
    def geojson_records(self) -> list[RegionGeoJSONDataRecord]:
        """The stored GeoJSON records as a new list."""
        return list(self._geojson_records.values())