the-datagarden 0.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ from datetime import datetime
2
+ from enum import StrEnum
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.authentication.settings import STATISTICS_URL_EXTENSION
8
+ from the_datagarden.api.base import BaseApi
9
+ from the_datagarden.models import TheDataGardenRegionalDataModel, TheDataGardenRegionGeoJSONModel
10
+
11
+ from .settings import ResponseKeys
12
+
13
+
14
class PeriodTypes:
    """
    Choice class for the period type used in most data classes.

    Each attribute holds the single-letter code for one period granularity.
    The codes are the same values accepted by the ``PeriodType`` literal.
    """

    # Ordered from the coarsest to the finest granularity.
    YEAR = "Y"
    QUARTER = "Q"
    MONTH = "M"
    WEEK = "W"
    DAY = "D"
    HOUR = "H"
23
+
24
+
25
# Type alias restricting a period type to the single-letter codes declared on PeriodTypes.
PeriodType = Literal[
    "Y",
    "Q",
    "M",
    "W",
    "D",
    "H",
]
26
+
27
+
28
class RegionParams(BaseModel):
    """
    Pydantic model bundling the parameters of a regional data selection.

    All fields are optional; the defaults are a yearly period type ("Y"),
    no model/source/period/region-type restriction and descendant level 0.
    """

    # Optional restriction on model names and on sources.
    models: list[str] | None = None
    source: list[str] | None = None

    # Period granularity code plus optional period boundaries.
    period_type: PeriodType = "Y"
    period_from: datetime | None = None
    period_to: datetime | None = None

    # Optional region type and the descendant level (0 by default).
    region_type: str | None = None
    descendant_level: int = 0
36
+
37
+
38
+ class Region:
39
+ """
40
+ A region in The Data Garden.
41
+ """
42
+
43
+ REGION_STATS_MODEL: type[BaseModel]
44
+ _region_stats: BaseModel | None = None
45
+
46
+ KEYS: type[StrEnum]
47
+
48
+ def __repr__(self):
49
+ return f"{self.__class__.__name__} : {self._name}"
50
+
51
+ def __init__(self, url: str, api: BaseApi, name: str, continent: str | None = None):
52
+ self._region_url = url
53
+ self._api = api
54
+ self._available_models: dict = {}
55
+ self._model_data_storage: dict[str, TheDataGardenRegionalDataModel] = {}
56
+ self._geojsons = TheDataGardenRegionGeoJSONModel(api=api, region_url=url)
57
+ self._name = name
58
+ self._continent = continent
59
+
60
+ def __getattr__(self, attr: str):
61
+ if attr in self.available_model_names:
62
+ return self._model_data_from_storage(model_name=attr)
63
+ if attr == "geojsons":
64
+ return self._geojsons
65
+
66
+ raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr}'")
67
+
68
+ def _model_data_from_storage(self, model_name: str) -> TheDataGardenRegionalDataModel | None:
69
+ stored_model_data = self._model_data_storage.get(model_name, None)
70
+ if not stored_model_data:
71
+ self._model_data_storage[model_name] = TheDataGardenRegionalDataModel(
72
+ model_name=model_name, api=self._api, region_url=self._region_url, meta_data=self.meta_data
73
+ )
74
+ return self._model_data_storage[model_name]
75
+
76
+ return stored_model_data
77
+
78
+ @property
79
+ def meta_data(self) -> BaseModel | None:
80
+ """
81
+ Get the region statistics info from the API.
82
+ """
83
+ if not self._region_stats:
84
+ region_stats_resp = self._api.retrieve_from_api(
85
+ url_extension=self._region_url + STATISTICS_URL_EXTENSION,
86
+ )
87
+ if region_stats_resp and region_stats_resp.status_code == 200:
88
+ region_stats_resp_json = region_stats_resp.json().get(self._key(ResponseKeys.STATISTICS), {})
89
+ self._region_stats = self.REGION_STATS_MODEL(
90
+ region_stats_resp_json if isinstance(region_stats_resp_json, dict) else {}
91
+ )
92
+
93
+ return self._region_stats
94
+
95
+ @property
96
+ def region_types(self) -> list[str]:
97
+ if not self.meta_data:
98
+ return []
99
+ return self.meta_data.region_types
100
+
101
+ @property
102
+ def available_model_names(self) -> list[str]:
103
+ if not self.meta_data:
104
+ return []
105
+ return self.meta_data.regional_data_models
106
+
107
+ def _key(self, key: str) -> str:
108
+ return getattr(self.KEYS, key)
@@ -0,0 +1,19 @@
1
+ from enum import StrEnum
2
+
3
+
4
+ class ResponseKeys(StrEnum):
5
+ AVAILABLE_MODELS = "AVAILABLE_MODELS"
6
+ STATISTICS = "STATISTICS"
7
+
8
+
9
+ class RegionKeys(StrEnum): ...
10
+
11
+
12
class ContinentKeys(RegionKeys):
    """Response keys used for continent-level API responses."""

    # Member names mirror ResponseKeys; values are the keys in the response.
    AVAILABLE_MODELS = "available_data_on_continent_level"
    STATISTICS = "statistics"
15
+
16
+
17
class CountryKeys(RegionKeys):
    """Response keys used for country-level API responses."""

    # Member names mirror ResponseKeys; values are the keys in the response.
    AVAILABLE_MODELS = "models_per_region_level"
    STATISTICS = "statistics"
@@ -0,0 +1,9 @@
1
+ from datagarden_models import ContinentStats
2
+
3
+ from .base import Region
4
+ from .base.settings import ContinentKeys
5
+
6
+
7
class Continent(Region):
    """Region subclass configured with the continent response keys and stats model."""

    # Wraps the statistics section of the API response.
    REGION_STATS_MODEL = ContinentStats
    # Maps ResponseKeys names to the keys used in continent responses.
    KEYS = ContinentKeys
@@ -0,0 +1,9 @@
1
+ from datagarden_models import CountryStats
2
+
3
+ from .base import Region
4
+ from .base.settings import CountryKeys
5
+
6
+
7
class Country(Region):
    """Region subclass configured with the country response keys and stats model."""

    # Wraps the statistics section of the API response.
    REGION_STATS_MODEL = CountryStats
    # Maps ResponseKeys names to the keys used in country responses.
    KEYS = CountryKeys
@@ -0,0 +1,9 @@
1
+ from .geojson import RegionGeoJSONDataRecord, TheDataGardenRegionGeoJSONModel
2
+ from .regional_data import RegionalDataRecord, TheDataGardenRegionalDataModel
3
+
4
# Names re-exported from the .geojson and .regional_data submodules.
__all__ = [
    "RegionalDataRecord",
    "TheDataGardenRegionalDataModel",
    "RegionGeoJSONDataRecord",
    "TheDataGardenRegionGeoJSONModel",
]
@@ -0,0 +1,179 @@
1
+ from typing import Any
2
+
3
+ import pandas as pd
4
+ import polars as pl
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.base import BaseApi
8
+
9
# Record attributes that together identify a GeoJSON record; their values are
# combined by RegionGeoJSONDataRecord.record_hash().
GEJSON_UNIQUE_FIELDS = [
    "region_type",
    "iso_cc_2",
    "local_region_code",
    "local_region_code_type",
    "region_level",
]
16
+
17
+
18
class Properties(BaseModel):
    """Properties section of a GeoJSON feature; every field is required."""

    # Name of the region.
    name: str

    # Level and type of the region.
    region_level: int
    region_type: str

    # Region identification codes.
    iso_cc_2: str
    local_region_code: str
    local_region_code_type: str
25
+
26
+
27
class Geometry(BaseModel):
    """Geometry section of a GeoJSON feature."""

    # Geometry type name as given in the feature.
    type: str
    # Coordinates kept as an untyped list; its nesting is not validated here.
    coordinates: list
30
+
31
+
32
class Feature(BaseModel):
    """A GeoJSON feature made up of its properties and geometry."""

    # Defaults to "Feature" when the input does not provide a type.
    type: str = "Feature"
    properties: Properties
    geometry: Geometry
36
+
37
+
38
class RegionGeoJSONDataRecord(BaseModel):
    """
    A single GeoJSON record: the feature plus its identifying properties.

    The identifying fields are optional and default to ``None`` (or ``0`` for
    ``region_level``); ``feature`` is required.
    """

    name: str | None = None
    region_type: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    region_level: int = 0
    feature: Feature

    def record_hash(self) -> str:
        """
        Return a key identifying this record by its GEJSON_UNIQUE_FIELDS values.

        The values are joined with "." in sorted field-name order and digested
        with SHA-256. Unlike the builtin ``hash()``, which is salted per process
        for strings, the digest is identical across interpreter runs.
        """
        import hashlib

        hash_str = ".".join(str(getattr(self, key)) for key in sorted(GEJSON_UNIQUE_FIELDS))
        return hashlib.sha256(hash_str.encode("utf-8")).hexdigest()

    def __str__(self):
        return f"RegionGeoJSONDataRecord: {self.name} ({self.region_type} for {self.local_region_code})"
53
+
54
+
55
class TheDataGardenRegionGeoJSONModel:
    """
    Model to hold response data from the The Data Garden API Region GeoJSON endpoint.

    Calling the instance with a ``region_level`` requests the GeoJSON features
    for that level (only once per level after a successful fetch) and stores
    them as ``RegionGeoJSONDataRecord`` objects keyed by their ``record_hash()``.

    The stored records can be converted to dataframes with:
        - to_polars() -> pl.DataFrame
        - to_pandas() -> pd.DataFrame

    The model is iterable over its records and supports ``len()``.
    """

    def __init__(self, api: "BaseApi", region_url: str):
        """
        Args:
            api: API client used to request the GeoJSON data.
            region_url: URL extension of the region; "geojson/" is appended to it.
        """
        self._api: BaseApi = api
        self._region_url: str = region_url
        # Region levels for which features were already fetched and stored.
        self._levels_requested: list[int] = []
        self._geojson_records: dict[str, RegionGeoJSONDataRecord] = {}

    def __str__(self):
        return f"TheDataGardenRegionGeoJSONModel : GeoJSON : (count={len(self._geojson_records)})"

    def __repr__(self):
        return self.__str__()

    def __call__(self, region_level: int = 0) -> "TheDataGardenRegionGeoJSONModel":
        """
        Make sure the features for ``region_level`` are loaded and return ``self``.

        A level is only marked as requested when data was received, so a failed
        or empty fetch is retried on the next call.
        """
        if region_level not in self._levels_requested:
            features = self.geojson_paginated_data_from_api(region_level=region_level)
            if features:
                self.set_items(features)
                self._levels_requested.append(region_level)
        return self

    def _response_has_next_page(self, model_data_resp: dict) -> bool:
        """Return True if the response's pagination info names a next page."""
        pagination = model_data_resp.get("pagination", None)
        if not pagination:
            return False
        return pagination.get("next_page", None) is not None

    def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
        """
        Remove the pagination info from the response and return the payload
        for requesting the next page, or ``None`` if there is no next page.
        """
        pagination = model_data_resp.pop("pagination", None)
        if not pagination:
            return None
        next_page = pagination.get("next_page", None)
        if not next_page:
            return None
        return {"page": next_page}

    def geojson_paginated_data_from_api(self, region_level: int) -> dict | None:
        """
        Fetch all pages for ``region_level`` and merge their features.

        Returns the first page's response dict with the features of all
        following pages appended, or the falsy first result when the initial
        request yielded nothing. If a follow-up request fails, the pages
        collected so far are returned (best effort).
        """
        geojson_data_resp = self.geojson_data_from_api(region_level=region_level)
        if not geojson_data_resp:
            return geojson_data_resp

        while self._response_has_next_page(geojson_data_resp):
            # Popping the pagination info guarantees the loop terminates when
            # no new pagination info is stored below.
            next_page_pagination = self._next_page_pagination(geojson_data_resp)
            if not next_page_pagination:
                break
            next_page_resp = self.geojson_data_from_api(
                pagination=next_page_pagination, region_level=region_level
            )
            if not next_page_resp:
                break
            # Tolerate pages without "features" or "pagination" keys instead of
            # raising KeyError and losing the pages that were already fetched.
            geojson_data_resp.setdefault("features", []).extend(next_page_resp.get("features") or [])
            following_pagination = next_page_resp.get("pagination")
            if following_pagination is not None:
                geojson_data_resp["pagination"] = following_pagination

        return geojson_data_resp

    def geojson_data_from_api(
        self, region_level: int, pagination: dict[str, str] | None = None
    ) -> dict | None:
        """
        POST a single GeoJSON request and return the decoded JSON body.

        Returns ``None`` when the API client returns a falsy response.
        """
        payload: dict[str, Any] = {"region_level": region_level}
        if pagination:
            payload["pagination"] = pagination
        geojson_data_resp = self._api.retrieve_from_api(
            url_extension=self._region_url + "geojson/",
            method="POST",
            payload=payload,
        )
        if geojson_data_resp:
            return geojson_data_resp.json()
        return None

    def set_items(self, data: dict):
        """
        Convert the features in ``data`` to records and store them.

        Records are keyed by ``record_hash()``, so a feature with the same
        identifying fields replaces the previously stored record.
        """
        for raw_feature in data.get("features") or []:
            feature = Feature(**raw_feature)
            properties = feature.properties
            data_record = RegionGeoJSONDataRecord(
                name=properties.name,
                region_type=properties.region_type,
                iso_cc_2=properties.iso_cc_2,
                local_region_code=properties.local_region_code,
                local_region_code_type=properties.local_region_code_type,
                region_level=properties.region_level,
                feature=feature,
            )
            self._geojson_records[data_record.record_hash()] = data_record

    def to_polars(self) -> pl.DataFrame:
        """
        Convert the stored records to a polars dataframe, one row per record.
        """
        converted_records = [record.model_dump() for record in self._geojson_records.values()]
        return pl.from_records(converted_records)

    def to_pandas(self) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe
        """
        return self.to_polars().to_pandas()

    def __iter__(self):
        """Makes the class iterable over the values in _geojson_records"""
        return iter(self._geojson_records.values())

    def __len__(self):
        """Returns the number of records"""
        return len(self._geojson_records)

    @property
    def geojson_records(self) -> list[RegionGeoJSONDataRecord]:
        """All stored records as a list."""
        return list(self._geojson_records.values())