the-datagarden 0.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- the_datagarden/__init__.py +8 -0
- the_datagarden/abc/__init__.py +3 -0
- the_datagarden/abc/api.py +19 -0
- the_datagarden/abc/authentication.py +42 -0
- the_datagarden/api/__init__.py +5 -0
- the_datagarden/api/authentication/__init__.py +112 -0
- the_datagarden/api/authentication/credentials/__init__.py +120 -0
- the_datagarden/api/authentication/environment/__init__.py +13 -0
- the_datagarden/api/authentication/settings.py +54 -0
- the_datagarden/api/base/__init__.py +215 -0
- the_datagarden/api/regions/__init__.py +4 -0
- the_datagarden/api/regions/base/__init__.py +108 -0
- the_datagarden/api/regions/base/settings.py +19 -0
- the_datagarden/api/regions/continent.py +9 -0
- the_datagarden/api/regions/country.py +9 -0
- the_datagarden/models/__init__.py +9 -0
- the_datagarden/models/geojson.py +179 -0
- the_datagarden/models/regional_data.py +411 -0
- the_datagarden/version.py +1 -0
- the_datagarden-1.2.1.dist-info/METADATA +137 -0
- the_datagarden-1.2.1.dist-info/RECORD +25 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/WHEEL +1 -1
- the_datagarden-0.1.0.dist-info/METADATA +0 -18
- the_datagarden-0.1.0.dist-info/RECORD +0 -7
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/entry_points.txt +0 -0
- {the_datagarden-0.1.0.dist-info → the_datagarden-1.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,411 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import polars as pl
|
3
|
+
from datagarden_models import DataGardenModel, DatagardenModels, DataGardenSubModel, RegionalDataStats
|
4
|
+
from datagarden_models.models.base.legend import Legend
|
5
|
+
from pydantic import BaseModel
|
6
|
+
|
7
|
+
from the_datagarden.api.base import BaseApi
|
8
|
+
|
9
|
+
# Record attributes whose combined values identify one regional data record.
# RegionalDataRecord.record_hash() joins them (in sorted order) to build the
# key under which a record is stored.
UNIQUE_FIELDS = [
    # which region
    "region_type",
    "un_region_code",
    "iso_cc_2",
    "local_region_code",
    "local_region_code_type",
    "region_level",
    # which observation
    "period",
    "period_type",
    "source_name",
]

# Descriptive (non-measurement) columns that are left out by default when
# statistics are computed over a flattened model dataframe. Kept as a list
# because callers copy and extend it.
DEFAULT_COLUMNS_TO_EXCLUDE = [
    "datagarden_model_version",
    # region identification
    "name",
    "region_type",
    "un_region_code",
    "iso_cc_2",
    "local_region_code",
    "local_region_code_type",
    # parent region identification
    "parent_region_code",
    "parent_region_code_type",
    "parent_region_type",
    "region_level",
    # record identification
    "source_name",
    "data_model_name",
    "period",
    "period_type",
]
|
37
|
+
|
38
|
+
|
39
|
+
class RegionalDataRecord(BaseModel):
    """
    One data model instance for one region, source, period and period type.

    The region and observation descriptors are plain optional strings (plus an
    integer region level); `model` holds the actual data model object.
    """

    # Field order is significant: it determines the key order of model_dump()
    # and therefore the column order of dataframes built from records.
    name: str | None = None
    region_type: str | None = None
    un_region_code: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    parent_region_code: str | None = None
    parent_region_code_type: str | None = None
    parent_region_type: str | None = None
    region_level: int = 0
    source_name: str | None = None
    period: str | None = None
    period_type: str | None = None
    data_model_name: str | None = None
    model: DataGardenSubModel

    def record_hash(self) -> str:
        """
        Return a key built from the values of the UNIQUE_FIELDS attributes.

        The values are stringified, joined with dots in sorted field-name
        order and passed through the builtin hash().
        """
        identifying_values = (str(getattr(self, field)) for field in sorted(UNIQUE_FIELDS))
        return str(hash(".".join(identifying_values)))

    def __str__(self):
        description = f"{self.data_model_name} for {self.period}, {self.period_type}"
        return f"RegionalDataRecord: {self.name} ({description})"

    @property
    def datgarden_model_class(self) -> type[DataGardenModel]:
        """Class of the data model object held by this record."""
        return type(self.model)

    def record_for_sub_model(self, sub_model_name: str) -> "RegionalDataRecord":
        """
        Return a copy of this record narrowed down to one of its sub models.

        The new record carries the same descriptors, with `data_model_name`
        set to `sub_model_name` and `model` set to that attribute of the
        current model.

        Raises:
            ValueError: if `sub_model_name` is not listed in the sub model
                names of the model class legends.
        """
        model_class = self.datgarden_model_class
        if sub_model_name not in model_class.legends().sub_model_names:
            raise ValueError(f"Sub model `{sub_model_name}` not found in {model_class}")
        return RegionalDataRecord(
            **{
                **self.model_dump(),
                "data_model_name": sub_model_name,
                "model": getattr(self.model, sub_model_name),
            }
        )
|
76
|
+
|
77
|
+
|
78
|
+
class TheDataGardenRegionalDataModel:
|
79
|
+
"""
|
80
|
+
Model to hold response data from the The Data Garden API Regional Data endpoint.
|
81
|
+
|
82
|
+
The model hold a list of regional_data records containg a regional data model
|
83
|
+
for the region for a specific set op sources, periods and period types.
|
84
|
+
|
85
|
+
The data can be converted to Polars and Pandas dataframes by the following
|
86
|
+
methods:
|
87
|
+
- to_polars(model_convertors: dict | None = None) -> pl.DataFrame
|
88
|
+
model_convertors dict will be used to covert specifc model fields to dataframe
|
89
|
+
columns.
|
90
|
+
- full_model_to_polars() -> pl.DataFrame
|
91
|
+
|
92
|
+
For pandas dataframes you can use the same methods:
|
93
|
+
- to_pandas(model_convertors: dict | None = None) -> pd.DataFrame
|
94
|
+
- full_model_to_pandas() -> pd.DataFrame
|
95
|
+
"""
|
96
|
+
|
97
|
+
def __init__(
|
98
|
+
self,
|
99
|
+
api: "BaseApi",
|
100
|
+
model_name: str,
|
101
|
+
region_url: str,
|
102
|
+
meta_data: BaseModel,
|
103
|
+
is_sub_model: bool = False,
|
104
|
+
model: type[DataGardenSubModel] | None = None,
|
105
|
+
):
|
106
|
+
self._api: BaseApi = api
|
107
|
+
self._model_name: str = model_name
|
108
|
+
self._region_url: str = region_url
|
109
|
+
self._request_params_hashes: list[str] = []
|
110
|
+
self._data_records: dict[str, RegionalDataRecord] = {}
|
111
|
+
self.meta_data: BaseModel = meta_data
|
112
|
+
self._model: DataGardenModel = model or getattr(DatagardenModels, model_name.upper())
|
113
|
+
self._is_sub_model: bool = is_sub_model
|
114
|
+
|
115
|
+
def __str__(self):
|
116
|
+
return f"TheDataGardenRegionalDataModel : {self._model_name} : (count={len(self._data_records)})"
|
117
|
+
|
118
|
+
def __repr__(self):
|
119
|
+
return self.__str__()
|
120
|
+
|
121
|
+
def __call__(self, **kwargs) -> "TheDataGardenRegionalDataModel":
|
122
|
+
if self._is_sub_model:
|
123
|
+
raise TypeError(
|
124
|
+
"Sub model data cannot be used to retrieve data. "
|
125
|
+
"Use the main model data object to make calls to The-Datagarden API"
|
126
|
+
)
|
127
|
+
request_hash = self.request_hash(**kwargs)
|
128
|
+
if request_hash not in self._request_params_hashes:
|
129
|
+
regional_data = self.regional_paginated_data_from_api(**kwargs)
|
130
|
+
if regional_data:
|
131
|
+
self.set_items(regional_data)
|
132
|
+
self._request_params_hashes.append(request_hash)
|
133
|
+
return self
|
134
|
+
|
135
|
+
def __getattr__(self, attribute: str) -> "TheDataGardenRegionalDataModel":
|
136
|
+
if attribute not in self._model.legends().sub_model_names:
|
137
|
+
raise ValueError(f"Attribute {attribute} is not a sub-model of {self._model_name}")
|
138
|
+
sub_model = getattr(self._model.legends(), attribute).model
|
139
|
+
regional_data_for_attribute = TheDataGardenRegionalDataModel(
|
140
|
+
api=self._api,
|
141
|
+
model_name=attribute,
|
142
|
+
region_url=self._region_url,
|
143
|
+
meta_data=self.meta_data,
|
144
|
+
is_sub_model=True,
|
145
|
+
model=sub_model,
|
146
|
+
)
|
147
|
+
regional_data_for_attribute._data_records = {
|
148
|
+
key: value.record_for_sub_model(attribute) for key, value in self._data_records.items()
|
149
|
+
}
|
150
|
+
return regional_data_for_attribute
|
151
|
+
|
152
|
+
@property
|
153
|
+
def model_attributes(self) -> list[str]:
|
154
|
+
return self._model.legends().attributes
|
155
|
+
|
156
|
+
def model_attribute_legend(self, attribute: str) -> Legend:
|
157
|
+
return getattr(self._model.legends(), attribute)
|
158
|
+
|
159
|
+
def request_hash(self, **kwargs) -> str:
|
160
|
+
sorted_items = sorted(kwargs.items())
|
161
|
+
hash_str = ",".join(f"{k}:{v}" for k, v in sorted_items)
|
162
|
+
return str(hash(hash_str))
|
163
|
+
|
164
|
+
def _response_has_next_page(self, model_data_resp: dict) -> bool:
|
165
|
+
pagination = model_data_resp.get("pagination", None)
|
166
|
+
if not pagination:
|
167
|
+
return False
|
168
|
+
return pagination.get("next_page", None) is not None
|
169
|
+
|
170
|
+
def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
|
171
|
+
pagination = model_data_resp.pop("pagination", None)
|
172
|
+
if not pagination:
|
173
|
+
return None
|
174
|
+
next_page = pagination.get("next_page", None)
|
175
|
+
if not next_page:
|
176
|
+
return None
|
177
|
+
return {"page": next_page}
|
178
|
+
|
179
|
+
def regional_paginated_data_from_api(self, **kwargs) -> dict:
|
180
|
+
model_data_resp = self.regional_data_from_api(**kwargs)
|
181
|
+
if not model_data_resp:
|
182
|
+
return {}
|
183
|
+
while self._response_has_next_page(model_data_resp):
|
184
|
+
next_page_pagination = self._next_page_pagination(model_data_resp)
|
185
|
+
if next_page_pagination:
|
186
|
+
next_page_resp = self.regional_data_from_api(pagination=next_page_pagination, **kwargs)
|
187
|
+
if next_page_resp:
|
188
|
+
model_data_resp["data_by_region"].extend(next_page_resp["data_by_region"])
|
189
|
+
model_data_resp["pagination"] = next_page_resp["pagination"]
|
190
|
+
|
191
|
+
return model_data_resp
|
192
|
+
|
193
|
+
def regional_data_from_api(self, **kwargs) -> dict:
|
194
|
+
model_data_resp = self._api.retrieve_from_api(
|
195
|
+
url_extension=self._region_url + "regional_data/",
|
196
|
+
method="POST",
|
197
|
+
payload={"model": self._model_name, **kwargs},
|
198
|
+
)
|
199
|
+
if model_data_resp:
|
200
|
+
return model_data_resp.json()
|
201
|
+
return {}
|
202
|
+
|
203
|
+
def set_items(self, data: dict):
|
204
|
+
for regional_data in data["data_by_region"]:
|
205
|
+
base_items = {
|
206
|
+
"name": regional_data.get("region_name", None),
|
207
|
+
"region_type": regional_data.get("region_type", None),
|
208
|
+
"un_region_code": regional_data.get("un_region_code", None),
|
209
|
+
"iso_cc_2": regional_data.get("iso_cc_2", None),
|
210
|
+
"local_region_code": regional_data.get("local_region_code", None),
|
211
|
+
"local_region_code_type": regional_data.get("local_region_code_type", None),
|
212
|
+
"parent_region_code": regional_data.get("parent_region_code", None),
|
213
|
+
"parent_region_code_type": regional_data.get("parent_region_code_type", None),
|
214
|
+
"parent_region_type": regional_data.get("parent_region_type", None),
|
215
|
+
"region_level": regional_data.get("region_level", 0),
|
216
|
+
}
|
217
|
+
data_for_region = regional_data["data_objects_for_region"]
|
218
|
+
data_records = [
|
219
|
+
RegionalDataRecord(**base_items, **self._record_items(data_obj))
|
220
|
+
for data_obj in data_for_region
|
221
|
+
]
|
222
|
+
for data_record in data_records:
|
223
|
+
self._data_records.update({data_record.record_hash(): data_record})
|
224
|
+
|
225
|
+
if self._data_records:
|
226
|
+
_, first_record = list(self._data_records.items())[0]
|
227
|
+
model_name = first_record.data_model_name
|
228
|
+
if not model_name:
|
229
|
+
raise ValueError("data_model_name is required")
|
230
|
+
self._model_name = model_name
|
231
|
+
|
232
|
+
def _record_items(self, data: dict):
|
233
|
+
model_name = data.get("data_type", None)
|
234
|
+
if not model_name:
|
235
|
+
raise ValueError("data_model_name is required")
|
236
|
+
|
237
|
+
model = getattr(DatagardenModels, model_name.upper())
|
238
|
+
if not model:
|
239
|
+
raise ValueError(f"model {model_name} not found in DatagardenModels")
|
240
|
+
return {
|
241
|
+
"source_name": data.get("source_name", None),
|
242
|
+
"period": data.get("period", None),
|
243
|
+
"period_type": data.get("period_type", None),
|
244
|
+
"data_model_name": data.get("data_type", None),
|
245
|
+
"model": model(**data.get("data", {})),
|
246
|
+
}
|
247
|
+
|
248
|
+
def to_polars(self, model_convertors: dict | None = None) -> pl.DataFrame:
|
249
|
+
"""
|
250
|
+
Convert the data to a polars dataframe using a dictionary of model attributes to convert to columns
|
251
|
+
"""
|
252
|
+
model_convertors = model_convertors or {}
|
253
|
+
converted_records = []
|
254
|
+
for record in self._data_records.values():
|
255
|
+
model = record.model
|
256
|
+
record_dict = record.model_dump()
|
257
|
+
record_dict.pop("model")
|
258
|
+
|
259
|
+
for new_col, model_attr in model_convertors.items():
|
260
|
+
# Handle nested attributes using split by dots
|
261
|
+
model_attr_flatten = "__flatten" in model_attr
|
262
|
+
model_attr = model_attr.replace("__flatten", "")
|
263
|
+
attrs = model_attr.split(".")
|
264
|
+
value = getattr(model, attrs[0])
|
265
|
+
for attr in attrs[1:]:
|
266
|
+
value = getattr(value, attr, None)
|
267
|
+
if not value:
|
268
|
+
continue
|
269
|
+
if model_attr_flatten:
|
270
|
+
model_data = value.model_dump() if isinstance(value, BaseModel) else value
|
271
|
+
record_dict.update(self.flatten_dict(model_data, {}))
|
272
|
+
else:
|
273
|
+
record_dict[new_col] = value
|
274
|
+
converted_records.append(record_dict)
|
275
|
+
return pl.from_records(converted_records)
|
276
|
+
|
277
|
+
def flatten_dict(self, dict_to_flatten: dict, flattened_dict: dict, prefix: str = "") -> dict:
|
278
|
+
for key, value in dict_to_flatten.items():
|
279
|
+
new_key = f"{prefix}.{key}" if prefix else key
|
280
|
+
if isinstance(value, dict):
|
281
|
+
flattened_dict.update(self.flatten_dict(value, flattened_dict, new_key))
|
282
|
+
else:
|
283
|
+
flattened_dict[new_key] = value
|
284
|
+
|
285
|
+
return flattened_dict
|
286
|
+
|
287
|
+
def full_model_to_polars(self):
|
288
|
+
"""
|
289
|
+
Convert the data to a polars dataframe, flattening all nested dictionaries
|
290
|
+
"""
|
291
|
+
converted_records = []
|
292
|
+
for record in self._data_records.values():
|
293
|
+
# Get all fields from the record excluding the modeL
|
294
|
+
record_dict = record.model_dump(exclude={"model"})
|
295
|
+
# Model data is added as flattened dictionary
|
296
|
+
model_data = record.model.model_dump()
|
297
|
+
flattened_dict = self.flatten_dict(model_data, {})
|
298
|
+
record_dict.update(flattened_dict)
|
299
|
+
converted_records.append(record_dict)
|
300
|
+
return pl.from_records(converted_records)
|
301
|
+
|
302
|
+
def to_pandas(self, model_convertors: dict | None = None) -> pd.DataFrame:
|
303
|
+
"""
|
304
|
+
Convert the data to a pandas dataframe using a dictionary of model attributes to convert to columns
|
305
|
+
"""
|
306
|
+
return self.to_polars(model_convertors).to_pandas()
|
307
|
+
|
308
|
+
def full_model_to_pandas(self) -> pd.DataFrame:
|
309
|
+
"""
|
310
|
+
Convert the data to a pandas dataframe, flattening all nested dictionaries
|
311
|
+
"""
|
312
|
+
return self.full_model_to_polars().to_pandas()
|
313
|
+
|
314
|
+
def __iter__(self):
|
315
|
+
"""Makes the class iterable over the values in _data_records"""
|
316
|
+
return iter(self._data_records.values())
|
317
|
+
|
318
|
+
def __len__(self):
|
319
|
+
"""Returns the number of records"""
|
320
|
+
return len(self._data_records)
|
321
|
+
|
322
|
+
@property
|
323
|
+
def data_records(self) -> list[RegionalDataRecord]:
|
324
|
+
return list(self._data_records.values())
|
325
|
+
|
326
|
+
def regional_availability(self) -> dict[str, RegionalDataStats | None]:
|
327
|
+
availability_per_region = self.meta_data.statistics_for_data_model(model_name=self._model_name)
|
328
|
+
regional_availability = {}
|
329
|
+
for region_type in self.meta_data.region_types:
|
330
|
+
if region_type in availability_per_region.keys():
|
331
|
+
regional_availability[region_type] = availability_per_region[region_type]
|
332
|
+
else:
|
333
|
+
regional_availability[region_type] = None
|
334
|
+
return regional_availability
|
335
|
+
|
336
|
+
@property
|
337
|
+
def regions_with_model_data(self) -> list[str]:
|
338
|
+
return [region for region in self.regional_availability() if self.regional_availability()[region]]
|
339
|
+
|
340
|
+
def show_summary(self):
|
341
|
+
"""
|
342
|
+
Outputs a summary of the model's structure (submodels and attributes)
|
343
|
+
"""
|
344
|
+
self._model.legends().show_summary()
|
345
|
+
|
346
|
+
def summary(self) -> dict:
|
347
|
+
"""
|
348
|
+
return model's structure (submodels and attributes)
|
349
|
+
"""
|
350
|
+
return self._model.legends().summary()
|
351
|
+
|
352
|
+
def describe(
|
353
|
+
self,
|
354
|
+
include_attributes: list[str] | None = None,
|
355
|
+
exclude_attributes: list[str] | None = None,
|
356
|
+
filter_expr: pl.Expr | None = None,
|
357
|
+
) -> pl.DataFrame:
|
358
|
+
df = self.full_model_to_polars()
|
359
|
+
if df.is_empty():
|
360
|
+
raise ValueError("No data loaded for this model. Data is needed to describe the model.")
|
361
|
+
|
362
|
+
if filter_expr is not None:
|
363
|
+
df = df.filter(filter_expr)
|
364
|
+
|
365
|
+
if include_attributes:
|
366
|
+
return df.select(include_attributes).describe()
|
367
|
+
|
368
|
+
attributes_to_exclude = DEFAULT_COLUMNS_TO_EXCLUDE.copy()
|
369
|
+
if exclude_attributes:
|
370
|
+
attributes_to_exclude.extend(exclude_attributes)
|
371
|
+
return df.select([col for col in df.columns if col not in attributes_to_exclude]).describe()
|
372
|
+
|
373
|
+
def data_availability_per_attribute(
|
374
|
+
self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
|
375
|
+
):
|
376
|
+
if include_attributes:
|
377
|
+
describe_df = self.describe(include_attributes=include_attributes, filter_expr=filter_expr)
|
378
|
+
else:
|
379
|
+
describe_df = self.describe(
|
380
|
+
exclude_attributes=DEFAULT_COLUMNS_TO_EXCLUDE, filter_expr=filter_expr
|
381
|
+
)
|
382
|
+
|
383
|
+
describe_df = describe_df.with_columns(
|
384
|
+
pl.when(pl.col("statistic").is_in(["count", "null_count"]))
|
385
|
+
.then(pl.all().exclude("statistic").cast(pl.Int64))
|
386
|
+
.otherwise(pl.all().exclude("statistic"))
|
387
|
+
)
|
388
|
+
|
389
|
+
return describe_df
|
390
|
+
|
391
|
+
def show_data_availability_per_attribute(
|
392
|
+
self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
|
393
|
+
):
|
394
|
+
describe_df = self.data_availability_per_attribute(include_attributes, filter_expr)
|
395
|
+
stats_by_column = {
|
396
|
+
column: dict(
|
397
|
+
zip(describe_df.get_column("statistic"), describe_df.get_column(column), strict=True)
|
398
|
+
)
|
399
|
+
for column in describe_df.columns
|
400
|
+
if column != "statistic"
|
401
|
+
}
|
402
|
+
|
403
|
+
max_column_length = max(len(column) for column in stats_by_column.keys())
|
404
|
+
|
405
|
+
for column, stats in stats_by_column.items():
|
406
|
+
print(
|
407
|
+
f"{column} : {" " * (max_column_length + 3 - len(column))}"
|
408
|
+
f"{int(stats['count'] + stats['null_count'])}"
|
409
|
+
f" of which with data: {int(stats['count'])} "
|
410
|
+
f"({int(stats['count']) / (int(stats['count'] + stats['null_count'])) * 100:.0f}%)"
|
411
|
+
)
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.1.0"
|
@@ -0,0 +1,137 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: the-datagarden
|
3
|
+
Version: 1.2.1
|
4
|
+
Summary: Public data made easy.
|
5
|
+
Author-email: Maarten de Ruyter <info@the-datagarden.io>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Read the Docs, https://the-datagarden.readthedocs.io/
|
8
|
+
Project-URL: The-DataGarden, https://www.the-datagarden.io/
|
9
|
+
Project-URL: API documentation, https://api.the-datagarden.io/api-docs
|
10
|
+
Project-URL: Source, https://github.com/the-datagarden/the-datagarden
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
17
|
+
Classifier: Operating System :: OS Independent
|
18
|
+
Classifier: Development Status :: 4 - Beta
|
19
|
+
Classifier: Intended Audience :: Developers
|
20
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
21
|
+
Classifier: Intended Audience :: Science/Research
|
22
|
+
Classifier: Intended Audience :: Healthcare Industry
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
26
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
27
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28
|
+
Classifier: Topic :: Utilities
|
29
|
+
Requires-Python: >=3.10
|
30
|
+
Description-Content-Type: text/x-rst
|
31
|
+
Requires-Dist: click>=8.1.7
|
32
|
+
Requires-Dist: pandas>=2.2.3
|
33
|
+
Requires-Dist: polars>=1.15.0
|
34
|
+
Requires-Dist: pydantic>=2.9.2
|
35
|
+
Requires-Dist: pyjwt>=2.10.0
|
36
|
+
Requires-Dist: python-decouple>=3.8
|
37
|
+
Requires-Dist: requests>=2.32.3
|
38
|
+
Requires-Dist: the-datagarden-models>=1.6.3
|
39
|
+
|
40
|
+
==================
|
41
|
+
the-datagarden SDK
|
42
|
+
==================
|
43
|
+
|
44
|
+
The-datagarden package is a Python SDK built on top of The-DataGarden API. It provides easy access to continent and country regional hierarchies, as well as public data related to these regions. Additionally, you can retrieve regional GeoJSONs using the SDK. It simplifies the process of converting regional data into DataFrames and/or GeoJSON Feature collections, enabling developers to build upon this data effortlessly.
|
45
|
+
|
46
|
+
A quick example
|
47
|
+
---------------
|
48
|
+
If you have a user account at the-datagarden.io, you can start using the SDK right away:
|
49
|
+
|
50
|
+
.. code-block:: python
|
51
|
+
|
52
|
+
# Retrieve a country object from the datagarden API
|
53
|
+
>>> from the_datagarden import TheDataGardenAPI
|
54
|
+
>>> the_datagarden_api = TheDataGardenAPI(email='your-email@example.com', password='your-password')
|
55
|
+
>>> nl = the_datagarden_api.netherlands()
|
56
|
+
>>> nl_demographics = nl.demographics(from_period="2010-01-01", source="united nations")
|
57
|
+
>>> nl_demographics
|
58
|
+
TheDataGardenRegionalDataModel : Demographics : (count=15)
|
59
|
+
|
60
|
+
This returns a `TheDataGardenRegionalDataModel` containing the demographics data, in this case 15 records.
|
61
|
+
Each of those records will contain a Demographics object for the region for the specified period.
|
62
|
+
|
63
|
+
To work with this data, you can convert it to a pandas or polars dataframe and select the data from the demographics
|
64
|
+
data model you need.
|
65
|
+
|
66
|
+
.. code-block:: python
|
67
|
+
|
68
|
+
>>> df = nl_demographics.to_polars({"pop_count": "population.total"}) # or to_pandas(...)
|
69
|
+
>>> df["name", "source_name", "period", "data_model_name", "pop_count"]
|
70
|
+
┌─────────────┬────────────────┬─────────────────┬─────────────────┬─────────────┐
|
71
|
+
│ name ┆ source_name ┆ period ┆ data_model_name ┆ pop_count │
|
72
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
73
|
+
│ str ┆ str ┆ str ┆ str ┆ f64 │
|
74
|
+
╞═════════════╪════════════════╪═════════════════╪═════════════════╪═════════════╡
|
75
|
+
│ Netherlands ┆ United Nations ┆ 2010-01-010:00Z ┆ Demographics ┆ 1.6729801e7 │
|
76
|
+
│ Netherlands ┆ United Nations ┆ 2011-01-010:00Z ┆ Demographics ┆ 1.6812669e7 │
|
77
|
+
│ Netherlands ┆ United Nations ┆ 2012-01-010:00Z ┆ Demographics ┆ 1.6889445e7 │
|
78
|
+
│ Netherlands ┆ United Nations ┆ 2013-01-010:00Z ┆ Demographics ┆ 1.6940942e7 │
|
79
|
+
│ Netherlands ┆ United Nations ┆ 2014-01-010:00Z ┆ Demographics ┆ 1.6993184e7 │
|
80
|
+
│ … ┆ … ┆ … ┆ … ┆ … │
|
81
|
+
│ Netherlands ┆ United Nations ┆ 2020-01-010:00Z ┆ Demographics ┆ 1.7601682e7 │
|
82
|
+
│ Netherlands ┆ United Nations ┆ 2021-01-010:00Z ┆ Demographics ┆ 1.767178e7 │
|
83
|
+
│ Netherlands ┆ United Nations ┆ 2022-01-010:00Z ┆ Demographics ┆ 1.7789347e7 │
|
84
|
+
│ Netherlands ┆ United Nations ┆ 2023-01-010:00Z ┆ Demographics ┆ 1.8019495e7 │
|
85
|
+
│ Netherlands ┆ United Nations ┆ 2024-01-010:00Z ┆ Demographics ┆ null │
|
86
|
+
└─────────────┴────────────────┴─────────────────┴─────────────────┴─────────────┘
|
87
|
+
|
88
|
+
|
89
|
+
Retrieving the GeoJSON for the Netherlands and its provinces is straightforward as well:
|
90
|
+
|
91
|
+
.. code-block:: python
|
92
|
+
|
93
|
+
>>> nl_geojson = nl.geojsons()
|
94
|
+
>>> nl_geojson
|
95
|
+
TheDataGardenRegionGeoJSONModel : GeoJSON : (count=1)
|
96
|
+
>>> nl_geojson(region_level=2) # Retrieve GeoJSON for 2nd regional level (provinces)
|
97
|
+
TheDataGardenRegionGeoJSONModel : GeoJSON : (count=13) # 12 provinces + 1 country
|
98
|
+
>>> df = nl_geojson.to_polars()
|
99
|
+
>>> df["name", "region_type", "local_region_code", "region_level", "feature"]
|
100
|
+
┌───────────────┬─────────────┬───────────────┬──────────────┬────────────────────────┐
|
101
|
+
│ name ┆ region_type ┆ local_region_c┆ region_level ┆ feature │
|
102
|
+
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
103
|
+
│ str ┆ str ┆ str ┆ i64 ┆ struct[3] │
|
104
|
+
╞═══════════════╪═════════════╪═══════════════╪══════════════╪════════════════════════╡
|
105
|
+
│ Netherlands ┆ country ┆ 528 ┆ 0 ┆ {"Feature",{"Netherland│
|
106
|
+
│ Drenthe ┆ province ┆ NL13 ┆ 2 ┆ {"Feature",{"Drenthe",2│
|
107
|
+
│ Flevoland ┆ province ┆ NL23 ┆ 2 ┆ {"Feature",{"Flevoland"│
|
108
|
+
│ Friesland ┆ province ┆ NL12 ┆ 2 ┆ {"Feature",{"Friesland"│
|
109
|
+
│ Gelderland ┆ province ┆ NL22 ┆ 2 ┆ {"Feature",{"Gelderland│
|
110
|
+
│ … ┆ … ┆ … ┆ … ┆ … │
|
111
|
+
│ Noord-Holland ┆ province ┆ NL32 ┆ 2 ┆ {"Feature",{"Noord-Holl│
|
112
|
+
│ Overijssel ┆ province ┆ NL21 ┆ 2 ┆ {"Feature",{"Overijssel│
|
113
|
+
│ Utrecht ┆ province ┆ NL31 ┆ 2 ┆ {"Feature",{"Utrecht",2│
|
114
|
+
│ Zeeland ┆ province ┆ NL34 ┆ 2 ┆ {"Feature",{"Zeeland",2│
|
115
|
+
│ Zuid-Holland ┆ province ┆ NL33 ┆ 2 ┆ {"Feature",{"Zuid-Holla│
|
116
|
+
└───────────────┴─────────────┴───────────────┴──────────────┴────────────────────────┘
|
117
|
+
|
118
|
+
For readability, only a limited number of dataframe columns are displayed in the output.
|
119
|
+
Attributes in both the demographics and geojson dataframes are available to connect the geojson to
|
120
|
+
the demographics data. This allows you to quickly create data sets that contain both demographics and geojson data
|
121
|
+
for further analysis or visualisation in map applications.
|
122
|
+
|
123
|
+
|
124
|
+
Read more
|
125
|
+
---------
|
126
|
+
|
127
|
+
* `The DataGarden Website <https://www.the-datagarden.io>`_
|
128
|
+
* `API Documentation <https://www.the-datagarden.io/api-docs>`_
|
129
|
+
* `The Datagarden Models <https://www.the-datagarden.io/data-docs>`_
|
130
|
+
* `GitHub Repository <https://github.com/MaartendeRuyter/dg-the-datagarden>`_
|
131
|
+
|
132
|
+
Access to The DataGarden API
|
133
|
+
----------------------------
|
134
|
+
To use the DataGarden SDK, you need access to The DataGarden API. Simply register for free at https://www.the-datagarden.io
|
135
|
+
and you will have an initial free access account to the API with access to country and continent data.
|
136
|
+
|
137
|
+
Visit https://www.the-datagarden.io to register for free.
|
@@ -0,0 +1,25 @@
|
|
1
|
+
the_datagarden/__init__.py,sha256=nJ4JNm1hmdgfkRQr8jiUT_-lfVfB_lVr4Qt_tLPptG0,267
|
2
|
+
the_datagarden/cli.py,sha256=0NPE-rysIsoPGUOh_ylQbLHRAX2hAyuIhG-I3Z-5IFI,220
|
3
|
+
the_datagarden/version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
|
4
|
+
the_datagarden/abc/__init__.py,sha256=0jSO4sHyj_3Q92wI1UNZaABoS0F0FZ3wbvJsCDISuIs,143
|
5
|
+
the_datagarden/abc/api.py,sha256=RC3PTjrea9asrUfm9XD-AWIcQnhTUyZsBlc8e8leT08,484
|
6
|
+
the_datagarden/abc/authentication.py,sha256=6UJVMAlorMCTjvFMFQLwC_Zx6wyENXQekHHgaLZaouk,1174
|
7
|
+
the_datagarden/api/__init__.py,sha256=tUT1s1J3U5fX6edISvchO-rO0zZ5nJFcG4YUAG0Kwc4,235
|
8
|
+
the_datagarden/api/authentication/__init__.py,sha256=JN20Cpjfes9nFBlDLcojJJyyt_W7_78DoQAdAA9tQW0,3715
|
9
|
+
the_datagarden/api/authentication/settings.py,sha256=clXnhbV1rOcqyAtTEE4GCso0btqAhrPqveO3kme4WFc,1583
|
10
|
+
the_datagarden/api/authentication/credentials/__init__.py,sha256=OXlnUaZ6w-1VIHmCEDKQ7uMZuzKtroeQT_rCl2M0FB8,4806
|
11
|
+
the_datagarden/api/authentication/environment/__init__.py,sha256=9EWOWvinW6C4UAzrPIAI8CqxAM9GErXbo9sKMu5AcH8,426
|
12
|
+
the_datagarden/api/base/__init__.py,sha256=W-7JCQROnF48rlJuMCqMcqfc3gWRzVMvuxfgkQqOf4k,8165
|
13
|
+
the_datagarden/api/regions/__init__.py,sha256=oO533d7dn6CEUZQWGGIUBZvpxvJXRDtdZgNUt3rXg4E,98
|
14
|
+
the_datagarden/api/regions/continent.py,sha256=iCsjCvG39bV4zS-4Nj4mvszo1jmKPLWofobByV9G1cg,204
|
15
|
+
the_datagarden/api/regions/country.py,sha256=Uibm8KvvLSa6DGuHz20IW-G08DC7ggIEkx-OYkQMICA,194
|
16
|
+
the_datagarden/api/regions/base/__init__.py,sha256=PJ_OnELpbvoJspwuUvse3vxT1ZRBfqgm0ux8ptkprT0,3487
|
17
|
+
the_datagarden/api/regions/base/settings.py,sha256=KuUR4r-Iro4ZQpQZjssvRoe7_OJ_LfNfgzJ85UKkgsk,397
|
18
|
+
the_datagarden/models/__init__.py,sha256=UoBC9iK6VfHuW92PeGUteUWAqB0M3qoHDvNBPiNgfhQ,305
|
19
|
+
the_datagarden/models/geojson.py,sha256=74uvrI7YSGgu8ohAMvohKLu6fUkILDT0nZ9l3O4xpis,6340
|
20
|
+
the_datagarden/models/regional_data.py,sha256=zhk6iRYRlzirgxp1k6J9ftt0mAFqWrTH4M3TPiRfR88,16566
|
21
|
+
the_datagarden-1.2.1.dist-info/METADATA,sha256=rIq7psRCFQ7nn6WTir-FQfWCcnjUfmPvddCFF-06wQk,9395
|
22
|
+
the_datagarden-1.2.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
23
|
+
the_datagarden-1.2.1.dist-info/entry_points.txt,sha256=R40-UiUsDqy6RfBSnkOCj98Ed0bfvseJPpClXxqVVAM,59
|
24
|
+
the_datagarden-1.2.1.dist-info/top_level.txt,sha256=S0Wn3mYX0nrwGIqeqWE5vAc3ASTK13E-F_9eTOQu8hs,15
|
25
|
+
the_datagarden-1.2.1.dist-info/RECORD,,
|
@@ -1,18 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: the-datagarden
|
3
|
-
Version: 0.1.0
|
4
|
-
Summary: Accessing public data made easy for everyone.
|
5
|
-
Author-email: Maarten de Ruyter <maarten@the-datagarden.io>
|
6
|
-
License: MIT
|
7
|
-
Classifier: Programming Language :: Python :: 3
|
8
|
-
Classifier: Programming Language :: Python :: 3.10
|
9
|
-
Classifier: Programming Language :: Python :: 3.11
|
10
|
-
Classifier: Programming Language :: Python :: 3.12
|
11
|
-
Classifier: Programming Language :: Python :: 3.13
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
13
|
-
Classifier: Operating System :: OS Independent
|
14
|
-
Requires-Python: >=3.10
|
15
|
-
Description-Content-Type: text/markdown
|
16
|
-
Requires-Dist: click >=8.1.7
|
17
|
-
Requires-Dist: pydantic >=2.9.2
|
18
|
-
|
@@ -1,7 +0,0 @@
|
|
1
|
-
the_datagarden/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
the_datagarden/cli.py,sha256=0NPE-rysIsoPGUOh_ylQbLHRAX2hAyuIhG-I3Z-5IFI,220
|
3
|
-
the_datagarden-0.1.0.dist-info/METADATA,sha256=AlhJXqjNAxyuQ_t22aAW6_mCp06_nbsXCV-UDK9Uhwc,662
|
4
|
-
the_datagarden-0.1.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
5
|
-
the_datagarden-0.1.0.dist-info/entry_points.txt,sha256=R40-UiUsDqy6RfBSnkOCj98Ed0bfvseJPpClXxqVVAM,59
|
6
|
-
the_datagarden-0.1.0.dist-info/top_level.txt,sha256=S0Wn3mYX0nrwGIqeqWE5vAc3ASTK13E-F_9eTOQu8hs,15
|
7
|
-
the_datagarden-0.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|