the-datagarden 0.1.0__py3-none-any.whl → 1.2.3__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,411 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+ from datagarden_models import DataGardenModel, DatagardenModels, DataGardenSubModel, RegionalDataStats
4
+ from datagarden_models.models.base.legend import Legend
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.base import BaseApi
8
+
9
# Record attributes whose combined values identify one regional data record.
UNIQUE_FIELDS = [
    # region identification
    "region_type", "un_region_code", "iso_cc_2",
    "local_region_code", "local_region_code_type", "region_level",
    # period and source identification
    "period", "period_type", "source_name",
]

# Descriptive (non-measurement) columns that are left out of statistical summaries by default.
DEFAULT_COLUMNS_TO_EXCLUDE = [
    "datagarden_model_version", "name",
    # region identification
    "region_type", "un_region_code", "iso_cc_2",
    "local_region_code", "local_region_code_type",
    # parent region identification
    "parent_region_code", "parent_region_code_type", "parent_region_type",
    "region_level",
    # source, model and period identification
    "source_name", "data_model_name", "period", "period_type",
]
37
+
38
+
39
class RegionalDataRecord(BaseModel):
    """
    One regional data record: region identification, source and period details, and the
    data model instance holding the actual values.
    """

    # Region identification
    name: str | None = None
    region_type: str | None = None
    un_region_code: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    # Parent region identification
    parent_region_code: str | None = None
    parent_region_code_type: str | None = None
    parent_region_type: str | None = None
    region_level: int = 0
    # Source, period and model identification
    source_name: str | None = None
    period: str | None = None
    period_type: str | None = None
    data_model_name: str | None = None
    # The (sub) model instance carrying the data for this record
    model: DataGardenSubModel

    def record_hash(self) -> str:
        """
        Return a key for this record derived from the values of the ``UNIQUE_FIELDS`` attributes.

        The key is based on the builtin ``hash`` of a string, so it is only meant to be
        compared with keys created in the same Python process.
        """
        hash_str = ".".join(str(getattr(self, key)) for key in sorted(UNIQUE_FIELDS))
        return str(hash(hash_str))

    def __str__(self):
        return (
            f"RegionalDataRecord: {self.name} ({self.data_model_name} for {self.period}, {self.period_type})"
        )

    @property
    def datgarden_model_class(self) -> type[DataGardenModel]:
        """Class of the model instance stored in ``model``."""
        return self.model.__class__

    def record_for_sub_model(self, sub_model_name: str) -> "RegionalDataRecord":
        """
        Return a new record with the same region/source/period fields whose ``model`` is the
        sub model stored under ``sub_model_name`` on this record's model.

        Raises:
            ValueError: if ``sub_model_name`` is not listed in the sub model names of the
                legends of this record's model class.
        """
        if sub_model_name not in self.datgarden_model_class.legends().sub_model_names:
            raise ValueError(f"Sub model `{sub_model_name}` not found in {self.datgarden_model_class}")
        # The model itself is replaced below, so there is no need to serialise it first.
        child_record = self.model_dump(exclude={"model"})
        child_record["data_model_name"] = sub_model_name
        child_record["model"] = getattr(self.model, sub_model_name)
        return RegionalDataRecord(**child_record)
76
+
77
+
78
+ class TheDataGardenRegionalDataModel:
79
+ """
80
+ Model to hold response data from the The Data Garden API Regional Data endpoint.
81
+
82
+ The model hold a list of regional_data records containg a regional data model
83
+ for the region for a specific set op sources, periods and period types.
84
+
85
+ The data can be converted to Polars and Pandas dataframes by the following
86
+ methods:
87
+ - to_polars(model_convertors: dict | None = None) -> pl.DataFrame
88
+ model_convertors dict will be used to covert specifc model fields to dataframe
89
+ columns.
90
+ - full_model_to_polars() -> pl.DataFrame
91
+
92
+ For pandas dataframes you can use the same methods:
93
+ - to_pandas(model_convertors: dict | None = None) -> pd.DataFrame
94
+ - full_model_to_pandas() -> pd.DataFrame
95
+ """
96
+
97
+ def __init__(
98
+ self,
99
+ api: "BaseApi",
100
+ model_name: str,
101
+ region_url: str,
102
+ meta_data: BaseModel,
103
+ is_sub_model: bool = False,
104
+ model: type[DataGardenSubModel] | None = None,
105
+ ):
106
+ self._api: BaseApi = api
107
+ self._model_name: str = model_name
108
+ self._region_url: str = region_url
109
+ self._request_params_hashes: list[str] = []
110
+ self._data_records: dict[str, RegionalDataRecord] = {}
111
+ self.meta_data: BaseModel = meta_data
112
+ self._model: DataGardenModel = model or getattr(DatagardenModels, model_name.upper())
113
+ self._is_sub_model: bool = is_sub_model
114
+
115
+ def __str__(self):
116
+ return f"TheDataGardenRegionalDataModel : {self._model_name} : (count={len(self._data_records)})"
117
+
118
+ def __repr__(self):
119
+ return self.__str__()
120
+
121
+ def __call__(self, **kwargs) -> "TheDataGardenRegionalDataModel":
122
+ if self._is_sub_model:
123
+ raise TypeError(
124
+ "Sub model data cannot be used to retrieve data. "
125
+ "Use the main model data object to make calls to The-Datagarden API"
126
+ )
127
+ request_hash = self.request_hash(**kwargs)
128
+ if request_hash not in self._request_params_hashes:
129
+ regional_data = self.regional_paginated_data_from_api(**kwargs)
130
+ if regional_data:
131
+ self.set_items(regional_data)
132
+ self._request_params_hashes.append(request_hash)
133
+ return self
134
+
135
+ def __getattr__(self, attribute: str) -> "TheDataGardenRegionalDataModel":
136
+ if attribute not in self._model.legends().sub_model_names:
137
+ raise ValueError(f"Attribute {attribute} is not a sub-model of {self._model_name}")
138
+ sub_model = getattr(self._model.legends(), attribute).model
139
+ regional_data_for_attribute = TheDataGardenRegionalDataModel(
140
+ api=self._api,
141
+ model_name=attribute,
142
+ region_url=self._region_url,
143
+ meta_data=self.meta_data,
144
+ is_sub_model=True,
145
+ model=sub_model,
146
+ )
147
+ regional_data_for_attribute._data_records = {
148
+ key: value.record_for_sub_model(attribute) for key, value in self._data_records.items()
149
+ }
150
+ return regional_data_for_attribute
151
+
152
+ @property
153
+ def model_attributes(self) -> list[str]:
154
+ return self._model.legends().attributes
155
+
156
+ def model_attribute_legend(self, attribute: str) -> Legend:
157
+ return getattr(self._model.legends(), attribute)
158
+
159
+ def request_hash(self, **kwargs) -> str:
160
+ sorted_items = sorted(kwargs.items())
161
+ hash_str = ",".join(f"{k}:{v}" for k, v in sorted_items)
162
+ return str(hash(hash_str))
163
+
164
+ def _response_has_next_page(self, model_data_resp: dict) -> bool:
165
+ pagination = model_data_resp.get("pagination", None)
166
+ if not pagination:
167
+ return False
168
+ return pagination.get("next_page", None) is not None
169
+
170
+ def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
171
+ pagination = model_data_resp.pop("pagination", None)
172
+ if not pagination:
173
+ return None
174
+ next_page = pagination.get("next_page", None)
175
+ if not next_page:
176
+ return None
177
+ return {"page": next_page}
178
+
179
+ def regional_paginated_data_from_api(self, **kwargs) -> dict:
180
+ model_data_resp = self.regional_data_from_api(**kwargs)
181
+ if not model_data_resp:
182
+ return {}
183
+ while self._response_has_next_page(model_data_resp):
184
+ next_page_pagination = self._next_page_pagination(model_data_resp)
185
+ if next_page_pagination:
186
+ next_page_resp = self.regional_data_from_api(pagination=next_page_pagination, **kwargs)
187
+ if next_page_resp:
188
+ model_data_resp["data_by_region"].extend(next_page_resp["data_by_region"])
189
+ model_data_resp["pagination"] = next_page_resp["pagination"]
190
+
191
+ return model_data_resp
192
+
193
+ def regional_data_from_api(self, **kwargs) -> dict:
194
+ model_data_resp = self._api.retrieve_from_api(
195
+ url_extension=self._region_url + "regional_data/",
196
+ method="POST",
197
+ payload={"model": self._model_name, **kwargs},
198
+ )
199
+ if model_data_resp:
200
+ return model_data_resp.json()
201
+ return {}
202
+
203
+ def set_items(self, data: dict):
204
+ for regional_data in data["data_by_region"]:
205
+ base_items = {
206
+ "name": regional_data.get("region_name", None),
207
+ "region_type": regional_data.get("region_type", None),
208
+ "un_region_code": regional_data.get("un_region_code", None),
209
+ "iso_cc_2": regional_data.get("iso_cc_2", None),
210
+ "local_region_code": regional_data.get("local_region_code", None),
211
+ "local_region_code_type": regional_data.get("local_region_code_type", None),
212
+ "parent_region_code": regional_data.get("parent_region_code", None),
213
+ "parent_region_code_type": regional_data.get("parent_region_code_type", None),
214
+ "parent_region_type": regional_data.get("parent_region_type", None),
215
+ "region_level": regional_data.get("region_level", 0),
216
+ }
217
+ data_for_region = regional_data["data_objects_for_region"]
218
+ data_records = [
219
+ RegionalDataRecord(**base_items, **self._record_items(data_obj))
220
+ for data_obj in data_for_region
221
+ ]
222
+ for data_record in data_records:
223
+ self._data_records.update({data_record.record_hash(): data_record})
224
+
225
+ if self._data_records:
226
+ _, first_record = list(self._data_records.items())[0]
227
+ model_name = first_record.data_model_name
228
+ if not model_name:
229
+ raise ValueError("data_model_name is required")
230
+ self._model_name = model_name
231
+
232
+ def _record_items(self, data: dict):
233
+ model_name = data.get("data_type", None)
234
+ if not model_name:
235
+ raise ValueError("data_model_name is required")
236
+
237
+ model = getattr(DatagardenModels, model_name.upper())
238
+ if not model:
239
+ raise ValueError(f"model {model_name} not found in DatagardenModels")
240
+ return {
241
+ "source_name": data.get("source_name", None),
242
+ "period": data.get("period", None),
243
+ "period_type": data.get("period_type", None),
244
+ "data_model_name": data.get("data_type", None),
245
+ "model": model(**data.get("data", {})),
246
+ }
247
+
248
+ def to_polars(self, model_convertors: dict | None = None) -> pl.DataFrame:
249
+ """
250
+ Convert the data to a polars dataframe using a dictionary of model attributes to convert to columns
251
+ """
252
+ model_convertors = model_convertors or {}
253
+ converted_records = []
254
+ for record in self._data_records.values():
255
+ model = record.model
256
+ record_dict = record.model_dump()
257
+ record_dict.pop("model")
258
+
259
+ for new_col, model_attr in model_convertors.items():
260
+ # Handle nested attributes using split by dots
261
+ model_attr_flatten = "__flatten" in model_attr
262
+ model_attr = model_attr.replace("__flatten", "")
263
+ attrs = model_attr.split(".")
264
+ value = getattr(model, attrs[0])
265
+ for attr in attrs[1:]:
266
+ value = getattr(value, attr, None)
267
+ if not value:
268
+ continue
269
+ if model_attr_flatten:
270
+ model_data = value.model_dump() if isinstance(value, BaseModel) else value
271
+ record_dict.update(self.flatten_dict(model_data, {}))
272
+ else:
273
+ record_dict[new_col] = value
274
+ converted_records.append(record_dict)
275
+ return pl.from_records(converted_records)
276
+
277
+ def flatten_dict(self, dict_to_flatten: dict, flattened_dict: dict, prefix: str = "") -> dict:
278
+ for key, value in dict_to_flatten.items():
279
+ new_key = f"{prefix}.{key}" if prefix else key
280
+ if isinstance(value, dict):
281
+ flattened_dict.update(self.flatten_dict(value, flattened_dict, new_key))
282
+ else:
283
+ flattened_dict[new_key] = value
284
+
285
+ return flattened_dict
286
+
287
+ def full_model_to_polars(self):
288
+ """
289
+ Convert the data to a polars dataframe, flattening all nested dictionaries
290
+ """
291
+ converted_records = []
292
+ for record in self._data_records.values():
293
+ # Get all fields from the record excluding the modeL
294
+ record_dict = record.model_dump(exclude={"model"})
295
+ # Model data is added as flattened dictionary
296
+ model_data = record.model.model_dump()
297
+ flattened_dict = self.flatten_dict(model_data, {})
298
+ record_dict.update(flattened_dict)
299
+ converted_records.append(record_dict)
300
+ return pl.from_records(converted_records)
301
+
302
+ def to_pandas(self, model_convertors: dict | None = None) -> pd.DataFrame:
303
+ """
304
+ Convert the data to a pandas dataframe using a dictionary of model attributes to convert to columns
305
+ """
306
+ return self.to_polars(model_convertors).to_pandas()
307
+
308
+ def full_model_to_pandas(self) -> pd.DataFrame:
309
+ """
310
+ Convert the data to a pandas dataframe, flattening all nested dictionaries
311
+ """
312
+ return self.full_model_to_polars().to_pandas()
313
+
314
+ def __iter__(self):
315
+ """Makes the class iterable over the values in _data_records"""
316
+ return iter(self._data_records.values())
317
+
318
+ def __len__(self):
319
+ """Returns the number of records"""
320
+ return len(self._data_records)
321
+
322
+ @property
323
+ def data_records(self) -> list[RegionalDataRecord]:
324
+ return list(self._data_records.values())
325
+
326
+ def regional_availability(self) -> dict[str, RegionalDataStats | None]:
327
+ availability_per_region = self.meta_data.statistics_for_data_model(model_name=self._model_name)
328
+ regional_availability = {}
329
+ for region_type in self.meta_data.region_types:
330
+ if region_type in availability_per_region.keys():
331
+ regional_availability[region_type] = availability_per_region[region_type]
332
+ else:
333
+ regional_availability[region_type] = None
334
+ return regional_availability
335
+
336
+ @property
337
+ def regions_with_model_data(self) -> list[str]:
338
+ return [region for region in self.regional_availability() if self.regional_availability()[region]]
339
+
340
+ def show_summary(self):
341
+ """
342
+ Outputs a summary of the model's structure (submodels and attributes)
343
+ """
344
+ self._model.legends().show_summary()
345
+
346
+ def summary(self) -> dict:
347
+ """
348
+ return model's structure (submodels and attributes)
349
+ """
350
+ return self._model.legends().summary()
351
+
352
+ def describe(
353
+ self,
354
+ include_attributes: list[str] | None = None,
355
+ exclude_attributes: list[str] | None = None,
356
+ filter_expr: pl.Expr | None = None,
357
+ ) -> pl.DataFrame:
358
+ df = self.full_model_to_polars()
359
+ if df.is_empty():
360
+ raise ValueError("No data loaded for this model. Data is needed to describe the model.")
361
+
362
+ if filter_expr is not None:
363
+ df = df.filter(filter_expr)
364
+
365
+ if include_attributes:
366
+ return df.select(include_attributes).describe()
367
+
368
+ attributes_to_exclude = DEFAULT_COLUMNS_TO_EXCLUDE.copy()
369
+ if exclude_attributes:
370
+ attributes_to_exclude.extend(exclude_attributes)
371
+ return df.select([col for col in df.columns if col not in attributes_to_exclude]).describe()
372
+
373
+ def data_availability_per_attribute(
374
+ self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
375
+ ):
376
+ if include_attributes:
377
+ describe_df = self.describe(include_attributes=include_attributes, filter_expr=filter_expr)
378
+ else:
379
+ describe_df = self.describe(
380
+ exclude_attributes=DEFAULT_COLUMNS_TO_EXCLUDE, filter_expr=filter_expr
381
+ )
382
+
383
+ describe_df = describe_df.with_columns(
384
+ pl.when(pl.col("statistic").is_in(["count", "null_count"]))
385
+ .then(pl.all().exclude("statistic").cast(pl.Int64))
386
+ .otherwise(pl.all().exclude("statistic"))
387
+ )
388
+
389
+ return describe_df
390
+
391
+ def show_data_availability_per_attribute(
392
+ self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
393
+ ):
394
+ describe_df = self.data_availability_per_attribute(include_attributes, filter_expr)
395
+ stats_by_column = {
396
+ column: dict(
397
+ zip(describe_df.get_column("statistic"), describe_df.get_column(column), strict=True)
398
+ )
399
+ for column in describe_df.columns
400
+ if column != "statistic"
401
+ }
402
+
403
+ max_column_length = max(len(column) for column in stats_by_column.keys())
404
+
405
+ for column, stats in stats_by_column.items():
406
+ print(
407
+ f"{column} : {" " * (max_column_length + 3 - len(column))}"
408
+ f"{int(stats['count'] + stats['null_count'])}"
409
+ f" of which with data: {int(stats['count'])} "
410
+ f"({int(stats['count']) / (int(stats['count'] + stats['null_count'])) * 100:.0f}%)"
411
+ )
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,253 @@
1
+ Metadata-Version: 2.1
2
+ Name: the-datagarden
3
+ Version: 1.2.3
4
+ Summary: Public data made easy.
5
+ Author-email: Maarten de Ruyter <info@the-datagarden.io>
6
+ License: MIT
7
+ Project-URL: Read the Docs, https://dg-the-datagarden.readthedocs.io/en/stable/
8
+ Project-URL: The-DataGarden, https://www.the-datagarden.io/
9
+ Project-URL: API documentation, https://www.the-datagarden.io/api-docs
10
+ Project-URL: Source, https://github.com/the-datagarden/the-datagarden
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Development Status :: 4 - Beta
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: Intended Audience :: Financial and Insurance Industry
21
+ Classifier: Intended Audience :: Science/Research
22
+ Classifier: Intended Audience :: Healthcare Industry
23
+ Classifier: Topic :: Scientific/Engineering :: GIS
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
+ Classifier: Topic :: Scientific/Engineering :: Visualization
27
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
+ Classifier: Topic :: Utilities
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/x-rst
31
+ Requires-Dist: click>=8.1.7
32
+ Requires-Dist: pandas>=2.2.3
33
+ Requires-Dist: polars>=1.15.0
34
+ Requires-Dist: pydantic>=2.9.2
35
+ Requires-Dist: pyjwt>=2.10.0
36
+ Requires-Dist: python-decouple>=3.8
37
+ Requires-Dist: requests>=2.32.3
38
+ Requires-Dist: the-datagarden-models>=1.6.3
39
+
40
+ ==================
41
+ the-datagarden SDK
42
+ ==================
43
+
44
+ The-datagarden package is a Python SDK built on top of The-DataGarden API. The SDK provides easy access to continent and country regional hierarchies,
45
+ as well as public data related to these regions. All data from The-DataGarden API is stored in normalized datamodels like ``Demographics``, ``Health``
46
+ or ``Economics``. This allows you as a data professional to create value from this data without having to worry about the (varying) data structure and
47
+ APIs of the sources.
48
+
49
+ Additionally, The-DataGarden API also provides country and regional GeoJSONs. The SDK makes it easy for you to combine public data and your own data and merge them into
50
+ GeoJSON feature collections, making geographic visualisation easy.
51
+
52
+
53
+ The-DataGarden SDK main use case
54
+ --------------------------------
55
+ The SDK is designed to make it easy to access and work with the DataGarden data. After initializing the SDK you simply
56
+ retrieve data for a specific continent, country or subregion by calling the appropriate datamodel.
57
+
58
+ .. code-block:: python
59
+
60
+ # initialize a country object and retrieve the demographics attribute
61
+ >>> nl = the_datagarden_api.netherlands # or nl = the_datagarden_api.NL
62
+ >>> nl_demographics = nl.demographics()
63
+ TheDataGardenRegionalDataModel : Demographics : (count=5)
64
+
65
+ In this example the `nl_demographics` object holds 5 records. Each record contains demographic data for the Netherlands for a specific
66
+ period and period type combination. The data can be made accessible in a tabular format by converting the object to a pandas or polars dataframe.
67
+
68
+ .. code-block:: python
69
+
70
+ # convert demographics data to a polars dataframe
71
+ >>> dataframe = nl_demographics.full_model_to_polars()
72
+ >>> print(dataframe["period", "source_name", "data_model_name", "population.total", "population.total_male", "population.total_female"])
73
+
74
+ .. code-block:: text
75
+
76
+ ┌───────────────┬────────────┬─────────────────┬──────────────────┬───────────────────────┬─────────────────────────┐
77
+ │ period ┆ source_name┆ data_model_name ┆ population.total ┆ population.total_male ┆ population.total_female │
78
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
79
+ │ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 │
80
+ ╞═══════════════╪════════════╪═════════════════╪══════════════════╪═══════════════════════╪═════════════════════════╡
81
+ │ 2022-01-01T0Z ┆ Eurostat ┆ Demographics ┆ null ┆ 8.745468e6 ┆ 8.845204e6 │
82
+ │ 2022-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.7789347e7 ┆ 8.890013e6 ┆ 9.014408e6 │
83
+ │ 2023-01-01T0Z ┆ Eurostat ┆ Demographics ┆ null ┆ 8.850309e6 ┆ 8.960982e6 │
84
+ │ 2023-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.8019495e7 ┆ 8.986255e6 ┆ 9.106269e6 │
85
+ │ 2024-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.8165554e7 ┆ 9.055978e6 ┆ 9.172763e6 │
86
+ └───────────────┴────────────┴─────────────────┴──────────────────┴───────────────────────┴─────────────────────────┘
87
+
88
+ The demographics model holds lots of submodels and attributes. In this example only a limited number of attributes are listed
89
+ as the dataframe is way too large to display. For all models and their details see the model data documentation at
90
+ `The DataGarden Data Documentation <https://www.the-datagarden.io/data-docs>`_.
91
+
92
+ Getting started with the SDK
93
+ ----------------------------
94
+ You can start using the SDK out of the box by simply instantiating the TheDataGardenAPI object:
95
+
96
+ .. code-block:: python
97
+
98
+ # Starting with the datagarden API
99
+ >>> from the_datagarden import TheDataGardenAPI
100
+ >>> the_datagarden_api = TheDataGardenAPI()
101
+
102
+ .. code-block:: console
103
+
104
+ Welcome to The Data Garden API.
105
+
106
+ You can start using the API with an account from The-Datagarden.io.
107
+ Please provide your credentials or create a new account.
108
+ Check www.the-datagarden.io for more information.
109
+
110
+ Do you want to (1) create a new account or (2) provide existing credentials? Enter 1 or 2:
111
+
112
+
113
+ simply select 1 to create a new account.
114
+
115
+ .. code-block:: console
116
+
117
+ Enrolling in The Data Garden API...
118
+
119
+ Enter your email: <your-email>
120
+ Enter your password: <your-password>
121
+ Confirm your password: <your-password>
122
+
123
+ Successfully enrolled in The Data Garden API.
124
+ Initializing : TheDatagardenEnvironment
125
+ At: https://www.the-datagarden.io/
126
+
127
+ If you already have an account at the-datagarden.io, you can either select option 2 or directly provide your credentials
128
+ when creating the TheDataGardenAPI object:
129
+
130
+ .. code-block:: python
131
+
132
+ # Retrieve a country object from the datagarden API
133
+ >>> from the_datagarden import TheDataGardenAPI
134
+ >>> the_datagarden_api = TheDataGardenAPI(email='your-email@example.com', password='your-password')
135
+
136
+ .. code-block:: console
137
+
138
+ Initializing : TheDatagardenEnvironment
139
+ At: https://www.the-datagarden.io/
140
+
141
+ A 3rd way to initialize the SDK is adding your credentials to the ``.env`` file.
142
+
143
+
144
+ Getting your first data from The-DataGarden API
145
+ -----------------------------------------------
146
+ Now that you have initialized the SDK, you can start retrieving data from The-DataGarden API.
147
+ For example, you can retrieve the demographics data for the Netherlands:
148
+
149
+ .. code-block:: python
150
+
151
+ # initialize a country object and retrieve the demographics attribute
152
+ >>> nl = the_datagarden_api.netherlands
153
+ >>> nl_demographics = nl.demographics
154
+ TheDataGardenRegionalDataModel : Demographics : (count=0)
155
+
156
+ This creates a country object ``nl`` for the Netherlands, which serves as your gateway to all Netherlands-related
157
+ data and its regional subdivisions.
158
+
159
+ In this getting started section we will work with a demographics object retrieved from the `nl` country object.
160
+ As shown in the example, the ``nl_demographics`` object can be retrieved by simply calling the `demographics`
161
+ attribute on the `nl` country object.
162
+
163
+ The `nl_demographics` object starts empty (count=0). To populate it with data, simply call it as a function:
164
+
165
+ .. code-block:: python
166
+
167
+ # Calling the demographics attribute will populate it with demographics data from the API
168
+ >>> nl_demographics()
169
+ >>> nl_demographics
170
+ TheDataGardenRegionalDataModel : Demographics : (count=5)
171
+
172
+ When called without parameters, the API returns data using default settings, which in this case yields 5 records.
173
+ You can customize your data retrieval by specifying parameters such as time periods, period types, and data sources.
174
+
175
+
176
+ The DataGarden Regional DataModel
177
+ ---------------------------------
178
+ When you retrieve data like ``nl_demographics``, you're working with a ``TheDataGardenRegionalDataModel`` object. This object acts as a container that holds:
179
+
180
+ 1. A collection of ``RegionalDataRecord`` objects
181
+ 2. Metadata about the records (region, time period, data source, etc.)
182
+
183
+ You can easily transform this data into pandas or polars DataFrames for analysis. Here's an example showing population data for the Netherlands:
184
+
185
+ .. code-block:: python
186
+
187
+ >>> nl = the_datagarden_api.netherlands
188
+ >>> nl_demographics = nl.demographics(period_from="2010-01-01", source="united nations")
189
+ >>> # Convert to DataFrame, mapping 'population.total' to column name 'pop_count'
190
+ >>> df = nl_demographics.to_polars({"pop_count": "population.total"}) # or to_pandas(...)
191
+ >>> df["name", "source_name", "period", "data_model_name", "pop_count"] # for readability only a limited number of columns are displayed
192
+ ┌─────────────┬────────────────┬─────────────────┬─────────────────┬─────────────┐
193
+ │ name ┆ source_name ┆ period ┆ data_model_name ┆ pop_count │
194
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
195
+ │ str ┆ str ┆ str ┆ str ┆ f64 │
196
+ ╞═════════════╪════════════════╪═════════════════╪═════════════════╪═════════════╡
197
+ │ Netherlands ┆ United Nations ┆ 2010-01-010:00Z ┆ Demographics ┆ 1.6729801e7 │
198
+ │ Netherlands ┆ United Nations ┆ 2011-01-010:00Z ┆ Demographics ┆ 1.6812669e7 │
199
+ │ … ┆ … ┆ … ┆ … ┆ … │
200
+ │ Netherlands ┆ United Nations ┆ 2023-01-010:00Z ┆ Demographics ┆ 1.8019495e7 │
201
+ │ Netherlands ┆ United Nations ┆ 2024-01-010:00Z ┆ Demographics ┆ 1.8165554e7 │
202
+ └─────────────┴────────────────┴─────────────────┴─────────────────┴─────────────┘
203
+
204
+ Each time you call the ``nl_demographics`` object with different parameters,
205
+ new demographic records for the specified subregions, periods, and/or sources are added to the existing ``nl_demographics`` object.
206
+ After you've gathered all the records you need, you can convert the entire collection into a dataframe for further analysis.
207
+
208
+
209
+ Retrieving GeoJSON data
210
+ -----------------------
211
+ Retrieving the GeoJSON for the Netherlands and its provinces is straightforward as well:
212
+
213
+ .. code-block:: python
214
+
215
+ >>> nl_geojson = nl.geojsons()
216
+ >>> nl_geojson
217
+ TheDataGardenRegionGeoJSONModel : GeoJSON : (count=1)
218
+ >>> nl_geojson(region_level=2) # Retrieve GeoJSON for 2nd regional level (provinces)
219
+ TheDataGardenRegionGeoJSONModel : GeoJSON : (count=13) # 12 provinces + 1 country
220
+ >>> df = nl_geojson.to_polars()
221
+ >>> df["name", "region_type", "local_region_code", "region_level", "feature"]
222
+ ┌───────────────┬─────────────┬───────────────┬──────────────┬────────────────────────┐
223
+ │ name ┆ region_type ┆ local_region_c┆ region_level ┆ feature │
224
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
225
+ │ str ┆ str ┆ str ┆ i64 ┆ struct[3] │
226
+ ╞═══════════════╪═════════════╪═══════════════╪══════════════╪════════════════════════╡
227
+ │ Netherlands ┆ country ┆ 528 ┆ 0 ┆ {"Feature",{"Netherland│
228
+ │ Drenthe ┆ province ┆ NL13 ┆ 2 ┆ {"Feature",{"Drenthe",2│
229
+ │ … ┆ … ┆ … ┆ … ┆ … │
230
+ │ Zuid-Holland ┆ province ┆ NL33 ┆ 2 ┆ {"Feature",{"Zuid-Holla│
231
+ └───────────────┴─────────────┴───────────────┴──────────────┴────────────────────────┘
232
+
233
+ For readability, only a limited number of dataframe columns are displayed in the output.
234
+ Attributes in both the demographics and geojson dataframes are available to connect the geojson to
235
+ the demographics data. This allows you to quickly create data sets that contain both demographics and geojson data
236
+ for further analysis or visualisation in map applications.
237
+
238
+
239
+ Read more
240
+ ---------
241
+
242
+ * `The DataGarden Website <https://www.the-datagarden.io>`_
243
+ * `API Documentation <https://www.the-datagarden.io/api-docs>`_
244
+ * `The Datagarden Models <https://www.the-datagarden.io/data-docs>`_
245
+ * `GitHub Repository <https://github.com/MaartendeRuyter/dg-the-datagarden>`_
246
+
247
+
248
+ Access to The DataGarden API
249
+ ----------------------------
250
+ To use the DataGarden SDK, you need access to The DataGarden API. Simply register for free at https://www.the-datagarden.io
251
+ and you will have an initial free access account to the API with access to country and continent data.
252
+
253
+ Visit https://www.the-datagarden.io to register for free.