the-datagarden 0.1.0__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+ from datagarden_models import DataGardenModel, DatagardenModels, DataGardenSubModel, RegionalDataStats
4
+ from datagarden_models.models.base.legend import Legend
5
+ from pydantic import BaseModel
6
+
7
+ from the_datagarden.api.base import BaseApi
8
+
9
# Record attributes whose values are joined (in sorted order) by
# RegionalDataRecord.record_hash() to build the key of a record.
UNIQUE_FIELDS = [
    "region_type", "un_region_code", "iso_cc_2",
    "local_region_code", "local_region_code_type", "region_level",
    "period", "period_type", "source_name",
]

# Columns left out by TheDataGardenRegionalDataModel.describe() unless the
# caller explicitly selects the attributes to include.
DEFAULT_COLUMNS_TO_EXCLUDE = [
    "datagarden_model_version", "name",
    "region_type", "un_region_code", "iso_cc_2",
    "local_region_code", "local_region_code_type",
    "parent_region_code", "parent_region_code_type", "parent_region_type",
    "region_level", "source_name", "data_model_name",
    "period", "period_type",
]
37
+
38
+
39
class RegionalDataRecord(BaseModel):
    """
    A single regional data record.

    Combines the identification of a region, the source and period the data
    applies to, and the data model instance holding the actual values.
    """

    name: str | None = None
    region_type: str | None = None
    un_region_code: str | None = None
    iso_cc_2: str | None = None
    local_region_code: str | None = None
    local_region_code_type: str | None = None
    parent_region_code: str | None = None
    parent_region_code_type: str | None = None
    parent_region_type: str | None = None
    region_level: int = 0
    source_name: str | None = None
    period: str | None = None
    period_type: str | None = None
    data_model_name: str | None = None
    model: DataGardenSubModel

    def record_hash(self) -> str:
        """
        Return a key derived from the values of the UNIQUE_FIELDS attributes.

        The key is built with the builtin ``hash`` of a string, which Python
        salts per interpreter process, so it is only suitable as an in-memory
        key and must not be persisted or compared across processes.
        """
        unique_values = (str(getattr(self, field)) for field in sorted(UNIQUE_FIELDS))
        return str(hash(".".join(unique_values)))

    def __str__(self):
        return (
            f"RegionalDataRecord: {self.name} ({self.data_model_name} for {self.period}, {self.period_type})"
        )

    @property
    def datgarden_model_class(self) -> type[DataGardenModel]:
        """Class of the model instance stored on this record."""
        return self.model.__class__

    def record_for_sub_model(self, sub_model_name: str) -> "RegionalDataRecord":
        """
        Return a copy of this record narrowed to one of its sub models.

        Args:
            sub_model_name: Name of a sub model listed in the legends of the
                record's model class.

        Returns:
            A new record with the same region, source and period fields,
            ``data_model_name`` set to ``sub_model_name`` and ``model`` set to
            the corresponding attribute of the current model.

        Raises:
            ValueError: If ``sub_model_name`` is not a sub model of the model.
        """
        if sub_model_name not in self.datgarden_model_class.legends().sub_model_names:
            raise ValueError(f"Sub model `{sub_model_name}` not found in {self.datgarden_model_class}")
        # The model entry is replaced below, so skip serializing the (possibly
        # large) nested model just to throw the result away.
        child_record = self.model_dump(exclude={"model"})
        child_record["data_model_name"] = sub_model_name
        child_record["model"] = getattr(self.model, sub_model_name)
        return RegionalDataRecord(**child_record)
76
+
77
+
78
class TheDataGardenRegionalDataModel:
    """
    Model to hold response data from The Data Garden API Regional Data endpoint.

    The model holds a collection of regional data records, each containing a
    regional data model for the region for a specific set of sources, periods
    and period types.

    The data can be converted to Polars and Pandas dataframes by the following
    methods:
    - to_polars(model_convertors: dict | None = None) -> pl.DataFrame
        model_convertors dict will be used to convert specific model fields to
        dataframe columns.
    - full_model_to_polars() -> pl.DataFrame

    For pandas dataframes you can use the same methods:
    - to_pandas(model_convertors: dict | None = None) -> pd.DataFrame
    - full_model_to_pandas() -> pd.DataFrame
    """

    def __init__(
        self,
        api: "BaseApi",
        model_name: str,
        region_url: str,
        meta_data: BaseModel,
        is_sub_model: bool = False,
        model: type[DataGardenSubModel] | None = None,
    ):
        """
        Create an empty container; no API request is made here.

        Args:
            api: API client used by regional_data_from_api().
            model_name: Name of the data model. When ``model`` is not given,
                the upper-cased name is looked up on DatagardenModels.
            region_url: URL prefix to which "regional_data/" is appended.
            meta_data: Meta data object; regional_availability() reads
                ``statistics_for_data_model`` and ``region_types`` from it.
            is_sub_model: True for containers created through attribute
                access; such containers refuse to be called.
            model: Model class to use instead of the DatagardenModels lookup.
        """
        self._api: BaseApi = api
        self._model_name: str = model_name
        self._region_url: str = region_url
        self._request_params_hashes: list[str] = []
        self._data_records: dict[str, RegionalDataRecord] = {}
        self.meta_data: BaseModel = meta_data
        self._model: DataGardenModel = model or getattr(DatagardenModels, model_name.upper())
        self._is_sub_model: bool = is_sub_model

    def __str__(self):
        return f"TheDataGardenRegionalDataModel : {self._model_name} : (count={len(self._data_records)})"

    def __repr__(self):
        return self.__str__()

    def __call__(self, **kwargs) -> "TheDataGardenRegionalDataModel":
        """
        Retrieve records for the given request parameters and add them.

        A parameter combination that already returned data is not requested
        again. Returns ``self`` so calls can be chained.

        Raises:
            TypeError: If this container was created for a sub model.
        """
        if self._is_sub_model:
            raise TypeError(
                "Sub model data cannot be used to retrieve data. "
                "Use the main model data object to make calls to The-Datagarden API"
            )
        request_hash = self.request_hash(**kwargs)
        if request_hash not in self._request_params_hashes:
            regional_data = self.regional_paginated_data_from_api(**kwargs)
            if regional_data:
                self.set_items(regional_data)
                self._request_params_hashes.append(request_hash)
        return self

    def __getattr__(self, attribute: str) -> "TheDataGardenRegionalDataModel":
        """
        Return a sub-model container for ``attribute``.

        The returned container holds, for every record of this container, the
        record narrowed to the sub model named ``attribute``.

        Raises:
            AttributeError: For names starting with an underscore.
            ValueError: If ``attribute`` is not a sub model of the model.
        """
        # __getattr__ only runs after normal lookup failed. Private and dunder
        # names must fail with AttributeError right away: reading self._model
        # for them would re-enter __getattr__ without end on an instance whose
        # __init__ did not run (e.g. while copy/pickle probe for __setstate__).
        # NOTE(review): assumes sub model names never start with "_" - confirm.
        if attribute.startswith("_"):
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {attribute!r}")
        if attribute not in self._model.legends().sub_model_names:
            # ValueError (not AttributeError) is kept for existing callers.
            raise ValueError(f"Attribute {attribute} is not a sub-model of {self._model_name}")
        sub_model = getattr(self._model.legends(), attribute).model
        regional_data_for_attribute = TheDataGardenRegionalDataModel(
            api=self._api,
            model_name=attribute,
            region_url=self._region_url,
            meta_data=self.meta_data,
            is_sub_model=True,
            model=sub_model,
        )
        regional_data_for_attribute._data_records = {
            key: value.record_for_sub_model(attribute) for key, value in self._data_records.items()
        }
        return regional_data_for_attribute

    @property
    def model_attributes(self) -> list[str]:
        """Attributes listed in the legends of the model."""
        return self._model.legends().attributes

    def model_attribute_legend(self, attribute: str) -> Legend:
        """Return the legend entry of the model for ``attribute``."""
        return getattr(self._model.legends(), attribute)

    def request_hash(self, **kwargs) -> str:
        """
        Return a key for a set of request parameters, independent of order.

        Built with the builtin ``hash`` of a string, so the key is only
        meaningful within the current interpreter process.
        """
        sorted_items = sorted(kwargs.items())
        hash_str = ",".join(f"{k}:{v}" for k, v in sorted_items)
        return str(hash(hash_str))

    def _response_has_next_page(self, model_data_resp: dict) -> bool:
        """Tell whether the response's pagination entry names a next page."""
        pagination = model_data_resp.get("pagination")
        if not pagination:
            return False
        return pagination.get("next_page") is not None

    def _next_page_pagination(self, model_data_resp: dict) -> dict | None:
        """
        Remove the pagination entry from the response and return the
        pagination argument for the next page, or None when there is none.
        """
        pagination = model_data_resp.pop("pagination", None)
        if not pagination:
            return None
        next_page = pagination.get("next_page")
        if not next_page:
            return None
        return {"page": next_page}

    def regional_paginated_data_from_api(self, **kwargs) -> dict:
        """
        Retrieve all pages for a request and merge them into one response.

        The ``data_by_region`` entries of follow-up pages are appended to those
        of the first page. Returns an empty dict when the first request yields
        no data.
        """
        model_data_resp = self.regional_data_from_api(**kwargs)
        if not model_data_resp:
            return {}
        while self._response_has_next_page(model_data_resp):
            # Popping the pagination entry here guarantees the loop ends when
            # the next page cannot be retrieved.
            next_page_pagination = self._next_page_pagination(model_data_resp)
            if not next_page_pagination:
                break
            next_page_resp = self.regional_data_from_api(pagination=next_page_pagination, **kwargs)
            if not next_page_resp:
                break
            # Tolerate pages without data or pagination entry instead of
            # failing with a KeyError halfway through the retrieval.
            model_data_resp.setdefault("data_by_region", []).extend(
                next_page_resp.get("data_by_region", [])
            )
            if "pagination" in next_page_resp:
                model_data_resp["pagination"] = next_page_resp["pagination"]

        return model_data_resp

    def regional_data_from_api(self, **kwargs) -> dict:
        """
        POST one request to the regional data endpoint.

        Returns the decoded JSON response, or an empty dict when the API
        client returns a falsy response.
        """
        model_data_resp = self._api.retrieve_from_api(
            url_extension=self._region_url + "regional_data/",
            method="POST",
            payload={"model": self._model_name, **kwargs},
        )
        if model_data_resp:
            return model_data_resp.json()
        return {}

    def set_items(self, data: dict):
        """
        Convert an API response into records and store them by record hash.

        A record with the same hash as a stored record replaces it. Afterwards
        the model name is taken from the first stored record.

        Raises:
            ValueError: If the first stored record has no data_model_name.
        """
        for regional_data in data["data_by_region"]:
            base_items = {
                "name": regional_data.get("region_name"),
                "region_type": regional_data.get("region_type"),
                "un_region_code": regional_data.get("un_region_code"),
                "iso_cc_2": regional_data.get("iso_cc_2"),
                "local_region_code": regional_data.get("local_region_code"),
                "local_region_code_type": regional_data.get("local_region_code_type"),
                "parent_region_code": regional_data.get("parent_region_code"),
                "parent_region_code_type": regional_data.get("parent_region_code_type"),
                "parent_region_type": regional_data.get("parent_region_type"),
                "region_level": regional_data.get("region_level", 0),
            }
            # Build all records of a region first so that a failing record
            # leaves none of that region's records half added.
            data_records = [
                RegionalDataRecord(**base_items, **self._record_items(data_obj))
                for data_obj in regional_data["data_objects_for_region"]
            ]
            for data_record in data_records:
                self._data_records[data_record.record_hash()] = data_record

        if self._data_records:
            first_record = next(iter(self._data_records.values()))
            model_name = first_record.data_model_name
            if not model_name:
                raise ValueError("data_model_name is required")
            self._model_name = model_name

    def _record_items(self, data: dict):
        """
        Return the source, period and model fields of a record for one data
        object of the API response.

        Raises:
            ValueError: If the data object has no ``data_type`` or the type is
                not available on DatagardenModels.
        """
        model_name = data.get("data_type")
        if not model_name:
            raise ValueError("data_model_name is required")

        # Default of None so an unknown type reaches the ValueError below
        # instead of escaping as an AttributeError.
        model = getattr(DatagardenModels, model_name.upper(), None)
        if not model:
            raise ValueError(f"model {model_name} not found in DatagardenModels")
        return {
            "source_name": data.get("source_name"),
            "period": data.get("period"),
            "period_type": data.get("period_type"),
            "data_model_name": model_name,
            "model": model(**data.get("data", {})),
        }

    def to_polars(self, model_convertors: dict | None = None) -> pl.DataFrame:
        """
        Convert the data to a polars dataframe using a dictionary of model attributes to convert to columns

        Args:
            model_convertors: Maps a new column name to a dotted attribute path
                on the record's model (e.g. ``{"pop_count": "population.total"}``).
                When the path contains ``__flatten`` the value found is
                flattened into one column per nested key instead of being
                stored under the new column name.
        """
        model_convertors = model_convertors or {}
        converted_records = []
        for record in self._data_records.values():
            model = record.model
            record_dict = record.model_dump()
            record_dict.pop("model")

            for new_col, model_attr in model_convertors.items():
                flatten = "__flatten" in model_attr
                # Handle nested attributes using split by dots
                attrs = model_attr.replace("__flatten", "").split(".")
                value = getattr(model, attrs[0])
                for attr in attrs[1:]:
                    value = getattr(value, attr, None)
                if flatten:
                    model_data = value.model_dump() if isinstance(value, BaseModel) else value
                    if not model_data:
                        continue  # nothing to flatten
                    record_dict.update(self.flatten_dict(model_data, {}))
                else:
                    # Skip missing and empty values, but keep numeric zeros and
                    # False: those are data, not the absence of data.
                    if not value and not isinstance(value, (int, float)):
                        continue
                    record_dict[new_col] = value
            converted_records.append(record_dict)
        return pl.from_records(converted_records)

    def flatten_dict(self, dict_to_flatten: dict, flattened_dict: dict, prefix: str = "") -> dict:
        """
        Flatten nested dictionaries into ``flattened_dict`` and return it.

        Keys of nested dictionaries are joined with a dot, e.g.
        ``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``. Empty nested dictionaries
        produce no key.

        Args:
            dict_to_flatten: Dictionary to flatten.
            flattened_dict: Dictionary the flattened items are written to.
            prefix: Key prefix for the items of ``dict_to_flatten``.
        """
        for key, value in dict_to_flatten.items():
            new_key = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                # The recursive call writes straight into flattened_dict.
                self.flatten_dict(value, flattened_dict, new_key)
            else:
                flattened_dict[new_key] = value

        return flattened_dict

    def full_model_to_polars(self) -> pl.DataFrame:
        """
        Convert the data to a polars dataframe, flattening all nested dictionaries
        """
        converted_records = []
        for record in self._data_records.values():
            # Get all fields from the record excluding the model
            record_dict = record.model_dump(exclude={"model"})
            # Model data is added as flattened dictionary
            record_dict.update(self.flatten_dict(record.model.model_dump(), {}))
            converted_records.append(record_dict)
        return pl.from_records(converted_records)

    def to_pandas(self, model_convertors: dict | None = None) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe using a dictionary of model attributes to convert to columns
        """
        return self.to_polars(model_convertors).to_pandas()

    def full_model_to_pandas(self) -> pd.DataFrame:
        """
        Convert the data to a pandas dataframe, flattening all nested dictionaries
        """
        return self.full_model_to_polars().to_pandas()

    def __iter__(self):
        """Makes the class iterable over the values in _data_records"""
        return iter(self._data_records.values())

    def __len__(self):
        """Returns the number of records"""
        return len(self._data_records)

    @property
    def data_records(self) -> list[RegionalDataRecord]:
        """The stored records as a new list."""
        return list(self._data_records.values())

    def regional_availability(self) -> dict[str, RegionalDataStats | None]:
        """
        Return, per region type of the meta data, the statistics the meta data
        holds for this model, or None when it holds none for that region type.
        """
        availability_per_region = self.meta_data.statistics_for_data_model(model_name=self._model_name)
        regional_availability = {}
        for region_type in self.meta_data.region_types:
            if region_type in availability_per_region.keys():
                regional_availability[region_type] = availability_per_region[region_type]
            else:
                regional_availability[region_type] = None
        return regional_availability

    @property
    def regions_with_model_data(self) -> list[str]:
        """Region types for which regional_availability() holds statistics."""
        # Compute the availability once instead of once per region type.
        availability = self.regional_availability()
        return [region for region, stats in availability.items() if stats]

    def show_summary(self):
        """
        Outputs a summary of the model's structure (submodels and attributes)
        """
        self._model.legends().show_summary()

    def summary(self) -> dict:
        """
        return model's structure (submodels and attributes)
        """
        return self._model.legends().summary()

    def describe(
        self,
        include_attributes: list[str] | None = None,
        exclude_attributes: list[str] | None = None,
        filter_expr: pl.Expr | None = None,
    ) -> pl.DataFrame:
        """
        Return the polars ``describe()`` statistics of the flattened data.

        Args:
            include_attributes: Columns to describe. When given,
                ``exclude_attributes`` is ignored.
            exclude_attributes: Columns to leave out in addition to
                DEFAULT_COLUMNS_TO_EXCLUDE.
            filter_expr: Expression used to filter the rows first.

        Raises:
            ValueError: If no data has been loaded.
        """
        df = self.full_model_to_polars()
        if df.is_empty():
            raise ValueError("No data loaded for this model. Data is needed to describe the model.")

        if filter_expr is not None:
            df = df.filter(filter_expr)

        if include_attributes:
            return df.select(include_attributes).describe()

        attributes_to_exclude = DEFAULT_COLUMNS_TO_EXCLUDE.copy()
        if exclude_attributes:
            attributes_to_exclude.extend(exclude_attributes)
        return df.select([col for col in df.columns if col not in attributes_to_exclude]).describe()

    def data_availability_per_attribute(
        self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
    ) -> pl.DataFrame:
        """
        Return the describe() statistics with the values of the ``count`` and
        ``null_count`` rows passed through an integer cast.
        """
        # describe() already leaves out DEFAULT_COLUMNS_TO_EXCLUDE when no
        # attributes are selected, so they need not be passed again.
        describe_df = self.describe(include_attributes=include_attributes, filter_expr=filter_expr)

        describe_df = describe_df.with_columns(
            pl.when(pl.col("statistic").is_in(["count", "null_count"]))
            .then(pl.all().exclude("statistic").cast(pl.Int64))
            .otherwise(pl.all().exclude("statistic"))
        )

        return describe_df

    def show_data_availability_per_attribute(
        self, include_attributes: list[str] | None = None, filter_expr: pl.Expr | None = None
    ):
        """
        Print, per attribute, the number of values and how many of them hold data.
        """
        describe_df = self.data_availability_per_attribute(include_attributes, filter_expr)
        statistics = describe_df.get_column("statistic")
        stats_by_column = {
            column: dict(zip(statistics, describe_df.get_column(column), strict=True))
            for column in describe_df.columns
            if column != "statistic"
        }
        if not stats_by_column:
            return  # no attribute columns: nothing to print

        max_column_length = max(len(column) for column in stats_by_column)

        for column, stats in stats_by_column.items():
            # The padding is computed outside the f-string: reusing the
            # enclosing quote inside a replacement field is a syntax error
            # before Python 3.12.
            padding = " " * (max_column_length + 3 - len(column))
            total = int(stats["count"] + stats["null_count"])
            with_data = int(stats["count"])
            # A filter can leave no rows at all; avoid dividing by zero then.
            percentage = with_data / total * 100 if total else 0
            print(
                f"{column} : {padding}"
                f"{total}"
                f" of which with data: {with_data} "
                f"({percentage:.0f}%)"
            )
@@ -0,0 +1 @@
1
# Keep in sync with the distribution version declared in the package metadata.
__version__ = "1.2.3"
@@ -0,0 +1,253 @@
1
+ Metadata-Version: 2.1
2
+ Name: the-datagarden
3
+ Version: 1.2.3
4
+ Summary: Public data made easy.
5
+ Author-email: Maarten de Ruyter <info@the-datagarden.io>
6
+ License: MIT
7
+ Project-URL: Read the Docs, https://dg-the-datagarden.readthedocs.io/en/stable/
8
+ Project-URL: The-DataGarden, https://www.the-datagarden.io/
9
+ Project-URL: API documentation, https://www.the-datagarden.io/api-docs
10
+ Project-URL: Source, https://github.com/the-datagarden/the-datagarden
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Development Status :: 4 - Beta
19
+ Classifier: Intended Audience :: Developers
20
+ Classifier: Intended Audience :: Financial and Insurance Industry
21
+ Classifier: Intended Audience :: Science/Research
22
+ Classifier: Intended Audience :: Healthcare Industry
23
+ Classifier: Topic :: Scientific/Engineering :: GIS
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
+ Classifier: Topic :: Scientific/Engineering :: Visualization
27
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
+ Classifier: Topic :: Utilities
29
+ Requires-Python: >=3.10
30
+ Description-Content-Type: text/x-rst
31
+ Requires-Dist: click>=8.1.7
32
+ Requires-Dist: pandas>=2.2.3
33
+ Requires-Dist: polars>=1.15.0
34
+ Requires-Dist: pydantic>=2.9.2
35
+ Requires-Dist: pyjwt>=2.10.0
36
+ Requires-Dist: python-decouple>=3.8
37
+ Requires-Dist: requests>=2.32.3
38
+ Requires-Dist: the-datagarden-models>=1.6.3
39
+
40
+ ==================
41
+ the-datagarden SDK
42
+ ==================
43
+
44
+ The-datagarden package is a Python SDK built on top of The-DataGarden API. The SDK provides easy access to continent and country regional hierarchies,
45
+ as well as public data related to these regions. All data from The-DataGarden API is stored in normalized datamodels like ``Demographics``, ``Health``
46
+ or ``Economics``. This allows you as a data professional to create value from this data without having to worry about the (varying) data structure and
47
+ APIs of the sources.
48
+
49
+ Additionally, The-DataGarden API also provides country and regional GeoJSONs. The SDK makes it easy for you to combine public data and your own data and merge them into
50
+ GeoJSON feature collections, making geographic visualisation easy.
51
+
52
+
53
+ The-DataGarden SDK main use case
54
+ --------------------------------
55
+ The SDK is designed to make it easy to access and work with the DataGarden data. After initializing the SDK you simply
56
+ retrieve data for a specific continent, country or subregion by calling the appropriate datamodel.
57
+
58
+ .. code-block:: python
59
+
60
+ # initialize a country object and retrieve the demographics attribute
61
+ >>> nl = the_datagarden_api.netherlands # or nl = the_datagarden_api.NL
62
+ >>> nl_demographics = nl.demographics()
63
+ TheDataGardenRegionalDataModel : Demographics : (count=5)
64
+
65
+ In this example the `nl_demographics` object holds 5 records. Each record contains demographic data for the Netherlands for a specific
66
+ period and period type combination. The data can be made accessible in a tabular format by converting the object to a pandas or polars dataframe.
67
+
68
+ .. code-block:: python
69
+
70
+ # convert demographics data to a polars dataframe
71
+ >>> dataframe = nl_demographics.full_model_to_polars()
72
+ >>> print(dataframe["period", "source_name", "data_model_name", "population.total", "population.total_male", "population.total_female"])
73
+
74
+ .. code-block:: text
75
+
76
+ ┌───────────────┬────────────┬─────────────────┬──────────────────┬───────────────────────┬─────────────────────────┐
77
+ │ period ┆ source_name┆ data_model_name ┆ population.total ┆ population.total_male ┆ population.total_female │
78
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
79
+ │ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 │
80
+ ╞═══════════════╪════════════╪═════════════════╪══════════════════╪═══════════════════════╪═════════════════════════╡
81
+ │ 2022-01-01T0Z ┆ Eurostat ┆ Demographics ┆ null ┆ 8.745468e6 ┆ 8.845204e6 │
82
+ │ 2022-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.7789347e7 ┆ 8.890013e6 ┆ 9.014408e6 │
83
+ │ 2023-01-01T0Z ┆ Eurostat ┆ Demographics ┆ null ┆ 8.850309e6 ┆ 8.960982e6 │
84
+ │ 2023-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.8019495e7 ┆ 8.986255e6 ┆ 9.106269e6 │
85
+ │ 2024-01-01T0Z ┆ United Nat ┆ Demographics ┆ 1.8165554e7 ┆ 9.055978e6 ┆ 9.172763e6 │
86
+ └───────────────┴────────────┴─────────────────┴──────────────────┴───────────────────────┴─────────────────────────┘
87
+
88
+ The demographics model holds lots of submodels and attributes. In this example only a limited number of attributes are listed
89
+ as the dataframe is way too large to display. For all models and their details see the model data documentation at
90
+ `The DataGarden Data Documentation <https://www.the-datagarden.io/data-docs>`_.
91
+
92
+ Getting started with the SDK
93
+ ----------------------------
94
+ You can start using the SDK out of the box by simply instantiating the TheDataGardenAPI object:
95
+
96
+ .. code-block:: python
97
+
98
+ # Starting with the datagarden API
99
+ >>> from the_datagarden import TheDataGardenAPI
100
+ >>> the_datagarden_api = TheDataGardenAPI()
101
+
102
+ .. code-block:: console
103
+
104
+ Welcome to The Data Garden API.
105
+
106
+ You can start using the API with an account from The-Datagarden.io.
107
+ Please provide your credentials or create a new account.
108
+ Check www.the-datagarden.io for more information.
109
+
110
+ Do you want to (1) create a new account or (2) provide existing credentials? Enter 1 or 2:
111
+
112
+
113
+ simply select 1 to create a new account.
114
+
115
+ .. code-block:: console
116
+
117
+ Enrolling in The Data Garden API...
118
+
119
+ Enter your email: <your-email>
120
+ Enter your password: <your-password>
121
+ Confirm your password: <your-password>
122
+
123
+ Successfully enrolled in The Data Garden API.
124
+ Initializing : TheDatagardenEnvironment
125
+ At: https://www.the-datagarden.io/
126
+
127
+ If you already have an account at the-datagarden.io, you can either select option 2 or directly provide your credentials
128
+ when creating the TheDataGardenAPI object:
129
+
130
+ .. code-block:: python
131
+
132
+ # Retrieve a country object from the datagarden API
133
+ >>> from the_datagarden import TheDataGardenAPI
134
+ >>> the_datagarden_api = TheDataGardenAPI(email='your-email@example.com', password='your-password')
135
+
136
+ .. code-block:: console
137
+
138
+ Initializing : TheDatagardenEnvironment
139
+ At: https://www.the-datagarden.io/
140
+
141
+ A 3rd way to initialize the SDK is adding your credentials to the ``.env`` file.
142
+
143
+
144
+ Getting your first data from The-DataGarden API
145
+ -----------------------------------------------
146
+ Now that you have initialized the SDK, you can start retrieving data from The-DataGarden API.
147
+ For example, you can retrieve the demographics data for the Netherlands:
148
+
149
+ .. code-block:: python
150
+
151
+ # initialize a country object and retrieve the demographics attribute
152
+ >>> nl = the_datagarden_api.netherlands
153
+ >>> nl_demographics = nl.demographics
154
+ TheDataGardenRegionalDataModel : Demographics : (count=0)
155
+
156
+ This creates a country object ``nl`` for the Netherlands, which serves as your gateway to all Netherlands-related
157
+ data and its regional subdivisions.
158
+
159
+ In this getting started section we will work with a demographics object retrieved from the `nl` country object.
160
+ As shown in the example, the ``nl_demographics`` object can be retrieved by simply calling the `demographics`
161
+ attribute on the `nl` country object
162
+
163
+ The `nl_demographics` object starts empty (count=0). To populate it with data, simply call it as a function:
164
+
165
+ .. code-block:: python
166
+
167
+ # Calling the demographics attribute will populate it with demographics data from the API
168
+ >>> nl_demographics()
169
+ >>> nl_demographics
170
+ TheDataGardenRegionalDataModel : Demographics : (count=5)
171
+
172
+ When called without parameters, the API returns data using default settings, which in this case yields 5 records.
173
+ You can customize your data retrieval by specifying parameters such as time periods, period types, and data sources.
174
+
175
+
176
+ The DataGarden Regional DataModel
177
+ ---------------------------------
178
+ When you retrieve data like ``nl_demographics``, you're working with a ``TheDataGardenRegionalDataModel`` object. This object acts as a container that holds:
179
+
180
+ 1. A collection of ``RegionalDataRecord`` objects
181
+ 2. Metadata about the records (region, time period, data source, etc.)
182
+
183
+ You can easily transform this data into pandas or polars DataFrames for analysis. Here's an example showing population data for the Netherlands:
184
+
185
+ .. code-block:: python
186
+
187
+ >>> nl = the_datagarden_api.netherlands
188
+ >>> nl_demographics = nl.demographics(period_from="2010-01-01", source="united nations")
189
+ >>> # Convert to DataFrame, mapping 'population.total' to column name 'pop_count'
190
+ >>> df = nl_demographics.to_polars({"pop_count": "population.total"}) # or to_pandas(...)
191
+ >>> df["name", "source_name", "period", "data_model_name", "pop_count"] # for readability only a limited number of columns are displayed
192
+ ┌─────────────┬────────────────┬─────────────────┬─────────────────┬─────────────┐
193
+ │ name ┆ source_name ┆ period ┆ data_model_name ┆ pop_count │
194
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
195
+ │ str ┆ str ┆ str ┆ str ┆ f64 │
196
+ ╞═════════════╪════════════════╪═════════════════╪═════════════════╪═════════════╡
197
+ │ Netherlands ┆ United Nations ┆ 2010-01-010:00Z ┆ Demographics ┆ 1.6729801e7 │
198
+ │ Netherlands ┆ United Nations ┆ 2011-01-010:00Z ┆ Demographics ┆ 1.6812669e7 │
199
+ │ … ┆ … ┆ … ┆ … ┆ … │
200
+ │ Netherlands ┆ United Nations ┆ 2023-01-010:00Z ┆ Demographics ┆ 1.8019495e7 │
201
+ │ Netherlands ┆ United Nations ┆ 2024-01-010:00Z ┆ Demographics ┆ 1.8165554e7 │
202
+ └─────────────┴────────────────┴─────────────────┴─────────────────┴─────────────┘
203
+
204
+ Each time you call the ``nl_demographics`` object with different parameters,
205
+ new demographic records for the specified subregions, periods, and/or sources are added to the existing ``nl_demographics`` object.
206
+ After you've gathered all the records you need, you can convert the entire collection into a dataframe for further analysis.
207
+
208
+
209
+ Retrieving GeoJSON data
210
+ -----------------------
211
+ Retrieving the GeoJSON for the Netherlands and its provinces is straightforward as well:
212
+
213
+ .. code-block:: python
214
+
215
+ >>> nl_geojson = nl.geojsons()
216
+ >>> nl_geojson
217
+ TheDataGardenRegionGeoJSONModel : GeoJSON : (count=1)
218
+ >>> nl_geojson(region_level=2) # Retrieve GeoJSON for 2nd regional level (provinces)
219
+ TheDataGardenRegionGeoJSONModel : GeoJSON : (count=13) # 12 provinces + 1 country
220
+ >>> df = nl_geojson.to_polars()
221
+ >>> df["name", "region_type", "local_region_code", "region_level", "feature"]
222
+ ┌───────────────┬─────────────┬───────────────┬──────────────┬────────────────────────┐
223
+ │ name ┆ region_type ┆ local_region_c┆ region_level ┆ feature │
224
+ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
225
+ │ str ┆ str ┆ str ┆ i64 ┆ struct[3] │
226
+ ╞═══════════════╪═════════════╪═══════════════╪══════════════╪════════════════════════╡
227
+ │ Netherlands ┆ country ┆ 528 ┆ 0 ┆ {"Feature",{"Netherland│
228
+ │ Drenthe ┆ province ┆ NL13 ┆ 2 ┆ {"Feature",{"Drenthe",2│
229
+ │ … ┆ … ┆ … ┆ … ┆ … │
230
+ │ Zuid-Holland ┆ province ┆ NL33 ┆ 2 ┆ {"Feature",{"Zuid-Holla│
231
+ └───────────────┴─────────────┴───────────────┴──────────────┴────────────────────────┘
232
+
233
+ For readability, only a limited number of dataframe columns are displayed.
234
+ Attributes in both the demographics and geojson dataframes are available to connect the geojson to
235
+ the demographics data. This allows you to quickly create data sets that contain both demographics and geojson data
236
+ for further analysis or visualisation in map applications.
237
+
238
+
239
+ Read more
240
+ ---------
241
+
242
+ * `The DataGarden Website <https://www.the-datagarden.io>`_
243
+ * `API Documentation <https://www.the-datagarden.io/api-docs>`_
244
+ * `The Datagarden Models <https://www.the-datagarden.io/data-docs>`_
245
+ * `GitHub Repository <https://github.com/MaartendeRuyter/dg-the-datagarden>`_
246
+
247
+
248
+ Access to The DataGarden API
249
+ ----------------------------
250
+ To use the DataGarden SDK, you need access to The DataGarden API. Simply register for free at https://www.the-datagarden.io
251
+ and you will have an initial free access account to the API with access to country and continent data.
252
+
253
+ Visit https://www.the-datagarden.io to register for free.