tukan-python 0.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tukan-python might be problematic.

tukan_python/query.py ADDED
@@ -0,0 +1,515 @@
+ from collections import defaultdict
+ from typing import Literal, Optional
+
+ import pandas as pd
+
+ from tukan_python.tukan import Tukan
+
+
+ class Query:
+     """
+     Helper class for building and executing queries against the TukanMX API.
+
+     This class allows flexible construction of query payloads, including filters,
+     groupings, and aggregations. It supports saving, executing, and reconstructing
+     queries from names, IDs, or payloads.
+
+     Attributes:
+         Tukan (Tukan): Instance of the Tukan API client.
+         table_name (str): Name of the data table to query.
+         where (list[dict]): List of filter conditions.
+         group_by (list[dict]): List of group-by conditions.
+         aggregate (list[dict]): List of aggregate operations.
+         language (str): Language for the query results.
+     """
+
+     def __init__(
+         self,
+         table_name: str,
+         token: Optional[str] = None,
+         where: Optional[list[dict]] = None,
+         group_by: Optional[list[dict]] = None,
+         aggregate: Optional[list[dict]] = None,
+         language: str = "en",
+     ):
+         """
+         Initialize a new Query instance.
+
+         Args:
+             table_name (str): Name of the data table to query.
+             token (Optional[str]): API token for authentication.
+             where (Optional[list[dict]]): List of filter conditions.
+             group_by (Optional[list[dict]]): List of group-by conditions.
+             aggregate (Optional[list[dict]]): List of aggregate operations.
+             language (str): Language for the query results.
+         """
+         self.table_name = table_name
+         self.Tukan = Tukan(token)
+         self.__set_metadata__()
+         self.where = where if where is not None else []
+         self.group_by = group_by if group_by is not None else []
+         self.aggregate = aggregate if aggregate is not None else []
+         self.language = language
+
+     def __set_metadata__(self) -> None:
+         """
+         Fetch and cache the metadata for the table.
+         """
+         meta = self.Tukan.get_table_metadata(self.table_name)
+         dt_to_refs = defaultdict(set)
+         for ref in meta["data_table_references"]:
+             dt_to_refs[ref["type"]].add(ref["id"])
+
+         self.dtypes_to_refs = dt_to_refs
+         self.all_indicators = meta["indicators"]
+
+     def get_table_name(self) -> str:
+         """
+         Get the name of the table for the query.
+
+         Returns:
+             str: The name of the table.
+         """
+         return self.table_name
+
+     def set_where(self, where: list[dict]) -> "Query":
+         """
+         Set the filter conditions for the query.
+
+         Args:
+             where (list[dict]): List of filter conditions.
+         """
+         self.where = where
+         return self
+
+     def get_where(self) -> list[dict]:
+         """
+         Get the filter conditions for the query.
+
+         Returns:
+             list[dict]: List of filter conditions.
+         """
+         return self.where
+
+     def add_filter(self, filter: dict) -> "Query":
+         """
+         Add a filter condition to the query.
+
+         Args:
+             filter (dict): Filter condition to add.
+         """
+         self.where.append(filter)
+         return self
+
+     def add_date_filter(
+         self, reference: str, date_from: str, date_to: Optional[str] = None
+     ) -> "Query":
+         """
+         Add a date filter to the query. Dates should be in ISO format (YYYY-MM-DD).
+
+         Args:
+             reference (str): Reference field for the date filter.
+             date_from (str): Start date for the filter.
+             date_to (Optional[str]): End date for the filter (optional).
+         """
+         dt_filter = {"reference": reference, "from": date_from}
+         if date_to is not None:
+             dt_filter["to"] = date_to
+         self.where.append(dt_filter)
+         return self
+
+     def add_numeric_filter(
+         self,
+         reference: str,
+         lte: Optional[float] = None,
+         eq: Optional[float] = None,
+         gte: Optional[float] = None,
+     ) -> "Query":
+         """
+         Add a numeric filter to the query.
+
+         Args:
+             reference (str): Reference field for the numeric filter.
+             lte (Optional[float]): Less-than-or-equal value.
+             eq (Optional[float]): Equal value.
+             gte (Optional[float]): Greater-than-or-equal value.
+         """
+         self.__validate_numeric_filter__(lte, eq, gte)
+         nm_filter = {"reference": reference, "lte": lte, "eq": eq, "gte": gte}
+         nm_filter = {k: v for k, v in nm_filter.items() if v is not None}
+         self.where.append(nm_filter)
+         return self
+
+     def __validate_numeric_filter__(
+         self,
+         lte: Optional[float] = None,
+         eq: Optional[float] = None,
+         gte: Optional[float] = None,
+     ) -> None:
+         """
+         Validate the numeric filter arguments.
+
+         Args:
+             lte (Optional[float]): Less-than-or-equal value.
+             eq (Optional[float]): Equal value.
+             gte (Optional[float]): Greater-than-or-equal value.
+
+         Raises:
+             ValueError: If the filter arguments are invalid.
+         """
+         if eq is None and lte is None and gte is None:
+             raise ValueError("At least one of eq, lte, or gte must be specified")
+         elif eq is not None and (lte is not None or gte is not None):
+             raise ValueError("The eq parameter cannot be used with lte or gte")
+
+     def add_standard_filter(self, reference: str, value: list[str]) -> "Query":
+         """
+         Add a standard (categorical) filter to the query.
+
+         Args:
+             reference (str): Reference field for the filter.
+             value (list[str]): List of values to filter by.
+         """
+         self.where.append({"reference": reference, "value": value})
+         return self
+
+     def set_group_by(self, group_by: list[dict]) -> "Query":
+         """
+         Set the group-by conditions for the query.
+
+         Args:
+             group_by (list[dict]): List of group-by conditions.
+         """
+         self.group_by = group_by
+         return self
+
+     def get_group_by(self) -> list[dict]:
+         """
+         Get the group-by conditions for the query.
+
+         Returns:
+             list[dict]: List of group-by conditions.
+         """
+         return self.group_by
+
+     def add_to_group_by(self, group_by: dict) -> "Query":
+         """
+         Add a group-by condition to the query.
+
+         Args:
+             group_by (dict): Group-by condition to add.
+         """
+         self.group_by.append(group_by)
+         return self
+
+     def add_non_date_reference_to_group_by(self, reference: str) -> "Query":
+         """
+         Add a non-date reference to the group-by conditions.
+
+         Args:
+             reference (str): Reference field to group by.
+         """
+         self.group_by.append({"reference": reference})
+         return self
+
+     def add_date_reference_to_group_by(
+         self,
+         reference: str,
+         level: Literal["yearly", "quarterly", "monthly", "as_is"] = "as_is",
+     ) -> "Query":
+         """
+         Add a date reference to the group-by conditions with a specified granularity.
+
+         Args:
+             reference (str): Reference field to group by.
+             level (Literal): Granularity level ('yearly', 'quarterly', 'monthly', 'as_is').
+         """
+         self.__validate_date_filter__(level)
+         dt_filter = {"reference": reference, "level": level}
+         self.group_by.append(dt_filter)
+         return self
+
+     def __validate_date_filter__(
+         self, level: Literal["yearly", "quarterly", "monthly", "as_is"]
+     ) -> None:
+         """
+         Validate the date filter granularity level.
+
+         Args:
+             level (Literal): Granularity level to validate.
+
+         Raises:
+             ValueError: If the level is invalid.
+         """
+         if level not in {"yearly", "quarterly", "monthly", "as_is"}:
+             raise ValueError(
+                 "Invalid level. Must be 'yearly', 'quarterly', 'monthly', or 'as_is'"
+             )
+
+     def set_aggregate(self, aggregate: list[dict]) -> "Query":
+         """
+         Set the aggregate operations for the query.
+
+         Args:
+             aggregate (list[dict]): List of aggregate operations.
+         """
+         self.aggregate = aggregate
+         return self
+
+     def get_aggregate(self) -> list[dict]:
+         """
+         Get the aggregate operations for the query.
+
+         Returns:
+             list[dict]: List of aggregate operations.
+         """
+         return self.aggregate
+
+     def add_aggregate(self, indicator: str, operations: list[str]) -> "Query":
+         """
+         Add an aggregate operation for a specific indicator.
+
+         Args:
+             indicator (str): Indicator to aggregate.
+             operations (list[str]): List of operations (e.g., ['sum', 'avg', 'identity']).
+         """
+         self.__validate_aggregate__(operations)
+         self.aggregate.append({"indicator": indicator, "operations": operations})
+         return self
+
+     def __validate_aggregate__(self, operations: list[str]) -> None:
+         """
+         Validate the aggregate operations.
+
+         Args:
+             operations (list[str]): List of aggregate operations.
+
+         Raises:
+             ValueError: If operations are empty or invalid.
+         """
+         if len(operations) == 0:
+             raise ValueError("At least one operation must be specified")
+         elif {*operations} - {"sum", "avg", "identity"}:
+             raise ValueError("Invalid operation. Must be 'sum', 'avg', or 'identity'")
+
+     def set_language(self, language: str) -> "Query":
+         """
+         Set the language for the query results.
+
+         Args:
+             language (str): The language code (e.g., 'en', 'es').
+         """
+         self.language = language
+         return self
+
+     def get_language(self) -> str:
+         """
+         Get the language for the query results.
+
+         Returns:
+             str: The language code.
+         """
+         return self.language
+
+     def __get_select__(self) -> list[dict]:
+         """
+         Get the select clause for the query.
+
+         Returns:
+             list[dict]: List containing the table and indicators to select.
+         """
+         indicators = [x["indicator"] for x in self.aggregate]
+         return [{"table": self.table_name, "indicators": indicators}]
+
+     def __get_iterate__(self) -> list[dict]:
+         """
+         Get the iterate clause for the query.
+
+         Returns:
+             list[dict]: List containing group-by and aggregate operations.
+         """
+         return [{"group_by": self.group_by, "aggregate": self.aggregate}]
+
+     def __str__(self) -> str:
+         """
+         Return the string representation of the query payload.
+
+         Returns:
+             str: Stringified query payload.
+         """
+         payload_info = {
+             "table_name": self.table_name,
+             "language": self.language,
+             "where": self.where,
+             "group_by": self.group_by,
+             "aggregate": self.aggregate,
+         }
+         return str(payload_info)
+
+     def __request_payload__(self) -> dict:
+         """
+         Construct the full query payload as a dictionary.
+
+         Returns:
+             dict: The query payload.
+         """
+         return {
+             "select": self.__get_select__(),
+             "where": self.where,
+             "iterate": self.__get_iterate__(),
+             "language": self.language,
+         }
+
+     def set_aggregate_for_all_indicators(self, operations: list[str]) -> "Query":
+         """
+         Set the given aggregate operations for all indicators in the current table.
+         """
+         all_indicators = self.__all_indicators_refs_for_table__()
+         self.aggregate = [
+             {"indicator": indicator, "operations": operations}
+             for indicator in all_indicators
+         ]
+         return self
+
+     def set_groupby_for_all_columns(self) -> "Query":
+         """
+         Set group-by for all references (columns) in the current table.
+         """
+         references = self.__all_non_date_references__()
+         non_date_group_by = [{"reference": reference} for reference in references]
+         date_group_by = [
+             {"reference": reference, "level": "as_is"}
+             for reference in self.dtypes_to_refs["DT"]
+         ]
+         group_by = [*non_date_group_by, *date_group_by]
+         self.set_group_by(group_by)
+         return self
+
+     def __all_non_date_references__(self) -> list[str]:
+         """
+         Get all non-date reference columns for the current table.
+
+         Returns:
+             list[str]: List of reference column names.
+         """
+         non_dt_ref_groups = [
+             values for key, values in self.dtypes_to_refs.items() if key != "DT"
+         ]
+         return [*set.union(*non_dt_ref_groups)]
+
+     def __all_indicators_refs_for_table__(self) -> list[str]:
+         """
+         Get all indicator references for the current table.
+
+         Returns:
+             list[str]: List of indicator reference names.
+         """
+         all_indicators = [indicator["ref"] for indicator in self.all_indicators]
+         return all_indicators
+
+     def save_query(self, name: str) -> dict:
+         """
+         Save the current query to the server with the given name.
+
+         Args:
+             name (str): Name to save the query as.
+
+         Returns:
+             dict: Parsed server response.
+         """
+         BODY = {
+             "data_table": self.table_name,
+             "language": self.language,
+             "name": name,
+             "query": self.__request_payload__(),
+         }
+
+         response = self.Tukan.__execute_post_operation__(BODY, "visualizations/query/")
+
+         return response
+
+     def execute_query(
+         self, mode: Literal["vertical", "horizontal"] = "vertical"
+     ) -> dict:
+         """
+         Execute the query on the server and return the results.
+
+         Args:
+             mode (Literal): Output mode, 'vertical' or 'horizontal'.
+
+         Returns:
+             dict: Dictionary containing indicators and the result DataFrame.
+         """
+         payload = self.__request_payload__()
+         payload["mode"] = mode
+         response = self.Tukan.__execute_post_operation__(payload, "data/new_retrieve/")
+         df = pd.DataFrame(response["data"])
+         return {"indicators": response["indicators"], "df": df}
+
+
+ def create_identity_query_for_table_with_date_filters(
+     table_name: str,
+     language: Literal["en", "es"],
+     from_date: str,
+     to_date: str,
+ ) -> Query:
+     """
+     Create an identity query for a table with date filters applied.
+
+     Args:
+         table_name (str): Name of the table.
+         language (Literal): Language for the query.
+         from_date (str): Start date for the filter.
+         to_date (str): End date for the filter.
+     """
+     query = create_identity_query_for_table(table_name, language)
+     for date_ref in query.dtypes_to_refs["DT"]:
+         query.add_date_filter(date_ref, from_date, to_date)
+     return query
+
+
+ def create_identity_query_for_table(
+     table_name: str, language: Literal["en", "es"]
+ ) -> Query:
+     """
+     Create an identity query for a table (all indicators, all references, group by all columns).
+
+     Args:
+         table_name (str): Name of the table.
+         language (Literal): Language for the query.
+     """
+     query = Query(table_name)
+     query.set_aggregate_for_all_indicators(["identity"])
+     query.set_language(language)
+     query.set_groupby_for_all_columns()
+     return query
+
+
+ def create_query_from_query_id_or_name(query_id_or_name: str) -> Query:
+     """
+     Create a Query instance from a query ID or name on the server.
+
+     Args:
+         query_id_or_name (str): The query's ID or name.
+     """
+     query = Tukan().get_query_from_name_or_id(query_id_or_name)["query"]
+     query = create_query_from_payload(query)
+     return query
+
+
+ def create_query_from_payload(payload: dict) -> Query:
+     """
+     Create a Query instance from a query payload dictionary.
+
+     Args:
+         payload (dict): The query payload.
+     """
+     query = Query(payload["table_name"])
+     query.set_where(payload["where"])
+     query.set_group_by(payload["group_by"])
+     query.set_aggregate(payload["aggregate"])
+     query.set_language(payload["language"])
+     return query
tukan_python/tukan.py ADDED
@@ -0,0 +1,560 @@
+ import os
+ from collections import OrderedDict
+ import json
+ from functools import partial, update_wrapper
+ from random import randint
+ from time import sleep
+ from typing import Callable, Optional
+
+ import requests
+ import pandas as pd
+ from dotenv import load_dotenv
+ from loguru import logger
+
+ load_dotenv()
+
+
+ class Tukan:
+     """
+     Handles authentication and requests to the TukanMX API.
+
+     This class provides methods for retrieving tables, indicators, and metadata,
+     as well as sending and receiving data via POST and GET operations. It also
+     provides utility methods for checking the existence of tables and indicators,
+     and for parsing hierarchical data structures.
+
+     Attributes:
+         token (str): API token for authentication.
+         env (str): Base URL for the TukanMX API.
+     """
+
+     def __init__(self, token: Optional[str] = None):
+         """
+         Initialize a new Tukan API client instance.
+
+         Args:
+             token (Optional[str]): API token for authentication. If not provided,
+                 the API_TUKAN environment variable is used.
+
+         Raises:
+             ValueError: If no token is provided and API_TUKAN is not set in the environment.
+         """
+         env_token = os.getenv("API_TUKAN")
+         if token is None and not env_token:
+             raise ValueError(
+                 "Token not provided and not found in environment variables"
+             )
+         self.token = token or env_token
+         self.env = "https://client.tukanmx.com/"
+
+     def __execute_post_operation__(self, payload: dict, source: str):
+         """
+         Execute a POST request to the TukanMX API.
+
+         Args:
+             payload (dict): JSON payload to send in the POST request.
+             source (str): API endpoint to post to.
+
+         Returns:
+             dict: Parsed JSON response from the API.
+
+         Raises:
+             Exception: If the operation is not allowed (HTTP 403).
+         """
+         target_url = self.env + source
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"token {self.token}",
+         }
+         request_partial = wrapped_partial(
+             requests.request,
+             method="POST",
+             url=target_url,
+             json=payload,
+             headers=headers,
+             timeout=20,
+         )
+         response = self.__persistent_request__(request_partial)
+         if response.status_code < 300:
+             message = response.json()
+             return message
+         elif response.status_code == 403:
+             logger.info(f"{response.text}")
+             raise Exception("Operation not allowed on admin. Contact administrator!")
+         else:
+             message = response.text
+             return json.loads(message)
+
+     def __execute_get_operation__(self, source: str, query: dict):
+         """
+         Execute a GET request to the TukanMX API.
+
+         Args:
+             source (str): API endpoint to query.
+             query (dict): Query parameters for the GET request.
+
+         Returns:
+             dict: Parsed JSON response from the API.
+         """
+         target_url = self.env + source
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"token {self.token}",
+         }
+         response = requests.get(
+             url=target_url, params=query, headers=headers, timeout=20
+         )
+         if response.status_code < 300:
+             message = response.json()
+             return message
+         else:
+             message = response.text
+             return json.loads(message)
+
+     def __persistent_request__(self, request_partial: Callable):
+         """
+         Attempt a request persistently, retrying on failure.
+
+         Args:
+             request_partial (Callable): A partial function representing the request to execute.
+
+         Returns:
+             requests.Response: The last response received.
+
+         Raises:
+             ConnectionError: If no response was received after all attempts.
+         """
+         response = None
+         attempts = 0
+         while attempts < 2:
+             try:
+                 response = request_partial()
+                 if response.status_code < 300:
+                     break
+             except requests.RequestException:
+                 pass
+             attempts += 1
+             sleep(randint(3, 5))
+         if response is None:
+             raise ConnectionError("Request failed on every retry attempt")
+         return response
+
+     def all_tables(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
+         """
+         Retrieve a list of all available data tables.
+
+         Args:
+             page (int): Page number for pagination.
+             page_size (int): Number of tables per page.
+
+         Returns:
+             list[dict]: List of table metadata dictionaries.
+         """
+         payload = {
+             "resource": "datatable",
+             "operation": "view",
+             "page": page,
+             "page_size": page_size,
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         return response["data"]
+
+     def get_table(self, table_name: str) -> dict:
+         """
+         Retrieve metadata for a specific data table by name or ID.
+
+         Args:
+             table_name (str): The name or ID of the data table.
+
+         Returns:
+             dict: Metadata dictionary for the table.
+         """
+         payload = {
+             "resource": "datatable",
+             "operation": "view",
+             "page": "1",
+             "page_size": "1",
+             "filter_by": {"id": table_name},
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         return response["data"][0]
+
+     def does_table_exist(self, table_name: str) -> bool:
+         """
+         Check if a data table exists by name or ID.
+
+         Args:
+             table_name (str): The name or ID of the data table.
+
+         Returns:
+             bool: True if the table exists, False otherwise.
+         """
+         try:
+             self.get_table(table_name)
+             return True
+         except IndexError:
+             return False
+
+     def get_table_metadata(self, table_name: str, language="en") -> dict:
+         """
+         Retrieve metadata for a specific table, including columns and references.
+
+         Args:
+             table_name (str): The name or ID of the data table.
+             language (str): Language for metadata (default is 'en').
+
+         Returns:
+             dict: Metadata dictionary for the table.
+         """
+         payload = {"data": {"id": table_name, "language": language}}
+         response = self.__execute_post_operation__(payload, "data/metadata/")
+         return response
+
+     def all_indicators(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
+         """
+         Retrieve all indicators available in the database.
+
+         Args:
+             page (int): Page number for pagination (default is 1).
+             page_size (int): Number of indicators per page (default is 2,500).
+
+         Returns:
+             list[dict]: List of indicator metadata dictionaries.
+         """
+         payload = {
+             "resource": "indicator",
+             "operation": "view",
+             "page": page,
+             "page_size": page_size,
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         return response["data"]
+
+     def all_indicators_for_table(
+         self, table_name: str, page: int = 1, page_size: int = 2_500
+     ) -> list[dict]:
+         """
+         Retrieve all indicators for a specific table.
+
+         Args:
+             table_name (str): The name or ID of the data table.
+             page (int): Page number for pagination.
+             page_size (int): Number of indicators per page.
+
+         Returns:
+             list[dict]: List of indicator metadata dictionaries.
+         """
+         payload = {
+             "resource": "indicator",
+             "operation": "view",
+             "page": page,
+             "page_size": page_size,
+             "filter_by": {"data_table": table_name},
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         return response["data"]
+
+     def does_indicator_ref_exist(self, indicator_ref: str) -> bool:
+         """
+         Check if an indicator reference exists.
+
+         Args:
+             indicator_ref (str): The reference ID of the indicator.
+
+         Returns:
+             bool: True if the indicator exists, False otherwise.
+         """
+         try:
+             indicator_info = self.get_indicator_by_ref(indicator_ref, page_size=1)
+         except IndexError:
+             indicator_info = {}
+         return bool(indicator_info)
+
+     def get_indicator_by_ref(
+         self, indicator_ref: str, page: int = 1, page_size: int = 2_500
+     ) -> dict:
+         """
+         Retrieve indicator metadata by its reference ID.
+
+         Args:
+             indicator_ref (str): The reference ID of the indicator.
+             page (int): Page number for pagination.
+             page_size (int): Number of indicators per page.
+
+         Returns:
+             dict: Metadata dictionary for the indicator.
+         """
+         payload = {
+             "resource": "indicator",
+             "operation": "view",
+             "page": page,
+             "page_size": page_size,
+             "filter_by": {"ref": indicator_ref},
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         return response["data"][0]
+
+     def ask_leah(self, query: str, language: str = "en") -> list[dict]:
+         """
+         Query the Leah endpoint for table suggestions based on a natural language query.
+
+         Args:
+             query (str): The question or prompt for Leah.
+             language (str): Language for the query (default is 'en').
+
+         Returns:
+             list[dict]: Parsed response with table metadata suggestions.
+         """
+         payload = {"query": query, "language": language}
+         response = self.__execute_post_operation__(payload, "leah/")
+         parsed_response = parse_leah(response)
+         return parsed_response
+
+     def get_tree_for_table(self, table_name: str) -> dict[str, pd.DataFrame]:
+         """
+         Retrieve hierarchical tree structures for a given table.
+
+         Args:
+             table_name (str): The name or ID of the data table.
+
+         Returns:
+             dict[str, pd.DataFrame]: Dictionary mapping each table reference to a DataFrame of its reference values with hierarchical information.
+         """
+         payload = {
+             "operation": "view",
+             "resource": "tree",
+             "filter_by": {"data_table": table_name},
+         }
+         response = self.__execute_post_operation__(payload, "data/")
+         parsed_trees = parse_leah_trees(response["data"][0]["tree"])
+         return parsed_trees
+
+     def get_query_from_name_or_id(self, query_name_or_id: str) -> OrderedDict:
+         """
+         Retrieve a saved query by its name or ID and return its details in an OrderedDict.
+
+         Args:
+             query_name_or_id (str): Name or ID of the saved query.
+
+         Returns:
+             OrderedDict: Ordered dictionary containing the keys 'id', 'name', 'author_name', 'created', 'updated', and 'query'.
+         """
+         BODY = {
+             "page_size": "10_000",
+             "current": "1",
+             "order_by": "-updated",
+             "tags": "",
+             "search": query_name_or_id,
+             "api": "visualizations",
+             "resource": "queries",
+         }
+         response = self.__execute_get_operation__("visualizations/queries", BODY)
+         data = response["data"][0]
+         parsed_data = parse_query_data(data)
+         return parsed_data
+
+
+ def wrapped_partial(func, *args, **kwargs) -> Callable:
+     """
+     Returns a partial function with updated wrapper metadata.
+
+     Args:
+         func (Callable): The function to partially apply.
+         *args: Positional arguments to pre-fill.
+         **kwargs: Keyword arguments to pre-fill.
+
+     Returns:
+         Callable: The partially applied function with updated metadata.
+     """
+     partial_func = partial(func, *args, **kwargs)
+     update_wrapper(partial_func, func)
+     return partial_func
+
+
+ def parse_leah(response: dict) -> list[dict]:
+     """
+     Parses a Leah API response into a list of table metadata dictionaries.
+
+     Args:
+         response (dict): The Leah API response containing 'openai_completion' and 'optional_tables'.
+
+     Returns:
+         list[dict]: List of dictionaries with table 'id', 'description', and 'name'.
+     """
+     ans = []
+     all_tables = response["openai_completion"] + response["optional_tables"]
+     for element in all_tables:
+         table_metadata = element["metadata"]["data_table"]
+         ans.append(
+             {
+                 "id": table_metadata["id"],
+                 "description": table_metadata["description"],
+                 "name": table_metadata["name"],
+             }
+         )
+     return ans
+
+
+ def parse_leah_trees(response: dict) -> dict[str, pd.DataFrame]:
+     """
+     Parses Leah tree responses into a dictionary of DataFrames.
+
+     Args:
+         response (dict): Leah tree response mapping keys to tree JSON objects.
+
+     Returns:
+         dict[str, pd.DataFrame]: Dictionary mapping keys to DataFrames representing the tree structure.
+     """
+     ans = {}
+     for key, tree in response.items():
+         heritage_df = generate_heritage_col_df_from_json(tree)
+         ans[key] = heritage_df
+     return ans
+
+
+ def generate_heritage_col_df_from_json(tree_json: dict) -> pd.DataFrame:
+     """
+     Generates a pandas DataFrame from a heritage tree JSON structure.
+
+     Args:
+         tree_json (dict): JSON object representing the heritage tree.
+
+     Returns:
+         pd.DataFrame: DataFrame containing the heritage columns merged with display data.
+     """
+     [ref_name] = tree_json.keys()
+     all_ref_lineages, display_map = lineages_of_refs_and_display_map_from_json(
+         tree_json
+     )
+     heritage_df = heritage_df_from_ref_lineages(all_ref_lineages, ref_name)
+     display_df = display_df_from_map(display_map, ref_name)
+     return pd.merge(heritage_df, display_df, on=ref_name)
+
+
+ def lineages_of_refs_and_display_map_from_json(tree_json: dict) -> tuple[list, list]:
+     """
+     Extracts all reference lineages and display map from a tree JSON structure.
+
+     Args:
+         tree_json (dict): JSON object representing the tree structure.
+
+     Returns:
+         tuple[list, list]:
+             - List of all reference lineages (each as a list of reference IDs).
+             - Display map as a list of tuples (ref_id, data dict).
+     """
+     [(root_ref, root_ref_info)] = tree_json.items()
+     root_ref_node = [root_ref]
+     all_nodes = [root_ref_node]
+     display_map = [(root_ref, root_ref_info["data"])]
+     add_nodes_recursively(
+         root_ref_node, root_ref_info["children"], all_nodes, display_map
+     )
+     return all_nodes, display_map
+
+
+ def add_nodes_recursively(
+     ancestry: list[str], sons: list, all_nodes: list, display_map: list
+ ):
+     """
+     Recursively traverses and collects nodes and display data from a tree structure.
+
+     Args:
+         ancestry (list[str]): The lineage of ancestor references leading to the current node.
+         sons (list): List of child nodes (as dicts).
+         all_nodes (list): Accumulator for all reference lineages.
+         display_map (list): Accumulator for display map tuples (ref_id, data dict).
+     """
+     for son in sons:
+         [(son_ref_id, son_ref_info)] = son.items()
+         sons_heritage = ancestry + [son_ref_id]
+         display_map.append((son_ref_id, son_ref_info["data"]))
+         all_nodes.append(sons_heritage)
+         grand_children = son_ref_info.get("children", [])
+         add_nodes_recursively(sons_heritage, grand_children, all_nodes, display_map)
+
+
+ def heritage_df_from_ref_lineages(
+     all_ref_lineages: list, ref_name: str
+ ) -> pd.DataFrame:
+     """
+     Generates a DataFrame from a list of reference lineages.
+
+     Args:
+         all_ref_lineages (list): List of reference lineages (each as a list of reference IDs).
+         ref_name (str): Name of the reference column.
+
+     Returns:
+         pd.DataFrame: DataFrame with columns for each ancestor and the reference itself.
+     """
+     max_num_ancestors = len(max(all_ref_lineages, key=len)) - 1
+     col_names = ref_col_names(ref_name, max_num_ancestors)
+     col_names_to_refs = []
+     for lineage in all_ref_lineages:
+         lineage_with_all_levels = right_fill_ancestor_ref(lineage, max_num_ancestors)
+         col_names_to_refs.append(dict(zip(col_names, lineage_with_all_levels)))
+     return pd.DataFrame(col_names_to_refs)
+
+
+ def ref_col_names(ref_name: str, max_num_ancestors: int) -> list:
+     """
+     Generates column names for ancestor references and the main reference.
+
+     Args:
+         ref_name (str): Name of the reference column.
+         max_num_ancestors (int): Maximum number of ancestor levels.
+
+     Returns:
+         list: List of column names for each ancestor and the reference.
+     """
+     ancestor_cols = [f"{ref_name}_p{n}" for n in range(max_num_ancestors)]
+     return ancestor_cols + [ref_name]
+
+
+ def right_fill_ancestor_ref(lineage: list[str], max_num_ancestors: int) -> list[str]:
+     """
+     Fills the ancestor portion of a lineage to a fixed length with None values.
+
+     Args:
+         lineage (list[str]): List of reference IDs representing a lineage.
+         max_num_ancestors (int): The total number of ancestor columns required.
+
+     Returns:
+         list[str]: The lineage, right-filled with None for missing ancestors.
+     """
+     ancestors = lineage[:-1]
+     ancestors_fill = ancestors + ([None] * (max_num_ancestors - len(ancestors)))
+     return ancestors_fill + [lineage[-1]]
+
+
+ def display_df_from_map(display_map: list, ref_name: str) -> pd.DataFrame:
+     """
+     Generates a pandas DataFrame from a display map.
+
+     Args:
+         display_map (list): List of tuples (ref_id, data dict) for each node in the tree.
+         ref_name (str): Name of the reference column.
+
+     Returns:
+         pd.DataFrame: DataFrame containing reference IDs and their associated display data.
+     """
+     data = [{ref_name: ref, **data} for ref, data in display_map]
+     return pd.DataFrame(data)
+
+
+ def parse_query_data(data: dict) -> OrderedDict:
+     """
+     Parse a query data dictionary and return an OrderedDict with selected keys.
+
+     Args:
+         data (dict): Dictionary containing query data as returned by the API.
+
+     Returns:
+         OrderedDict: Ordered dictionary with the keys 'id', 'name', 'author_name', 'created', 'updated', and 'query'.
+     """
+     data["query"] = parse_query(data["query"])
+     ordered_keys = ["id", "name", "author_name", "created", "updated", "query"]
+     ordered_pairs = [(key, data[key]) for key in ordered_keys]
+     ordered_data = OrderedDict(ordered_pairs)
+     return ordered_data
+
+
+ def parse_query(query: dict) -> dict:
+     """
+     Flatten a saved server query payload into the keys used by Query.
+
+     Args:
+         query (dict): Raw query payload with 'select', 'where', 'iterate', and 'language'.
+
+     Returns:
+         dict: Dictionary with 'table_name', 'where', 'group_by', 'aggregate', and 'language'.
+     """
+     parsed_query = {
+         "table_name": query["select"][0]["table"],
+         "where": query["where"],
+         "group_by": query["iterate"][0]["group_by"],
+         "aggregate": query["iterate"][0]["aggregate"],
+         "language": query["language"],
+     }
+     return parsed_query
tukan_python-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,120 @@
+ Metadata-Version: 2.4
+ Name: tukan_python
+ Version: 0.1.0
+ Summary: A package to utilize the tukan API.
+ Author-email: roberto <roberto@tukanmx.com>
+ License: MIT License
+
+ Copyright (c) 2021 TukanMx
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/TukanMx/tukan_python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.7
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: loguru==0.7.2
+ Requires-Dist: numpy==1.26.1
+ Requires-Dist: pandas==2.1.1
+ Requires-Dist: pytest==7.4.2
+ Requires-Dist: python-dotenv==1.0.0
+ Requires-Dist: requests==2.31.0
+ Requires-Dist: tqdm==4.66.1
+ Dynamic: license-file
+
+ # tukan_python
+
+ A Python package to interact with the TukanMX API, retrieve table metadata, and build and execute queries with flexible filters, groupings, and aggregations.
+
+ ## Installation
+
+ Once the package is published to PyPI, install it using:
+
+ ```bash
+ pip install tukan_python
+ ```
+
+ ## Usage
+
+ ### Authentication
+
+ You need an API token to use the TukanMX API. You can provide it directly to the `Tukan` or `Query` classes, or set it as an environment variable:
+
+ ```bash
+ export API_TUKAN=your_api_token
+ ```
+
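+ If no token is passed explicitly, the client falls back to the `API_TUKAN` environment variable; a `.env` file in the working directory is also picked up via `python-dotenv`:
+
+ ```python
+ from tukan_python.tukan import Tukan
+
+ tukan = Tukan()  # reads API_TUKAN from the environment or a .env file
+ ```
+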
+ ### Main Classes
+
+ #### Tukan
+ Handles authentication and requests to the TukanMX API. Provides methods for retrieving tables, indicators, and metadata, as well as sending and receiving data.
+
+ ```python
+ from tukan_python.tukan import Tukan
+
+ tukan = Tukan(token="your_api_token")
+
+ # Retrieve metadata for a table
+ metadata = tukan.get_table_metadata("table_name")
+
+ # Retrieve a list of tables
+ all_tables = tukan.all_tables()
+
+ # Retrieve a list of indicators
+ indicators = tukan.all_indicators()
+ ```
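+
+ Other `Tukan` helpers cover natural-language table search and hierarchical reference trees; a minimal sketch (the prompt and table name are placeholders):
+
+ ```python
+ # Ask Leah for table suggestions from a natural-language prompt
+ suggestions = tukan.ask_leah("some question about the data", language="en")
+
+ # Hierarchical reference trees for a table, as pandas DataFrames
+ trees = tukan.get_tree_for_table("table_name")
+ ```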
+
+ #### Query
+ Helper class for building and executing queries against the TukanMX API. Supports filters, groupings, aggregations, and execution.
+
+ ```python
+ from tukan_python.query import Query
+
+ # Create a Query instance
+ query = Query("table_name", token="your_api_token")
+
+ # Add filters, groupings, or aggregations as needed
+ query.set_where([{ "reference": "column", "value": ["some_value"] }])
+ query.set_group_by([{ "reference": "column" }])
+ query.set_aggregate([{ "indicator": "indicator_name", "operations": ["sum"] }])
+
+ # Execute the query
+ result = query.execute_query()
+ print(result["df"])  # result is a dict with 'indicators' and a pandas DataFrame
+ ```
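+
+ The setter and `add_*` methods return `self`, so queries can also be built fluently; a sketch with placeholder reference and indicator names:
+
+ ```python
+ query = (
+     Query("table_name", token="your_api_token")
+     .add_date_filter("date_reference", "2023-01-01", "2023-12-31")
+     .add_standard_filter("region_reference", ["north", "south"])
+     .add_date_reference_to_group_by("date_reference", level="monthly")
+     .add_aggregate("indicator_name", ["sum", "avg"])
+ )
+ result = query.execute_query(mode="vertical")
+ ```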
+
+ #### Utility Functions
+
+ - `create_identity_query_for_table(table_name, language)`
+ - `create_identity_query_for_table_with_date_filters(table_name, language, from_date, to_date)`
+ - `create_query_from_query_id_or_name(query_id_or_name)`
+ - `create_query_from_payload(payload)`
+
+ These functions help build queries quickly from table names, IDs, or payloads.
+
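+ For example, an identity query over a date range can be built and executed in two steps (table name and dates are placeholders):
+
+ ```python
+ from tukan_python.query import create_identity_query_for_table_with_date_filters
+
+ # Assumes API_TUKAN is set in the environment, since no token is passed
+ query = create_identity_query_for_table_with_date_filters(
+     "table_name", "en", "2023-01-01", "2023-12-31"
+ )
+ result = query.execute_query()
+ ```
+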
+ ## License
+ See the `LICENSE` file for license information.
+
+ ---
+
+ For more details, see the code in `tukan_python/tukan.py` and `tukan_python/query.py`.
tukan_python-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+ tukan_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tukan_python/query.py,sha256=QbpTFYPMvEfYdDJg0Yyk9HlaXWfL_FAVPqdVgn_WZgY,16212
+ tukan_python/tukan.py,sha256=DqgQFgE_RN7vTQBQbLf-FBP7fkW047MweIqBsZQB3qY,18760
+ tukan_python-0.1.0.dist-info/licenses/LICENSE,sha256=4-KXxjgpAywK6yAHKLHmKoSJ2KqqKzOZYTcey0B0xGY,1064
+ tukan_python-0.1.0.dist-info/METADATA,sha256=omBgbBUHpyI-mG_q8C91ypRTn6Cm4DdKHpD_BHcJDow,4125
+ tukan_python-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ tukan_python-0.1.0.dist-info/top_level.txt,sha256=64Ewy3_aoQ0bzlFumNTozOHZ60jP-EGxgC-duqmPlnM,13
+ tukan_python-0.1.0.dist-info/RECORD,,
tukan_python-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
tukan_python-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2021 TukanMx
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
tukan_python-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ tukan_python