tukan-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tukan-python might be problematic.
- tukan_python/__init__.py +0 -0
- tukan_python/query.py +515 -0
- tukan_python/tukan.py +560 -0
- tukan_python-0.1.0.dist-info/METADATA +120 -0
- tukan_python-0.1.0.dist-info/RECORD +8 -0
- tukan_python-0.1.0.dist-info/WHEEL +5 -0
- tukan_python-0.1.0.dist-info/licenses/LICENSE +21 -0
- tukan_python-0.1.0.dist-info/top_level.txt +1 -0
tukan_python/__init__.py
ADDED
File without changes
tukan_python/query.py
ADDED
@@ -0,0 +1,515 @@
from collections import defaultdict
from typing import Literal, Optional

import pandas as pd

from tukan_python.tukan import Tukan


class Query:
    """
    Helper class for building and executing queries against the TukanMX API.

    This class allows flexible construction of query payloads, including filters,
    groupings, and aggregations. It supports saving, executing, and reconstructing
    queries from names, IDs, or payloads.

    Attributes:
        Tukan (Tukan): Instance of the Tukan API client.
        table_name (str): Name of the data table to query.
        where (list[dict]): List of filter conditions.
        group_by (list[dict]): List of group-by conditions.
        aggregate (list[dict]): List of aggregate operations.
        language (str): Language for the query results.
    """

    def __init__(
        self,
        table_name: str,
        token: Optional[str] = None,
        where: Optional[list[dict]] = None,
        group_by: Optional[list[dict]] = None,
        aggregate: Optional[list[dict]] = None,
        language: str = "en",
    ):
        """
        Initialize a new Query instance.

        Args:
            table_name (str): Name of the data table to query.
            token (Optional[str]): API token for authentication.
            where (Optional[list[dict]]): List of filter conditions.
            group_by (Optional[list[dict]]): List of group-by conditions.
            aggregate (Optional[list[dict]]): List of aggregate operations.
            language (str): Language for the query results.
        """
        self.table_name = table_name
        self.Tukan = Tukan(token)
        self.__set_metadata__()
        self.where = where if where is not None else []
        self.group_by = group_by if group_by is not None else []
        self.aggregate = aggregate if aggregate is not None else []
        self.language = language

    def __set_metadata__(self) -> None:
        """
        Fetch the metadata for the table and cache the reference types
        (dtypes_to_refs) and indicators (all_indicators) on the instance.
        """
        meta = self.Tukan.get_table_metadata(self.table_name)
        dt_to_refs = defaultdict(set)
        for ref in meta["data_table_references"]:
            dt_to_refs[ref["type"]].add(ref["id"])

        self.dtypes_to_refs = dt_to_refs
        self.all_indicators = meta["indicators"]

    def get_table_name(self) -> str:
        """
        Get the name of the table for the query.

        Returns:
            str: The name of the table.
        """
        return self.table_name

    def set_where(self, where: list[dict]) -> "Query":
        """
        Set the filter conditions for the query.

        Args:
            where (list[dict]): List of filter conditions.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.where = where
        return self

    def get_where(self) -> list[dict]:
        """
        Get the filter conditions for the query.

        Returns:
            list[dict]: List of filter conditions.
        """
        return self.where

    def add_filter(self, filter: dict) -> "Query":
        """
        Add a filter condition to the query.

        Args:
            filter (dict): Filter condition to add.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.where.append(filter)
        return self

    def add_date_filter(
        self, reference: str, date_from: str, date_to: Optional[str] = None
    ) -> "Query":
        """
        Add a date filter to the query. Dates should be in ISO format (YYYY-MM-DD).

        Args:
            reference (str): Reference field for the date filter.
            date_from (str): Start date for the filter.
            date_to (Optional[str]): End date for the filter (optional).

        Returns:
            Query: This instance, to allow method chaining.
        """
        dt_filter = {"reference": reference, "from": date_from}
        if date_to is not None:
            dt_filter["to"] = date_to
        self.where.append(dt_filter)
        return self

    def add_numeric_filter(
        self,
        reference: str,
        lte: Optional[float] = None,
        eq: Optional[float] = None,
        gte: Optional[float] = None,
    ) -> "Query":
        """
        Add a numeric filter to the query.

        Args:
            reference (str): Reference field for the numeric filter.
            lte (Optional[float]): Less-than-or-equal value.
            eq (Optional[float]): Equal value.
            gte (Optional[float]): Greater-than-or-equal value.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.__validate_numeric_filter__(lte, eq, gte)
        nm_filter = {"reference": reference, "lte": lte, "eq": eq, "gte": gte}
        nm_filter = {k: v for k, v in nm_filter.items() if v is not None}
        self.where.append(nm_filter)
        return self

    def __validate_numeric_filter__(
        self,
        lte: Optional[float] = None,
        eq: Optional[float] = None,
        gte: Optional[float] = None,
    ) -> None:
        """
        Validate the numeric filter arguments.

        Args:
            lte (Optional[float]): Less-than-or-equal value.
            eq (Optional[float]): Equal value.
            gte (Optional[float]): Greater-than-or-equal value.

        Raises:
            ValueError: If the filter arguments are invalid.
        """
        if eq is None and lte is None and gte is None:
            raise ValueError("At least one of eq, lte, or gte must be specified")
        elif eq is not None and (lte is not None or gte is not None):
            raise ValueError("The eq parameter cannot be used with lte or gte")

    def add_standard_filter(self, reference: str, value: list[str]) -> "Query":
        """
        Add a standard (categorical) filter to the query.

        Args:
            reference (str): Reference field for the filter.
            value (list[str]): List of values to filter by.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.where.append({"reference": reference, "value": value})
        return self

    def set_group_by(self, group_by: list[dict]) -> "Query":
        """
        Set the group-by conditions for the query.

        Args:
            group_by (list[dict]): List of group-by conditions.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.group_by = group_by
        return self

    def get_group_by(self) -> list[dict]:
        """
        Get the group-by conditions for the query.

        Returns:
            list[dict]: List of group-by conditions.
        """
        return self.group_by

    def add_to_group_by(self, group_by: dict) -> "Query":
        """
        Add a group-by condition to the query.

        Args:
            group_by (dict): Group-by condition to add.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.group_by.append(group_by)
        return self

    def add_non_date_reference_to_group_by(self, reference: str) -> "Query":
        """
        Add a non-date reference to the group-by conditions.

        Args:
            reference (str): Reference field to group by.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.group_by.append({"reference": reference})
        return self

    def add_date_reference_to_group_by(
        self,
        reference: str,
        level: Literal["yearly", "quarterly", "monthly", "as_is"] = "as_is",
    ) -> "Query":
        """
        Add a date reference to the group-by conditions with a specified granularity.

        Args:
            reference (str): Reference field to group by.
            level (Literal): Granularity level ('yearly', 'quarterly', 'monthly', 'as_is').

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.__validate_date_filter__(level)
        dt_filter = {"reference": reference, "level": level}
        self.group_by.append(dt_filter)
        return self

    def __validate_date_filter__(
        self, level: Literal["yearly", "quarterly", "monthly", "as_is"]
    ) -> None:
        """
        Validate the date filter granularity level.

        Args:
            level (Literal): Granularity level to validate.

        Raises:
            ValueError: If the level is invalid.
        """
        if level not in {"yearly", "quarterly", "monthly", "as_is"}:
            raise ValueError(
                "Invalid level. Must be 'yearly', 'quarterly', 'monthly', or 'as_is'"
            )

    def set_aggregate(self, aggregate: list[dict]) -> "Query":
        """
        Set the aggregate operations for the query.

        Args:
            aggregate (list[dict]): List of aggregate operations.

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.aggregate = aggregate
        return self

    def get_aggregate(self) -> list[dict]:
        """
        Get the aggregate operations for the query.

        Returns:
            list[dict]: List of aggregate operations.
        """
        return self.aggregate

    def add_aggregate(self, indicator: str, operations: list[str]) -> "Query":
        """
        Add an aggregate operation for a specific indicator.

        Args:
            indicator (str): Indicator to aggregate.
            operations (list[str]): List of operations (e.g., ['sum', 'avg', 'identity']).

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.__validate_aggregate__(operations)
        self.aggregate.append({"indicator": indicator, "operations": operations})
        return self

    def __validate_aggregate__(self, operations: list[str]) -> None:
        """
        Validate the aggregate operations.

        Args:
            operations (list[str]): List of aggregate operations.

        Raises:
            ValueError: If operations are empty or invalid.
        """
        if len(operations) == 0:
            raise ValueError("At least one operation must be specified")
        elif {*operations} - {"sum", "avg", "identity"}:
            raise ValueError("Invalid operation. Must be 'sum', 'avg', or 'identity'")

    def set_language(self, language: str) -> "Query":
        """
        Set the language for the query results.

        Args:
            language (str): The language code (e.g., 'en', 'es').

        Returns:
            Query: This instance, to allow method chaining.
        """
        self.language = language
        return self

    def get_language(self) -> str:
        """
        Get the language for the query results.

        Returns:
            str: The language code.
        """
        return self.language

    def __get_select__(self) -> list[dict]:
        """
        Get the select clause for the query.

        Returns:
            list[dict]: List containing the table and indicators to select.
        """
        indicators = [x["indicator"] for x in self.aggregate]
        return [{"table": self.table_name, "indicators": indicators}]

    def __get_iterate__(self) -> list[dict]:
        """
        Get the iterate clause for the query.

        Returns:
            list[dict]: List containing group-by and aggregate operations.
        """
        return [{"group_by": self.group_by, "aggregate": self.aggregate}]

    def __str__(self) -> str:
        """
        Return the string representation of the query payload.

        Returns:
            str: Stringified query payload.
        """
        payload_info = {
            "table_name": self.table_name,
            "language": self.language,
            "where": self.where,
            "group_by": self.group_by,
            "aggregate": self.aggregate,
        }
        return str(payload_info)

    def __request_payload__(self) -> dict:
        """
        Construct the full query payload as a dictionary.

        Returns:
            dict: The query payload.
        """
        return {
            "select": self.__get_select__(),
            "where": self.where,
            "iterate": self.__get_iterate__(),
            "language": self.language,
        }

    def set_aggregate_for_all_indicators(self, operations: list[str]) -> "Query":
        """
        Set the given aggregate operations for every indicator in the current table.

        Args:
            operations (list[str]): Operations to apply to each indicator.

        Returns:
            Query: This instance, to allow method chaining.
        """
        all_indicators = self.__all_indicators_refs_for_table__()
        self.aggregate = [
            {"indicator": indicator, "operations": operations}
            for indicator in all_indicators
        ]
        return self

    def set_groupby_for_all_columns(self) -> "Query":
        """
        Set group-by for all references (columns) in the current table.

        Returns:
            Query: This instance, to allow method chaining.
        """
        references = self.__all_non_date_references__()
        non_date_group_by = [{"reference": reference} for reference in references]
        date_group_by = [
            {"reference": reference, "level": "as_is"}
            for reference in self.dtypes_to_refs["DT"]
        ]
        group_by = [*non_date_group_by, *date_group_by]
        self.set_group_by(group_by)
        return self

    def __all_non_date_references__(self) -> list[str]:
        """
        Get all non-date reference columns for the current table.

        Returns:
            list[str]: List of reference column names.
        """
        non_dt_ref_groups = [
            values for key, values in self.dtypes_to_refs.items() if key != "DT"
        ]
        return [*set.union(*non_dt_ref_groups)]

    def __all_indicators_refs_for_table__(self) -> list[str]:
        """
        Get all indicator references for the current table.

        Returns:
            list[str]: List of indicator reference names.
        """
        all_indicators = [indicator["ref"] for indicator in self.all_indicators]
        return all_indicators

    def save_query(self, name: str) -> str:
        """
        Save the current query to the server with the given name.

        Args:
            name (str): Name to save the query as.

        Returns:
            str: Server response.
        """
        BODY = {
            "data_table": self.table_name,
            "language": self.language,
            "name": name,
            "query": self.__request_payload__(),
        }

        response = self.Tukan.__execute_post_operation__(BODY, "visualizations/query/")

        return response

    def execute_query(
        self, mode: Literal["vertical", "horizontal"] = "vertical"
    ) -> dict:
        """
        Execute the query on the server and return the results.

        Args:
            mode (Literal): Output mode, 'vertical' or 'horizontal'.

        Returns:
            dict: Dictionary containing indicators and the result DataFrame.
        """
        payload = self.__request_payload__()
        payload["mode"] = mode
        response = self.Tukan.__execute_post_operation__(payload, "data/new_retrieve/")
        df = pd.DataFrame(response["data"])
        return {"indicators": response["indicators"], "df": df}


def create_identity_query_for_table_with_date_filters(
    table_name: str,
    language: Literal["en", "es"],
    from_date: str,
    to_date: str,
) -> Query:
    """
    Create an identity query for a table with date filters applied.

    Args:
        table_name (str): Name of the table.
        language (Literal): Language for the query.
        from_date (str): Start date for the filter.
        to_date (str): End date for the filter.
    """
    query = create_identity_query_for_table(table_name, language)
    for date_ref in query.dtypes_to_refs["DT"]:
        query.add_date_filter(date_ref, from_date, to_date)
    return query


def create_identity_query_for_table(
    table_name: str, language: Literal["en", "es"]
) -> Query:
    """
    Create an identity query for a table (all indicators, all references, group by all columns).

    Args:
        table_name (str): Name of the table.
        language (Literal): Language for the query.
    """
    query = Query(table_name)
    query.set_aggregate_for_all_indicators(["identity"])
    query.set_language(language)
    query.set_groupby_for_all_columns()
    return query


def create_query_from_query_id_or_name(query_id_or_name: str) -> Query:
    """
    Create a Query instance from a query ID or name saved on the server.

    Args:
        query_id_or_name (str): The query's ID or name.
    """
    query = Tukan().get_query_from_name_or_id(query_id_or_name)["query"]
    query = create_query_from_payload(query)
    return query


def create_query_from_payload(payload: dict) -> Query:
    """
    Create a Query instance from a query payload dictionary.

    Args:
        payload (dict): The query payload.
    """
    query = Query(payload["table_name"])
    query.set_where(payload["where"])
    query.set_group_by(payload["group_by"])
    query.set_aggregate(payload["aggregate"])
    query.set_language(payload["language"])
    return query
tukan_python/tukan.py
ADDED
@@ -0,0 +1,560 @@
import os
from collections import OrderedDict
import json
from functools import partial, update_wrapper
from random import randint
from time import sleep
from typing import Callable, Optional

import requests
import pandas as pd
from dotenv import load_dotenv
from loguru import logger

load_dotenv()


class Tukan:
    """
    Handles authentication and requests to the TukanMX API.

    This class provides methods for retrieving tables, indicators, and metadata,
    as well as sending and receiving data via POST and GET operations. It also
    provides utility methods for checking the existence of tables and indicators,
    and for parsing hierarchical data structures.

    Attributes:
        token (str): API token for authentication.
        env (str): Base URL for the TukanMX API.
    """

    def __init__(self, token: Optional[str] = None):
        """
        Initialize a new Tukan API client instance.

        Args:
            token (Optional[str]): API token for authentication. If not provided,
                the API_TUKAN environment variable is used.

        Raises:
            ValueError: If no token is provided and API_TUKAN is not set in the environment.
        """
        env_token = os.getenv("API_TUKAN")
        if token is None and not env_token:
            raise ValueError(
                "Token not provided and not found in environment variables"
            )
        self.token = token or env_token
        self.env = "https://client.tukanmx.com/"

    def __execute_post_operation__(self, payload: dict, source: str):
        """
        Execute a POST request to the TukanMX API.

        Args:
            payload (dict): JSON payload to send in the POST request.
            source (str): API endpoint to post to.

        Returns:
            dict: Parsed JSON response from the API.

        Raises:
            Exception: If the operation is not allowed (HTTP 403).
        """
        target_url = self.env + source
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"token {self.token}",
        }
        request_partial = wrapped_partial(
            requests.request,
            method="POST",
            url=target_url,
            json=payload,
            headers=headers,
            timeout=20,
        )
        response = self.__persistent_request__(request_partial)
        if response.status_code < 300:
            message = response.json()
            return message
        elif response.status_code == 403:
            logger.info(f"{response.text}")
            raise Exception("Operation not allowed on admin. Contact administrator!")
        else:
            message = response.text
            return json.loads(message)

    def __execute_get_operation__(self, source: str, query: dict):
        """
        Execute a GET request to the TukanMX API.

        Args:
            source (str): API endpoint to query.
            query (dict): Query parameters for the GET request.

        Returns:
            dict: Parsed JSON response from the API.
        """
        target_url = self.env + source
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"token {self.token}",
        }
        response = requests.get(url=target_url, params=query, headers=headers)
        if response.status_code < 300:
            message = response.json()
            return message
        else:
            message = response.text
            return json.loads(message)

    def __persistent_request__(self, request_partial: Callable):
        """
        Attempt a request persistently, retrying on failure.

        Args:
            request_partial (Callable): A partial function representing the request to execute.

        Returns:
            requests.Response: The response object from the last attempt.

        Raises:
            ConnectionError: If no response was received on any attempt.
        """
        attempts = 0
        response = None
        while attempts < 2:
            try:
                response = request_partial()
                if response.status_code < 300:
                    break
            except Exception:
                pass
            attempts += 1
            sleep(randint(3, 5))
        if response is None:
            raise ConnectionError("Request failed on every attempt")
        return response

    def all_tables(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
        """
        Retrieve a list of all available data tables.

        Args:
            page (int): Page number for pagination.
            page_size (int): Number of tables per page.

        Returns:
            list[dict]: List of table metadata dictionaries.
        """
        payload = {
            "resource": "datatable",
            "operation": "view",
            "page": page,
            "page_size": page_size,
        }
        response = self.__execute_post_operation__(payload, "data/")
        return response["data"]

    def get_table(self, table_name: str) -> dict:
        """
        Retrieve metadata for a specific data table by name or ID.

        Args:
            table_name (str): The name or ID of the data table.

        Returns:
            dict: Metadata dictionary for the table.
        """
        payload = {
            "resource": "datatable",
            "operation": "view",
            "page": "1",
            "page_size": "1",
            "filter_by": {"id": table_name},
        }
        response = self.__execute_post_operation__(payload, "data/")
        return response["data"][0]

    def does_table_exist(self, table_name: str) -> bool:
        """
        Check if a data table exists by name or ID.

        Args:
            table_name (str): The name or ID of the data table.

        Returns:
            bool: True if the table exists, False otherwise.
        """
        try:
            self.get_table(table_name)
            return True
        except IndexError:
            return False

    def get_table_metadata(self, table_name: str, language="en") -> dict:
        """
        Retrieve metadata for a specific table, including columns and references.

        Args:
            table_name (str): The name or ID of the data table.
            language (str): Language for metadata (default is 'en').

        Returns:
            dict: Metadata dictionary for the table.
        """
        payload = {"data": {"id": table_name, "language": language}}
        response = self.__execute_post_operation__(payload, "data/metadata/")
        return response

    def all_indicators(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
        """
        Retrieve all indicators available in the database.

        Args:
            page (int): Page number for pagination (default is 1).
            page_size (int): Number of indicators per page (default is 2,500).

        Returns:
            list[dict]: List of indicator metadata dictionaries.
        """
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
        }
        response = self.__execute_post_operation__(payload, "data/")
        return response["data"]

    def all_indicators_for_table(
        self, table_name: str, page: int = 1, page_size: int = 2_500
    ) -> list[dict]:
        """
        Retrieve all indicators for a specific table.

        Args:
            table_name (str): The name or ID of the data table.
            page (int): Page number for pagination.
            page_size (int): Number of indicators per page.

        Returns:
            list[dict]: List of indicator metadata dictionaries.
        """
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
            "filter_by": {"data_table": table_name},
        }
        response = self.__execute_post_operation__(payload, "data/")
        return response["data"]

    def does_indicator_ref_exist(self, indicator_ref: str) -> bool:
        """
        Check if an indicator reference exists.

        Args:
            indicator_ref (str): The reference ID of the indicator.

        Returns:
            bool: True if the indicator exists, False otherwise.
        """
        try:
            indicator_info = self.get_indicator_by_ref(indicator_ref, page_size=1)
        except IndexError:
            indicator_info = {}
        return bool(indicator_info)

    def get_indicator_by_ref(
        self, indicator_ref: str, page: int = 1, page_size: int = 2_500
    ) -> dict:
        """
        Retrieve indicator metadata by its reference ID.

        Args:
            indicator_ref (str): The reference ID of the indicator.
            page (int): Page number for pagination.
            page_size (int): Number of indicators per page.

        Returns:
            dict: Metadata dictionary for the indicator.
        """
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
            "filter_by": {"ref": indicator_ref},
        }
        response = self.__execute_post_operation__(payload, "data/")
        return response["data"][0]

    def ask_leah(self, query: str, language: str = "en") -> list[dict]:
        """
        Query the Leah endpoint for table suggestions based on a natural language query.

        Args:
            query (str): The question or prompt for Leah.
            language (str): Language for the query (default is 'en').

        Returns:
            list[dict]: Parsed response with table metadata suggestions.
        """
        payload = {"query": query, "language": language}
        response = self.__execute_post_operation__(payload, "leah/")
        parsed_response = parse_leah(response)
        return parsed_response

    def get_tree_for_table(self, table_name: str) -> dict[str, pd.DataFrame]:
        """
        Retrieve hierarchical tree structures for a given table.

        Args:
            table_name (str): The name or ID of the data table.

        Returns:
            dict[str, pd.DataFrame]: Dictionary mapping each table reference to a
                DataFrame of its reference values with hierarchical information.
        """
        payload = {
            "operation": "view",
            "resource": "tree",
            "filter_by": {"data_table": table_name},
        }
        response = self.__execute_post_operation__(payload, "data/")
        parsed_trees = parse_leah_trees(response["data"][0]["tree"])
        return parsed_trees

    def get_query_from_name_or_id(self, query_name_or_id: str) -> OrderedDict:
        """
        Retrieve a saved query by its name or ID and return its details in an OrderedDict.

        Args:
            query_name_or_id (str): Name or ID of the saved query.

        Returns:
            OrderedDict: Ordered dictionary containing keys 'id', 'name',
                'author_name', 'created', 'updated', and 'query'.
        """
        BODY = {
            "page_size": "10_000",
            "current": "1",
            "order_by": "-updated",
            "tags": "",
            "search": query_name_or_id,
            "api": "visualizations",
            "resource": "queries",
        }
        response = self.__execute_get_operation__("visualizations/queries", BODY)
        data = response["data"][0]
        parsed_data = parse_query_data(data)
        return parsed_data


def wrapped_partial(func, *args, **kwargs) -> Callable:
    """
    Return a partial function with updated wrapper metadata.

    Args:
        func (Callable): The function to partially apply.
        *args: Positional arguments to pre-fill.
        **kwargs: Keyword arguments to pre-fill.

    Returns:
        Callable: The partially applied function with updated metadata.
    """
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func


def parse_leah(response: dict) -> list[dict]:
    """
    Parse a Leah API response into a list of table metadata dictionaries.

    Args:
        response (dict): The Leah API response containing 'openai_completion' and 'optional_tables'.

    Returns:
        list[dict]: List of dictionaries with table 'id', 'description', and 'name'.
    """
    ans = []
    all_tables = response["openai_completion"] + response["optional_tables"]
    for element in all_tables:
        table_metadata = element["metadata"]["data_table"]
        ans.append(
            {
                "id": table_metadata["id"],
                "description": table_metadata["description"],
                "name": table_metadata["name"],
            }
        )
    return ans


def parse_leah_trees(response: dict) -> dict[str, pd.DataFrame]:
    """
    Parse Leah tree responses into a dictionary of DataFrames.

    Args:
        response (dict): Leah tree response mapping keys to tree JSON objects.

    Returns:
        dict[str, pd.DataFrame]: Dictionary mapping keys to DataFrames representing the tree structure.
    """
    ans = {}
    for key, tree in response.items():
        heritage_df = generate_heritage_col_df_from_json(tree)
        ans[key] = heritage_df
    return ans


def generate_heritage_col_df_from_json(tree_json: dict) -> pd.DataFrame:
    """
    Generate a pandas DataFrame from a heritage tree JSON structure.

    Args:
        tree_json (dict): JSON object representing the heritage tree.

    Returns:
        pd.DataFrame: DataFrame containing the heritage columns merged with display data.
    """
    [ref_name] = tree_json.keys()
    all_ref_lineages, display_map = lineages_of_refs_and_display_map_from_json(
        tree_json
    )
    heritage_df = heritage_df_from_ref_lineages(all_ref_lineages, ref_name)
    display_df = display_df_from_map(display_map, ref_name)
    return pd.merge(heritage_df, display_df, on=ref_name)


def lineages_of_refs_and_display_map_from_json(tree_json: dict) -> tuple[list, list]:
    """
    Extract all reference lineages and the display map from a tree JSON structure.

    Args:
        tree_json (dict): JSON object representing the tree structure.

    Returns:
        tuple[list, list]:
            - List of all reference lineages (each as a list of reference IDs).
            - Display map as a list of tuples (ref_id, data dict).
    """
    [(root_ref, root_ref_info)] = tree_json.items()
    root_ref_node = [root_ref]
    all_nodes = [root_ref_node]
    display_map = [(root_ref, root_ref_info["data"])]
    add_nodes_recursively(
        root_ref_node, root_ref_info["children"], all_nodes, display_map
    )
    return all_nodes, display_map


def add_nodes_recursively(
    ancestry: list[str], sons: list, all_nodes: list, display_map: list
):
    """
    Recursively traverse and collect nodes and display data from a tree structure.

    Args:
        ancestry (list[str]): The lineage of ancestor references leading to the current node.
        sons (list): List of child nodes (as dicts).
        all_nodes (list): Accumulator for all reference lineages.
        display_map (list): Accumulator for display map tuples (ref_id, data dict).
    """
    for son in sons:
        [(son_ref_id, son_ref_info)] = son.items()
        sons_heritage = ancestry + [son_ref_id]
        display_map.append((son_ref_id, son_ref_info["data"]))
        all_nodes.append(sons_heritage)
        grand_children = son_ref_info.get("children", [])
        add_nodes_recursively(sons_heritage, grand_children, all_nodes, display_map)


def heritage_df_from_ref_lineages(
    all_ref_lineages: list, ref_name: str
) -> pd.DataFrame:
    """
    Generate a DataFrame from a list of reference lineages.

    Args:
        all_ref_lineages (list): List of reference lineages (each as a list of reference IDs).
        ref_name (str): Name of the reference column.

    Returns:
        pd.DataFrame: DataFrame with columns for each ancestor and the reference itself.
    """
    max_num_ancestors = len(max(all_ref_lineages, key=len)) - 1
    col_names = ref_col_names(ref_name, max_num_ancestors)
    col_names_to_refs = []
    for lineage in all_ref_lineages:
        lineage_with_all_levels = right_fill_ancestor_ref(lineage, max_num_ancestors)
        col_names_to_refs.append(dict(zip(col_names, lineage_with_all_levels)))
    return pd.DataFrame(col_names_to_refs)


def ref_col_names(ref_name: str, max_num_ancestors: int) -> list:
    """
    Generate column names for ancestor references and the main reference.

    Args:
        ref_name (str): Name of the reference column.
        max_num_ancestors (int): Maximum number of ancestor levels.

    Returns:
        list: List of column names for each ancestor and the reference.
    """
    ancestor_cols = [f"{ref_name}_p{n}" for n in range(max_num_ancestors)]
    return ancestor_cols + [ref_name]


def right_fill_ancestor_ref(lineage: list[str], max_num_ancestors: int) -> list[str]:
    """
    Fill the ancestor portion of a lineage to a fixed length with None values.

    Args:
        lineage (list[str]): List of reference IDs representing a lineage.
        max_num_ancestors (int): The total number of ancestor columns required.

    Returns:
        list[str]: The lineage, right-filled with None for missing ancestors.
    """
    ancestors = lineage[:-1]
    ancestors_fill = ancestors + ([None] * (max_num_ancestors - len(ancestors)))
    return ancestors_fill + [lineage[-1]]


def display_df_from_map(display_map: list, ref_name: str) -> pd.DataFrame:
    """
    Generate a pandas DataFrame from a display map.

    Args:
        display_map (list): List of tuples (ref_id, data dict) for each node in the tree.
        ref_name (str): Name of the reference column.

    Returns:
        pd.DataFrame: DataFrame containing reference IDs and their associated display data.
    """
    data = [{ref_name: ref, **data} for ref, data in display_map]
    return pd.DataFrame(data)


def parse_query_data(data: dict) -> OrderedDict:
    """
    Parse a query data dictionary and return an OrderedDict with selected keys.

    Args:
        data (dict): Dictionary containing query data as returned by the API.

    Returns:
        OrderedDict: Ordered dictionary with keys 'id', 'name', 'author_name',
            'created', 'updated', and 'query'.
    """
    data["query"] = parse_query(data["query"])
    ordered_keys = ["id", "name", "author_name", "created", "updated", "query"]
    ordered_pairs = [(key, data[key]) for key in ordered_keys]
    ordered_data = OrderedDict(ordered_pairs)
    return ordered_data


def parse_query(query: dict) -> dict:
    """
    Flatten a server query payload into the simplified Query constructor format.
    """
    parsed_query = {
        "table_name": query["select"][0]["table"],
        "where": query["where"],
        "group_by": query["iterate"][0]["group_by"],
        "aggregate": query["iterate"][0]["aggregate"],
        "language": query["language"],
    }
    return parsed_query
tukan_python-0.1.0.dist-info/METADATA
ADDED
@@ -0,0 +1,120 @@
Metadata-Version: 2.4
Name: tukan_python
Version: 0.1.0
Summary: A package to utilize the tukan API.
Author-email: roberto <roberto@tukanmx.com>
License: MIT License

Copyright (c) 2021 TukanMx

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Project-URL: Homepage, https://github.com/TukanMx/tukan_python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: loguru==0.7.2
Requires-Dist: numpy==1.26.1
Requires-Dist: pandas==2.1.1
Requires-Dist: pytest==7.4.2
Requires-Dist: python-dotenv==1.0.0
Requires-Dist: requests==2.31.0
Requires-Dist: tqdm==4.66.1
Dynamic: license-file

# tukan_python

A Python package to interact with the TukanMX API, retrieve table metadata, and build and execute queries with flexible filters, groupings, and aggregations.

## Installation

Once the package is published to PyPI, install it using:

```bash
pip install tukan_python
```

## Usage

### Authentication

You need an API token to use the TukanMX API. You can provide it directly to the `Tukan` or `Query` classes, or set it as an environment variable:

```bash
export API_TUKAN=your_api_token
```
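
Since `tukan.py` calls `load_dotenv()` at import time, the token can also live in a local `.env` file (for example, a line reading `API_TUKAN=your_api_token`) instead of the shell environment.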

### Main Classes

#### Tukan
Handles authentication and requests to the TukanMX API. Provides methods for retrieving tables, indicators, and metadata, as well as sending and receiving data.

```python
from tukan_python.tukan import Tukan

tukan = Tukan(token="your_api_token")

# Retrieve metadata for a table
metadata = tukan.get_table_metadata("table_name")

# Retrieve a list of tables
all_tables = tukan.all_tables()

# Retrieve a list of indicators
indicators = tukan.all_indicators()
```
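
The client also exposes the discovery helpers defined in `tukan.py`; a minimal sketch (the prompt and table name here are placeholders):

```python
# Natural-language table search via the Leah endpoint; returns a list of
# dicts with 'id', 'description', and 'name' for each suggested table.
suggestions = tukan.ask_leah("monthly inflation by state", language="en")

# Hierarchical reference trees for a table, parsed into one pandas
# DataFrame per tree reference (ancestor columns plus display data).
trees = tukan.get_tree_for_table("table_name")
```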

#### Query
Helper class for building and executing queries against the TukanMX API. Supports filters, groupings, aggregations, and execution.

```python
from tukan_python.query import Query

# Create a Query instance
query = Query("table_name", token="your_api_token")

# Add filters, groupings, or aggregations as needed
query.set_where([{"reference": "column", "value": ["some_value"]}])
query.set_group_by([{"reference": "column"}])
query.set_aggregate([{"indicator": "indicator_name", "operations": ["sum"]}])

# Execute the query
result = query.execute_query()
print(result["df"])  # result is a dict with 'indicators' and a pandas DataFrame
```
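
Every setter and `add_*` helper returns the `Query` instance, so the same query can also be built fluently; a minimal sketch, with placeholder table, reference, and indicator names:

```python
query = (
    Query("table_name", token="your_api_token")
    .add_standard_filter("column", ["some_value"])
    .add_date_filter("date_column", "2022-01-01", "2022-12-31")
    .add_date_reference_to_group_by("date_column", level="monthly")
    .add_aggregate("indicator_name", ["sum"])
)
result = query.execute_query(mode="vertical")

# Queries can also be persisted server-side under a name:
query.save_query("my_saved_query")
```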

#### Utility Functions

- `create_identity_query_for_table(table_name, language)`
- `create_identity_query_for_table_with_date_filters(table_name, language, from_date, to_date)`
- `create_query_from_query_id_or_name(query_id_or_name)`
- `create_query_from_payload(payload)`

These functions help build queries quickly from table names, IDs, or payloads, as in the sketch below.
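
For example, a minimal sketch using the date-filtered identity helper (the table name is a placeholder); it aggregates every indicator with the 'identity' operation, groups by every column, and restricts each of the table's date references to the given range:

```python
from tukan_python.query import create_identity_query_for_table_with_date_filters

# Pull the full table slice between two dates.
query = create_identity_query_for_table_with_date_filters(
    "table_name", language="en", from_date="2023-01-01", to_date="2023-12-31"
)
result = query.execute_query()
print(result["df"].head())
```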

## License
See the `LICENSE` file for license information.

---

For more details, see the code in `tukan_python/tukan.py` and `tukan_python/query.py`.
tukan_python-0.1.0.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
tukan_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
tukan_python/query.py,sha256=QbpTFYPMvEfYdDJg0Yyk9HlaXWfL_FAVPqdVgn_WZgY,16212
tukan_python/tukan.py,sha256=DqgQFgE_RN7vTQBQbLf-FBP7fkW047MweIqBsZQB3qY,18760
tukan_python-0.1.0.dist-info/licenses/LICENSE,sha256=4-KXxjgpAywK6yAHKLHmKoSJ2KqqKzOZYTcey0B0xGY,1064
tukan_python-0.1.0.dist-info/METADATA,sha256=omBgbBUHpyI-mG_q8C91ypRTn6Cm4DdKHpD_BHcJDow,4125
tukan_python-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
tukan_python-0.1.0.dist-info/top_level.txt,sha256=64Ewy3_aoQ0bzlFumNTozOHZ60jP-EGxgC-duqmPlnM,13
tukan_python-0.1.0.dist-info/RECORD,,
tukan_python-0.1.0.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 TukanMx

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
tukan_python-0.1.0.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
tukan_python