tukan-python 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tukan_python/__init__.py +5 -0
- tukan_python/query.py +209 -0
- tukan_python/tukan.py +218 -0
- tukan_python-0.2.1.dist-info/METADATA +351 -0
- tukan_python-0.2.1.dist-info/RECORD +8 -0
- tukan_python-0.2.1.dist-info/WHEEL +5 -0
- tukan_python-0.2.1.dist-info/licenses/LICENSE +21 -0
- tukan_python-0.2.1.dist-info/top_level.txt +1 -0
tukan_python/__init__.py
ADDED
tukan_python/query.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
from typing import Literal, Optional
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from tukan_python.tukan import Tukan
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Query:
    """Builder for Tukan data queries.

    Collects a table name, ``where`` filters, ``group_by`` references and
    ``aggregate`` operations, assembles them into a request payload and
    sends that payload through a ``Tukan`` client.
    """

    def __init__(
        self,
        token: Optional[str] = None,
        table_name: Optional[str] = None,
        where: Optional[list[dict]] = None,
        group_by: Optional[list[dict]] = None,
        aggregate: Optional[list[dict]] = None,
        language: str = "en",
        engine: Optional[Literal["air", "blizzard"]] = None,
    ):
        """Create a query and the ``Tukan`` client used to run it.

        Args:
            token: API token forwarded to ``Tukan``.
            table_name: ID of the table to query.
            where: Initial list of filter dicts (empty list when omitted).
            group_by: Initial list of group-by dicts (empty list when omitted).
            aggregate: Initial list of aggregate dicts (empty list when omitted).
            language: Language code sent with the request.
            engine: Optional engine name; only added to the payload when set.
        """
        self.Tukan = Tukan(token)
        self.table_name = table_name
        self.where = where if where is not None else []
        self.group_by = group_by if group_by is not None else []
        self.aggregate = aggregate if aggregate is not None else []
        self.language = language
        self.engine = engine

    def set_table_name(self, table_name: str) -> None:
        """Set the table the query reads from."""
        self.table_name = table_name

    def set_where(self, where: list[dict]) -> None:
        """Replace all filters with ``where``."""
        self.where = where

    def add_date_filter(
        self, reference: str, date_from: str, date_to: Optional[str] = None
    ) -> None:
        """Append a date filter on ``reference``.

        The ``"to"`` key is only included when ``date_to`` is given.
        """
        dt_filter = {"reference": reference, "from": date_from}
        if date_to is not None:
            dt_filter["to"] = date_to
        self.where.append(dt_filter)

    def add_numeric_filter(
        self,
        reference: str,
        lte: Optional[float] = None,
        eq: Optional[float] = None,
        gte: Optional[float] = None,
    ) -> None:
        """Append a numeric filter on ``reference``.

        Raises:
            ValueError: If no bound is given, or ``eq`` is combined with
                ``lte``/``gte``.
        """
        self.validate_numeric_filter(lte, eq, gte)
        bounds = {"lte": lte, "eq": eq, "gte": gte}
        nm_filter = {"reference": reference}
        # Only the bounds that were actually supplied are sent.
        nm_filter.update({k: v for k, v in bounds.items() if v is not None})
        self.where.append(nm_filter)

    def validate_numeric_filter(
        self,
        lte: Optional[float] = None,
        eq: Optional[float] = None,
        gte: Optional[float] = None,
    ) -> None:
        """Check that the numeric bounds form a usable combination.

        Raises:
            ValueError: If all bounds are ``None``, or ``eq`` is used
                together with ``lte`` or ``gte``.
        """
        if eq is None and lte is None and gte is None:
            raise ValueError("At least one of eq, lte, or gte must be specified")
        if eq is not None and (lte is not None or gte is not None):
            raise ValueError("The eq parameter cannot be used with lte or gte")

    def add_standard_filter(self, reference: str, value: list[str]) -> None:
        """Append a filter restricting ``reference`` to the given values."""
        self.where.append({"reference": reference, "value": value})

    def set_group_by(self, group_by: list[dict]) -> None:
        """Replace all group-by entries with ``group_by``."""
        self.group_by = group_by

    def add_non_date_reference_to_group_by(self, reference: str) -> None:
        """Append a plain (non-date) reference to the group-by list."""
        self.group_by.append({"reference": reference})

    def add_date_reference_to_group_by(
        self, reference: str, level: Literal["yearly", "quarterly", "monthly", "as_is"]
    ) -> None:
        """Append a date reference, grouped at ``level``, to the group-by list.

        Raises:
            ValueError: If ``level`` is not one of the accepted values.
        """
        self.validate_date_filter(level)
        self.group_by.append({"reference": reference, "level": level})

    def validate_date_filter(
        self, level: Literal["yearly", "quarterly", "monthly", "as_is"]
    ) -> None:
        """Check the date grouping level used in a group-by entry.

        Raises:
            ValueError: If ``level`` is not 'yearly', 'quarterly', 'monthly'
                or 'as_is'.
        """
        if level not in {"yearly", "quarterly", "monthly", "as_is"}:
            raise ValueError(
                "Invalid level. Must be 'yearly', 'quarterly', 'monthly', or 'as_is'"
            )

    def set_aggregate(self, aggregate: list[dict]) -> None:
        """Replace all aggregate entries with ``aggregate``."""
        self.aggregate = aggregate

    def add_aggregate(self, indicator: str, operations: list[str]) -> None:
        """Append an aggregate entry for ``indicator``.

        Raises:
            ValueError: If ``operations`` is empty or contains an operation
                other than 'sum', 'avg' or 'identity'.
        """
        self.validate_aggregate(operations)
        self.aggregate.append({"indicator": indicator, "operations": operations})

    def validate_aggregate(self, operations: list[str]) -> None:
        """Check that ``operations`` is non-empty and only uses known names.

        Raises:
            ValueError: If the list is empty or holds an unknown operation.
        """
        if not operations:
            raise ValueError("At least one operation must be specified")
        if set(operations) - {"sum", "avg", "identity"}:
            raise ValueError("Invalid operation. Must be 'sum', 'avg', or 'identity'")

    def set_language(self, language: str) -> None:
        """Set the language code sent with the request."""
        self.language = language

    def set_engine(self, engine: Literal["air", "blizzard"]) -> None:
        """Set the engine name sent with the request."""
        self.engine = engine

    def get_select(self) -> list[dict]:
        """Build the ``select`` section: the table plus every aggregated indicator."""
        indicators = [entry["indicator"] for entry in self.aggregate]
        return [{"table": self.table_name, "indicators": indicators}]

    def get_iterate(self) -> list[dict]:
        """Build the ``iterate`` section from the group-by and aggregate lists."""
        return [{"group_by": self.group_by, "aggregate": self.aggregate}]

    def __str__(self) -> str:
        """Return the string form of the request payload."""
        return str(self.__request_payload__())

    def __request_payload__(self) -> dict:
        """Assemble the request payload from the current query state.

        ``engine`` is only included when it has been set.
        """
        payload = {
            "select": self.get_select(),
            "where": self.where,
            "iterate": self.get_iterate(),
            "language": self.language,
        }
        if self.engine is not None:
            payload["engine"] = self.engine
        return payload

    def create_identity_query_for_table(
        self, table_name: str, language: Literal["en", "es"]
    ) -> dict:
        """Configure the query to fetch every indicator of a table unmodified.

        Sets the table, an ``identity`` aggregate for each indicator of the
        table, the language, and a group-by entry for each table reference.

        Returns:
            The resulting request payload. (The original declared ``-> dict``
            but returned ``None``.)
        """
        self.set_table_name(table_name)
        self.set_identity_aggregate_for_indicators(table_name)
        self.set_language(language)
        self.set_groupby_for_all_columns(table_name)
        return self.__request_payload__()

    def create_identity_query_for_table_with_date_filters(
        self,
        table_name: str,
        language: Literal["en", "es"],
        from_date: str,
        to_date: str,
    ) -> dict:
        """Like ``create_identity_query_for_table`` plus a date range filter.

        One date filter is appended for every date reference listed in the
        table metadata. Filters already present in ``where`` are kept.

        Returns:
            The resulting request payload.
        """
        self.create_identity_query_for_table(table_name, language)
        for date_ref in self.all_dt_references_for_table(table_name):
            self.add_date_filter(date_ref, from_date, to_date)
        return self.__request_payload__()

    def all_dt_references_for_table(self, table_name: str) -> list[str]:
        """Return the keys of ``date_ranges`` in the table's metadata."""
        metadata = self.Tukan.get_table_metadata(table_name, language="en")
        return list(metadata["data_table"]["date_ranges"])

    def set_identity_aggregate_for_indicators(self, table_name: str) -> None:
        """Replace the aggregates with an ``identity`` entry per table indicator."""
        self.aggregate = [
            {"indicator": indicator, "operations": ["identity"]}
            for indicator in self.all_indicators_for_table(table_name)
        ]

    def set_groupby_for_all_columns(self, table_name: str) -> None:
        """Replace the group-by list with one entry per table reference."""
        references = self.all_references_for_table(table_name)
        self.set_group_by([{"reference": reference} for reference in references])

    def all_references_for_table(self, table_name: str) -> list[str]:
        """Return the ``references`` field of the table record."""
        return self.Tukan.get_table(table_name)["references"]

    def all_indicators_for_table(self, table_name: str) -> list[str]:
        """Return the ``ref`` of every indicator the client lists for the table."""
        indicators = self.Tukan.all_indicators_for_table(table_name)
        return [indicator["ref"] for indicator in indicators]

    def save_query(self, name: str) -> str:
        """Post the current query to ``visualizations/query/`` under ``name``.

        Returns:
            Whatever ``Tukan.execute_post_operation`` returns for the request.
            NOTE(review): annotated ``str`` but that method returns parsed
            JSON; confirm the actual response shape.
        """
        body = {
            "data_table": self.table_name,
            "language": self.language,
            "name": name,
            "query": self.__request_payload__(),
        }
        return self.Tukan.execute_post_operation(body, "visualizations/query/")

    def execute_query(
        self, mode: Literal["vertical", "horizontal"] = "vertical"
    ) -> dict:
        """Run the query and return the rows as a DataFrame.

        With the ``blizzard`` engine the request goes to ``data/retrieve/``
        and is repeated with an increasing ``offset`` while the response
        reports ``has_more_data``; otherwise a single request is sent to
        ``data/new_retrieve/``.

        Args:
            mode: Value sent as the payload's ``mode`` key.

        Returns:
            A dict with ``"df"`` (a ``pandas.DataFrame`` of the rows) and,
            when the last response contains it, ``"indicators"``.
        """
        payload = self.__request_payload__()
        payload["mode"] = mode
        paginated = self.engine == "blizzard"
        endpoint = "data/retrieve/" if paginated else "data/new_retrieve/"
        response = self.Tukan.execute_post_operation(payload, endpoint)

        rows = response["data"]
        if paginated:
            offset = len(rows)
            while response.get("has_more_data", False):
                payload["offset"] = offset
                response = self.Tukan.execute_post_operation(payload, endpoint)
                page = response["data"]
                if not page:
                    # An empty page would leave the offset unchanged and the
                    # loop would request the same page forever.
                    break
                rows.extend(page)
                offset += len(page)

        result = {"df": pd.DataFrame(rows)}
        if "indicators" in response:
            result["indicators"] = response["indicators"]
        return result
|
tukan_python/tukan.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
from functools import partial, update_wrapper
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from random import randint
|
|
7
|
+
from time import sleep
|
|
8
|
+
from typing import Callable, Optional
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import requests
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
|
|
14
|
+
# Runs at import time: loads variables from a .env file (if one is found) into
# the environment, so Tukan.__init__ can read API_TUKAN through os.getenv.
load_dotenv()
|
|
15
|
+
|
|
16
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Tukan:
    """Thin HTTP client for the Tukan API at ``https://client.tukanmx.com/``."""

    def __init__(self, token: Optional[str] = None):
        """Store the API token and the base URL.

        Args:
            token: API token. When missing or empty, the ``API_TUKAN``
                environment variable is used instead.

        Raises:
            ValueError: If neither source yields a non-empty token.
        """
        resolved_token = token or os.getenv("API_TUKAN")
        # Checking the resolved value (not just ``token is None``) also rejects
        # an empty-string token, which previously produced ``self.token = None``.
        if not resolved_token:
            raise ValueError(
                "Token not provided and not found in environment variables"
            )
        self.token = resolved_token
        self.env = "https://client.tukanmx.com/"

    def execute_post_operation(self, payload: dict, source: str):
        """POST ``payload`` as JSON to ``source`` (a path under the base URL).

        Returns:
            The parsed JSON body. For non-2xx statuses other than 403 the
            parsed error body is returned as well, so callers may receive
            an error payload rather than data.

        Raises:
            Exception: If the final response status is 403.
        """
        target_url = self.env + source
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"token {self.token}",
        }
        request_partial = wrapped_partial(
            requests.request,
            method="POST",
            url=target_url,
            json=payload,
            headers=headers,
            timeout=90,
        )
        response = self.persistent_request(request_partial)
        if response.status_code < 300:
            return response.json()
        if response.status_code == 403:
            logger.info("%s", response.text)
            raise Exception("Operation not allowed on admin. Contact administrator!")
        # Any other status: hand the parsed error body back to the caller.
        return json.loads(response.text)

    def persistent_request(
        self,
        request_partial: Callable,
        max_attempts: int = 2,
        wait_range: tuple[int, int] = (3, 5),
    ):
        """Call ``request_partial`` until it yields a status below 300.

        Args:
            request_partial: Zero-argument callable returning an object with
                a ``status_code`` attribute.
            max_attempts: Maximum number of calls (default 2).
            wait_range: Inclusive ``(min, max)`` seconds to sleep between
                attempts; the wait is drawn with ``randint``.

        Returns:
            The first response with status below 300, otherwise the last
            response that was obtained.

        Raises:
            ValueError: If ``max_attempts`` is less than 1.
            Exception: The last error raised by ``request_partial`` when no
                attempt produced a response at all.
        """
        if max_attempts < 1:
            raise ValueError("max_attempts must be at least 1")

        response = None
        last_error: Optional[Exception] = None
        for attempt in range(1, max_attempts + 1):
            try:
                response = request_partial()
            except Exception as err:
                # Remembered so it can be re-raised if nothing ever succeeds.
                last_error = err
            else:
                if response.status_code < 300:
                    return response
            if attempt < max_attempts:
                # No point in waiting after the final attempt.
                sleep(randint(*wait_range))

        if response is None:
            raise last_error
        return response

    def all_tables(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
        """List tables, skipping those whose ``should_index`` is falsy.

        Tables without a ``should_index`` key are kept.
        """
        payload = {
            "resource": "datatable",
            "operation": "view",
            "page": page,
            "page_size": page_size,
        }
        response = self.execute_post_operation(payload, "data/")
        # Filter out tables that should not be indexed (restricted tables)
        return [table for table in response["data"] if table.get("should_index", True)]

    def get_table(self, table_name: str) -> dict:
        """Return the first table record whose id matches ``table_name``.

        Raises:
            IndexError: If the response contains no matching table.
        """
        payload = {
            "resource": "datatable",
            "operation": "view",
            # NOTE(review): sent as strings here but as ints elsewhere in this
            # class; left unchanged because the API contract is not visible.
            "page": "1",
            "page_size": "1",
            "filter_by": {"id": table_name},
        }
        response = self.execute_post_operation(payload, "data/")
        return response["data"][0]

    def does_table_exist(self, table_name: str) -> bool:
        """Return True when ``get_table`` finds the table, False otherwise."""
        try:
            self.get_table(table_name)
        except IndexError:
            return False
        return True

    def get_table_metadata(self, table_name: str, language: str = "en") -> dict:
        """Return the response of ``data/metadata/`` for the table."""
        payload = {"data": {"id": table_name, "language": language}}
        return self.execute_post_operation(payload, "data/metadata/")

    def does_indicator_ref_exist(self, indicator_ref: str) -> bool:
        """Return True when an indicator with this ref is found.

        ``get_indicator_by_ref`` raises ``IndexError`` on an empty result;
        that case is reported as False, mirroring ``does_table_exist``.
        """
        try:
            indicator_info = self.get_indicator_by_ref(indicator_ref, page_size=1)
        except IndexError:
            return False
        return bool(indicator_info)

    def all_indicators(self, page: int = 1, page_size: int = 2_500) -> list[dict]:
        """Return one page of indicator records."""
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
        }
        response = self.execute_post_operation(payload, "data/")
        return response["data"]

    def all_indicators_for_table(
        self, table_name: str, page: int = 1, page_size: int = 2_500
    ) -> list[dict]:
        """Return one page of indicator records filtered by ``data_table``."""
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
            "filter_by": {"data_table": table_name},
        }
        response = self.execute_post_operation(payload, "data/")
        return response["data"]

    def get_indicator_by_ref(
        self, indicator_ref: str, page: int = 1, page_size: int = 2_500
    ) -> dict:
        """Return the first indicator record whose ``ref`` matches.

        Raises:
            IndexError: If the response contains no matching indicator.
        """
        payload = {
            "resource": "indicator",
            "operation": "view",
            "page": page,
            "page_size": page_size,
            "filter_by": {"ref": indicator_ref},
        }
        response = self.execute_post_operation(payload, "data/")
        return response["data"][0]

    def ask_leah(self, query: str, language: str = "en") -> list[dict]:
        """Send ``query`` to the ``leah/`` endpoint.

        Returns:
            The response reduced by ``parse_leah``: one dict with ``id``,
            ``description`` and ``name`` per returned table.
        """
        payload = {"query": query, "language": language}
        response = self.execute_post_operation(payload, "leah/")
        return parse_leah(response)

    def get_saved_query_with_query_name(self, query_name: str) -> list[dict]:
        """Return the saved-query records filtered by ``name``."""
        body = {
            "resource": "query",
            "operation": "view",
            "filter_by": {"name": query_name},
            "page_size": 10_000,
        }
        response = self.execute_post_operation(body, "visualizations/queries")
        return response["data"]

    def get_query_from_id(self, query_id: str) -> dict:
        """Return the saved query whose ``id`` equals ``query_id``.

        Raises:
            ValueError: If no returned record has that id (previously a bare
                ``StopIteration`` escaped from ``next``).
        """
        body = {
            "resource": "query",
            "operation": "view",
            "id": query_id,
        }
        response = self.execute_post_operation(body, "visualizations/queries")
        for record in response["data"]:
            if record["id"] == query_id:
                return record
        raise ValueError(f"Query with id {query_id!r} not found")

    def get_reference_flat_tree(
        self, table_name: str, reference: str, only_in_table: bool = False
    ) -> pd.DataFrame:
        """
        Get all available values for a reference (column/category) in a table.

        Args:
            table_name: The table ID
            reference: The reference/column name
            only_in_table: If True, only return values that exist in the data

        Returns:
            DataFrame with columns: raw, ref, name, name_en, parent_ref, order, in_table

        Raises:
            Exception: If the response status is 300 or above.
        """
        url = f"{self.env}data/visualizations/flat-tree/{table_name}/{reference}/?export=csv"
        headers = {
            "Authorization": f"token {self.token}",
        }
        response = requests.get(url, headers=headers, timeout=20)
        if response.status_code >= 300:
            raise Exception(f"Failed to get reference values: {response.text}")

        # Decode the raw bytes explicitly as UTF-8 instead of relying on the
        # encoding requests would infer for ``response.text``.
        text = response.content.decode("utf-8")
        df = pd.read_csv(StringIO(text), sep="|")
        if only_in_table:
            # Explicit comparison kept: it also works if the column was not
            # parsed as a pure boolean dtype.
            df = df[df["in_table"] == True]
        return df
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def wrapped_partial(func, *args, **kwargs) -> Callable:
    """Return ``partial(func, *args, **kwargs)`` carrying ``func``'s metadata.

    ``update_wrapper`` copies attributes such as ``__name__`` and ``__doc__``
    from ``func`` onto the partial object and returns that same object.
    """
    return update_wrapper(partial(func, *args, **kwargs), func)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def parse_leah(response: dict) -> list[dict]:
    """Reduce a ``leah/`` response to the basic fields of each table.

    Args:
        response: Parsed response containing an ``openai_completion`` list
            whose elements hold ``metadata["data_table"]``.

    Returns:
        One dict per element with the table's ``id``, ``description`` and
        ``name``, in the order received. (The original annotation said
        ``dict`` although a list was always returned.)
    """
    tables = (element["metadata"]["data_table"] for element in response["openai_completion"])
    return [
        {
            "id": table["id"],
            "description": table["description"],
            "name": table["name"],
        }
        for table in tables
    ]
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tukan_python
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: SDK de Python para acceder a datos oficiales de México a través de la API de Tukan.
|
|
5
|
+
Author-email: TukanMx <contacto@tukanmx.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://tukanmx.com
|
|
8
|
+
Project-URL: Repository, https://github.com/TukanMx/tukan_python
|
|
9
|
+
Project-URL: Documentation, https://github.com/TukanMx/tukan_python#readme
|
|
10
|
+
Keywords: tukan,mexico,data,api,inegi,banxico,cnbv,analytics
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Operating System :: OS Independent
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: pandas>=1.5.0
|
|
25
|
+
Requires-Dist: requests>=2.28.0
|
|
26
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# Tukan Python SDK
|
|
30
|
+
|
|
31
|
+
[![PyPI version](https://badge.fury.io/py/tukan_python.svg)](https://badge.fury.io/py/tukan_python)
|
|
32
|
+
[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
|
|
33
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
34
|
+
|
|
35
|
+
[Tukan](https://tukanmx.com) es la plataforma de datos más completa de México.
|
|
36
|
+
|
|
37
|
+
Si estás en búsqueda de una forma sencilla y eficiente para acceder a todas las estadísticas oficiales de la economía mexicana desde Python, esta es la solución para ti.
|
|
38
|
+
|
|
39
|
+
## Descripción
|
|
40
|
+
|
|
41
|
+
Tukan agrega y estandariza fuentes oficiales de datos públicos como INEGI, Banxico, CNBV, CNSF, CONSAR, entre otras.
|
|
42
|
+
|
|
43
|
+
Este SDK permite consultar y analizar estos datos de forma sencilla desde Python.
|
|
44
|
+
|
|
45
|
+
Para acceder a la gran variedad de datos disponibles es necesario contar con un token y una suscripción activa en [Tukan](https://tukanmx.com). Sin embargo, algunas tablas pueden ser consultadas de forma gratuita.
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
## Instalación
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install tukan_python
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Autenticación
|
|
55
|
+
|
|
56
|
+
Para obtener tu token, primero debes registrarte en [Tukan](https://app.tukanmx.com/user/register).
|
|
57
|
+
|
|
58
|
+
Luego, podrás encontrar tu token en el [panel de usuario](https://app.tukanmx.com/account/).
|
|
59
|
+
|
|
60
|
+

|
|
61
|
+
|
|
62
|
+
Una vez obtenido tu token, puedes configurarlo como una variable de entorno:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
export API_TUKAN="tu_token_aqui"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
o en un archivo `.env`:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
API_TUKAN=tu_token_aqui
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Otra alternativa es pasar el token directamente al inicializar el cliente:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from tukan_python import Tukan
|
|
78
|
+
|
|
79
|
+
tukan = Tukan(token="tu_token_aqui")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Inicio rápido
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from tukan_python import Query
|
|
86
|
+
|
|
87
|
+
# Consultar tipo de cambio FIX (MXN por USD)
|
|
88
|
+
q = Query()
|
|
89
|
+
q.set_table_name("mex_banxico_cf102")
|
|
90
|
+
q.add_date_filter("date", "2025-01-01", "2025-01-15")
|
|
91
|
+
q.add_date_reference_to_group_by("date", level="as_is")
|
|
92
|
+
q.add_aggregate("be08668718242ff", ["identity"]) # Tipo de cambio FIX
|
|
93
|
+
q.set_language("es")
|
|
94
|
+
|
|
95
|
+
resultado = q.execute_query()
|
|
96
|
+
print(resultado["df"])
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Salida:
|
|
100
|
+
```
|
|
101
|
+
date indicator value
|
|
102
|
+
0 2025-01-02 be08668718242ff 20.6917
|
|
103
|
+
1 2025-01-03 be08668718242ff 20.6708
|
|
104
|
+
2 2025-01-06 be08668718242ff 20.3195
|
|
105
|
+
3 2025-01-07 be08668718242ff 20.3440
|
|
106
|
+
4 2025-01-08 be08668718242ff 20.3823
|
|
107
|
+
...
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Ejemplos
|
|
111
|
+
|
|
112
|
+
### 1. Explorar tablas disponibles
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from tukan_python import Tukan
|
|
116
|
+
|
|
117
|
+
tukan = Tukan()
|
|
118
|
+
|
|
119
|
+
# Listar todas las tablas
|
|
120
|
+
tablas = tukan.all_tables()
|
|
121
|
+
print(f"Tablas disponibles: {len(tablas)}")
|
|
122
|
+
|
|
123
|
+
# Filtrar tablas de CNBV y ordenar alfabéticamente
|
|
124
|
+
cnbv = [t for t in tablas if t["id"].startswith("mex_cnbv")]
|
|
125
|
+
cnbv.sort(key=lambda x: x["id"])
|
|
126
|
+
|
|
127
|
+
for t in cnbv[:5]:
|
|
128
|
+
print(f"- {t['id']}: {t['name']}")
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Salida:
|
|
132
|
+
```
|
|
133
|
+
Tablas disponibles: 150+
|
|
134
|
+
- mex_cnbv_cb_balance_sheet_ifrs9: Instituciones de Banca Múltiple - Balance General Detallado (IFRS9)
|
|
135
|
+
- mex_cnbv_cb_capital_ratios: Instituciones de Banca Múltiple - Índice de Capitalización
|
|
136
|
+
- mex_cnbv_cb_ccl: Instituciones de Banca Múltiple - Coeficiente de Cobertura de Liquidez
|
|
137
|
+
- mex_cnbv_cb_claims_by_channel_and_status: Instituciones de Banca Múltiple - Reclamaciones por Estatus y Canal
|
|
138
|
+
- mex_cnbv_cb_clients_per_product: Clientes por Producto Financiero
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### 2. Obtener metadata de una tabla
|
|
142
|
+
|
|
143
|
+
Antes de consultar datos, es útil explorar la estructura de la tabla:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from tukan_python import Tukan
|
|
147
|
+
|
|
148
|
+
tukan = Tukan()
|
|
149
|
+
|
|
150
|
+
# Obtener metadata
|
|
151
|
+
meta = tukan.get_table_metadata("mex_cnbv_cb_orig_by_gender_monthly", language="es")
|
|
152
|
+
|
|
153
|
+
print("Nombre:", meta["data_table"]["name"])
|
|
154
|
+
print("Referencias:", [r["id"] for r in meta["data_table_references"]])
|
|
155
|
+
print("Rango de fechas:", meta["data_table"]["date_ranges"])
|
|
156
|
+
|
|
157
|
+
# Ver indicadores disponibles
|
|
158
|
+
indicadores = tukan.all_indicators_for_table("mex_cnbv_cb_orig_by_gender_monthly")
|
|
159
|
+
for ind in indicadores:
|
|
160
|
+
print(f"- {ind['ref']}: {ind['name']}")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Salida:
|
|
164
|
+
```
|
|
165
|
+
Nombre: Banca Múltiple - Colocación de Créditos Empresariales y de Vivienda, por Sexo
|
|
166
|
+
Referencias: ['end_date', 'sex', 'start_date', 'institutions', 'geography', ...]
|
|
167
|
+
Rango de fechas: {'end_date': {'max': '2025-11-30', 'min': '2019-12-31'}}
|
|
168
|
+
|
|
169
|
+
- 05451c0b6d5ea78: Monto colocado
|
|
170
|
+
- 78256b18c54451f: Número de créditos
|
|
171
|
+
- b577c6dfc51ebef: Tasa ponderada
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### 3. Consultar colocación de crédito bancario por sexo
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from tukan_python import Query
|
|
178
|
+
|
|
179
|
+
q = Query()
|
|
180
|
+
q.set_table_name("mex_cnbv_cb_orig_by_gender_monthly")
|
|
181
|
+
q.add_date_filter("end_date", "2024-01-01", "2024-06-30")
|
|
182
|
+
q.add_standard_filter("institutions", ["0c959ae6bc0d063"]) # Banca múltiple (agregado)
|
|
183
|
+
q.add_date_reference_to_group_by("end_date", level="monthly")
|
|
184
|
+
q.add_non_date_reference_to_group_by("sex")
|
|
185
|
+
q.add_aggregate("05451c0b6d5ea78", ["sum"]) # Monto colocado
|
|
186
|
+
q.set_language("es")
|
|
187
|
+
|
|
188
|
+
resultado = q.execute_query()
|
|
189
|
+
print(resultado["df"])
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Salida:
|
|
193
|
+
```
|
|
194
|
+
sex__ref end_date indicator value sex
|
|
195
|
+
0 34e63c6a4f88758 2024-01-01 05451c0b6d5ea78__sum 1.468993e+10 Femenino
|
|
196
|
+
1 34e63c6a4f88758 2024-02-01 05451c0b6d5ea78__sum 1.721803e+10 Femenino
|
|
197
|
+
2 653a519004568cb 2024-01-01 05451c0b6d5ea78__sum 3.000290e+10 Masculino
|
|
198
|
+
3 653a519004568cb 2024-02-01 05451c0b6d5ea78__sum 3.302666e+10 Masculino
|
|
199
|
+
...
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### 4. Consultar el INPC (inflación)
|
|
203
|
+
|
|
204
|
+
El INPC requiere filtrar por producto. Primero exploramos el catálogo:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from tukan_python import Tukan, Query
|
|
208
|
+
|
|
209
|
+
tukan = Tukan()
|
|
210
|
+
|
|
211
|
+
# Ver productos disponibles
|
|
212
|
+
productos = tukan.get_reference_flat_tree(
|
|
213
|
+
table_name="mex_inegi_inpc_original_product_monthly",
|
|
214
|
+
reference="mex_inegi_cpi_product_structure",
|
|
215
|
+
only_in_table=True
|
|
216
|
+
)
|
|
217
|
+
print(productos[["ref", "name"]].head(10))
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
Salida:
|
|
221
|
+
```
|
|
222
|
+
ref name
|
|
223
|
+
0 9329306b0b5268c Todos los productos
|
|
224
|
+
1 a38da228dc862e7 Alimentos, bebidas y tabaco
|
|
225
|
+
2 714d22fe124b834 Alimentos
|
|
226
|
+
3 1c70d647c151be7 Vivienda
|
|
227
|
+
4 da9ee7065e99719 Transporte
|
|
228
|
+
...
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Ahora consultamos el índice general:
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
q = Query()
|
|
235
|
+
q.set_table_name("mex_inegi_inpc_original_product_monthly")
|
|
236
|
+
q.add_date_filter("date", "2024-01-01", "2024-06-30")
|
|
237
|
+
q.add_standard_filter("mex_inegi_cpi_product_structure", ["9329306b0b5268c"]) # Todos los productos
|
|
238
|
+
q.add_date_reference_to_group_by("date", level="monthly")
|
|
239
|
+
q.add_aggregate("c572db59b8cd109", ["identity"]) # INPC
|
|
240
|
+
q.set_language("es")
|
|
241
|
+
|
|
242
|
+
resultado = q.execute_query()
|
|
243
|
+
print(resultado["df"])
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Salida:
|
|
247
|
+
```
|
|
248
|
+
date indicator value
|
|
249
|
+
0 2024-01-01 c572db59b8cd109 133.555
|
|
250
|
+
1 2024-02-01 c572db59b8cd109 133.681
|
|
251
|
+
2 2024-03-01 c572db59b8cd109 134.065
|
|
252
|
+
3 2024-04-01 c572db59b8cd109 134.336
|
|
253
|
+
4 2024-05-01 c572db59b8cd109 134.087
|
|
254
|
+
5 2024-06-01 c572db59b8cd109 134.594
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### 5. Explorar catálogos jerárquicos
|
|
258
|
+
|
|
259
|
+
Las referencias estándar tienen estructura jerárquica (país → estado → municipio). El catálogo incluye columnas importantes como `raw` (ID original de la fuente) e `in_table` (si el valor tiene datos en la tabla):
|
|
260
|
+
|
|
261
|
+
```python
|
|
262
|
+
from tukan_python import Tukan
|
|
263
|
+
|
|
264
|
+
tukan = Tukan()
|
|
265
|
+
|
|
266
|
+
# Obtener catálogo de geografía
|
|
267
|
+
df_geo = tukan.get_reference_flat_tree(
|
|
268
|
+
table_name="mex_inegi_census_people_reduced",
|
|
269
|
+
reference="geography"
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
print(df_geo[["raw", "ref", "name", "parent_ref", "in_table"]].head(10))
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
Salida:
|
|
276
|
+
```
|
|
277
|
+
raw ref name parent_ref in_table
|
|
278
|
+
0 wd 2064d512d0da97d Mundo FALSE False
|
|
279
|
+
1 na e5fc8e04967fe49 Norteamérica 2064d512d0da97d False
|
|
280
|
+
2 mex b815762a2c6a283 México e5fc8e04967fe49 True
|
|
281
|
+
3 mex_10 db3b32c946ffd13 Durango b815762a2c6a283 True
|
|
282
|
+
4 mex_21 bd8b4a37deee845 Puebla b815762a2c6a283 True
|
|
283
|
+
5 mex_11 a3aa918bd45ac53 Guanajuato b815762a2c6a283 True
|
|
284
|
+
6 mex_24 468bc66c95ecfe6 San Luis Potosí b815762a2c6a283 True
|
|
285
|
+
...
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
La columna `raw` contiene el ID original (ej: `mex_10` para Durango), mientras que `ref` es el ID interno de Tukan. Usa `only_in_table=True` para filtrar solo valores con datos.
|
|
289
|
+
|
|
290
|
+
### 6. Motor Blizzard para consultas pesadas
|
|
291
|
+
|
|
292
|
+
Para consultas con grandes volúmenes de datos, usa el motor Blizzard:
|
|
293
|
+
|
|
294
|
+
```python
|
|
295
|
+
from tukan_python import Query
|
|
296
|
+
|
|
297
|
+
q = Query(engine="blizzard")
|
|
298
|
+
q.set_table_name("mex_cnbv_cb_orig_by_gender_monthly")
|
|
299
|
+
# ... configurar filtros y agregaciones ...
|
|
300
|
+
resultado = q.execute_query()
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## Conceptos clave
|
|
304
|
+
|
|
305
|
+
### Tablas
|
|
306
|
+
Cada tabla representa una fuente de datos específica. Los IDs siguen el patrón general:
|
|
307
|
+
`{pais}_{fuente}_{dataset}`
|
|
308
|
+
|
|
309
|
+
Ejemplos:
|
|
310
|
+
- `mex_inegi_inpc_original_product_monthly` - INPC de INEGI
|
|
311
|
+
- `mex_cnbv_cb_orig_by_gender_monthly` - Colocación de créditos de CNBV
|
|
312
|
+
- `mex_inegi_census_people_reduced` - Censo de población de INEGI
|
|
313
|
+
- `mex_shcp_budget_expenditures_by_fc` - Gastos presupuestarios de SHCP
|
|
314
|
+
|
|
315
|
+
### Indicadores
|
|
316
|
+
Son las métricas o valores que se pueden consultar. Cada tabla tiene sus propios indicadores con IDs únicos (ej: `c572db59b8cd109` para el INPC).
|
|
317
|
+
|
|
318
|
+
### Referencias
|
|
319
|
+
Son las dimensiones que contextualizan los datos:
|
|
320
|
+
- **date**: Fechas (pueden tener diferentes nombres como `date`, `end_date`, `start_date`)
|
|
321
|
+
- **standard**: Catálogos jerárquicos como `geography`, `sex`, `economic_activity`
|
|
322
|
+
- **free**: Texto libre
|
|
323
|
+
- **numeric**: Valores numéricos adicionales
|
|
324
|
+
|
|
325
|
+
### Operaciones de agregación
|
|
326
|
+
- `identity`: Valor original sin modificar
|
|
327
|
+
- `sum`: Suma de valores
|
|
328
|
+
- `avg`: Promedio
|
|
329
|
+
|
|
330
|
+
## Fuentes de datos
|
|
331
|
+
|
|
332
|
+
Tukan integra datos de múltiples fuentes oficiales mexicanas:
|
|
333
|
+
|
|
334
|
+
| Fuente | Datos disponibles |
|
|
335
|
+
|--------|-------------------|
|
|
336
|
+
| **INEGI** | Censos, INPC, PIB, encuestas económicas |
|
|
337
|
+
| **Banxico** | Tasas de interés, tipo de cambio, agregados monetarios |
|
|
338
|
+
| **CNBV** | Estados financieros de bancos, SOFOMES, aseguradoras |
|
|
339
|
+
| **SHCP** | Finanzas públicas, presupuesto, deuda |
|
|
340
|
+
| **CONAPO** | Proyecciones de población |
|
|
341
|
+
|
|
342
|
+
## Licencia
|
|
343
|
+
|
|
344
|
+
MIT License - ver [LICENSE](LICENSE) para más detalles.
|
|
345
|
+
|
|
346
|
+
## Links
|
|
347
|
+
|
|
348
|
+
- [Sitio web](https://tukanmx.com)
|
|
349
|
+
- [Documentación](https://github.com/TukanMx/tukan_python#readme)
|
|
350
|
+
- [Reportar issues](https://github.com/TukanMx/tukan_python/issues)
|
|
351
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
tukan_python/__init__.py,sha256=pCYrURHfbVrGS_7FuII0l-3lz0TS7TaNGY2dfJ0NCNk,117
|
|
2
|
+
tukan_python/query.py,sha256=xXm7OB1PKpg5f1PYdkWrnrLJ4L-t_TeAMHq0piZvwuY,7829
|
|
3
|
+
tukan_python/tukan.py,sha256=DH61_iGFBDukWsjVfz7SOc_9_iQhlz2Ta5MGpMcmRJc,7326
|
|
4
|
+
tukan_python-0.2.1.dist-info/licenses/LICENSE,sha256=4-KXxjgpAywK6yAHKLHmKoSJ2KqqKzOZYTcey0B0xGY,1064
|
|
5
|
+
tukan_python-0.2.1.dist-info/METADATA,sha256=iVGf15amCtZBfaAradLA7D-qqBfOp76OE4A8CDRhrwg,11428
|
|
6
|
+
tukan_python-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
7
|
+
tukan_python-0.2.1.dist-info/top_level.txt,sha256=64Ewy3_aoQ0bzlFumNTozOHZ60jP-EGxgC-duqmPlnM,13
|
|
8
|
+
tukan_python-0.2.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2021 TukanMx
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tukan_python
|