tinybird 0.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic. Click here for more details.
- tinybird/__cli__.py +8 -0
- tinybird/ch_utils/constants.py +244 -0
- tinybird/ch_utils/engine.py +855 -0
- tinybird/check_pypi.py +25 -0
- tinybird/client.py +1281 -0
- tinybird/config.py +117 -0
- tinybird/connectors.py +428 -0
- tinybird/context.py +23 -0
- tinybird/datafile.py +5589 -0
- tinybird/datatypes.py +434 -0
- tinybird/feedback_manager.py +1022 -0
- tinybird/git_settings.py +145 -0
- tinybird/sql.py +865 -0
- tinybird/sql_template.py +2343 -0
- tinybird/sql_template_fmt.py +281 -0
- tinybird/sql_toolset.py +350 -0
- tinybird/syncasync.py +682 -0
- tinybird/tb_cli.py +25 -0
- tinybird/tb_cli_modules/auth.py +252 -0
- tinybird/tb_cli_modules/branch.py +1043 -0
- tinybird/tb_cli_modules/cicd.py +434 -0
- tinybird/tb_cli_modules/cli.py +1571 -0
- tinybird/tb_cli_modules/common.py +2082 -0
- tinybird/tb_cli_modules/config.py +344 -0
- tinybird/tb_cli_modules/connection.py +803 -0
- tinybird/tb_cli_modules/datasource.py +900 -0
- tinybird/tb_cli_modules/exceptions.py +91 -0
- tinybird/tb_cli_modules/fmt.py +91 -0
- tinybird/tb_cli_modules/job.py +85 -0
- tinybird/tb_cli_modules/pipe.py +858 -0
- tinybird/tb_cli_modules/regions.py +9 -0
- tinybird/tb_cli_modules/tag.py +100 -0
- tinybird/tb_cli_modules/telemetry.py +310 -0
- tinybird/tb_cli_modules/test.py +107 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit.py +340 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit_lib.py +71 -0
- tinybird/tb_cli_modules/token.py +349 -0
- tinybird/tb_cli_modules/workspace.py +269 -0
- tinybird/tb_cli_modules/workspace_members.py +212 -0
- tinybird/tornado_template.py +1194 -0
- tinybird-0.0.1.dev0.dist-info/METADATA +2815 -0
- tinybird-0.0.1.dev0.dist-info/RECORD +45 -0
- tinybird-0.0.1.dev0.dist-info/WHEEL +5 -0
- tinybird-0.0.1.dev0.dist-info/entry_points.txt +2 -0
- tinybird-0.0.1.dev0.dist-info/top_level.txt +4 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from functools import partial
|
|
4
|
+
from types import MethodType
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from sqlfmt import actions, api
|
|
8
|
+
from sqlfmt.analyzer import Analyzer, Rule, group
|
|
9
|
+
from sqlfmt.comment import Comment
|
|
10
|
+
from sqlfmt.dialect import ClickHouse
|
|
11
|
+
from sqlfmt.exception import SqlfmtBracketError
|
|
12
|
+
from sqlfmt.jinjafmt import JinjaFormatter, JinjaTag
|
|
13
|
+
from sqlfmt.line import Line
|
|
14
|
+
from sqlfmt.mode import Mode
|
|
15
|
+
from sqlfmt.node import Node, get_previous_token
|
|
16
|
+
from sqlfmt.node_manager import NodeManager
|
|
17
|
+
from sqlfmt.token import Token, TokenType
|
|
18
|
+
|
|
19
|
+
# This class extends and monkey patches https://github.com/tconbeer/sqlfmt
|
|
20
|
+
# One indentation level (four spaces) used when building line prefixes.
INDENT = "    "
# This is the default value in tb fmt --diff, let's use the same
DEFAULT_FMT_LINE_LENGTH = 100
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class TBLine(Line):
    """sqlfmt ``Line`` subclass that computes its own indentation prefix."""

    @property
    def prefix(self) -> str:
        """
        Return the leading whitespace for this line.

        Tinybird => This is overridden from the base Line because we want SQL inside a template to be indented:
        both components of ``self.depth`` are added together and each level contributes one ``INDENT``.
        https://github.com/tconbeer/sqlfmt/blob/c11775b92d8a45f0e91d871b81a88a894d620bec/src/sqlfmt/line.py#L92
        """
        levels = self.depth[0] + self.depth[1]
        return INDENT * levels
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def from_nodes(
    cls,
    previous_node: Optional[Node],
    nodes: List[Node],
    comments: List[Comment],
) -> "Line":
    """
    Build a new line from a list of Nodes (used for line splitting and merging).

    Tinybird => Monkey patched to use `TBLine` and our own indentation logic.

    The line is flagged as formatting-disabled when its first or last node is;
    with no nodes at all, the flag is inherited from ``previous_node`` (False when
    there is no previous node either).
    """
    if nodes:
        disabled = nodes[0].formatting_disabled or nodes[-1].formatting_disabled
    else:
        disabled = previous_node.formatting_disabled if previous_node else False

    return TBLine(
        previous_node=previous_node,
        nodes=nodes,
        comments=comments,
        formatting_disabled=disabled,
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _format_jinja_node(self, node: Node, max_length: int) -> bool:
    """
    Format a single jinja tag in place and report whether it was blackened.

    Nodes that are not jinja are left untouched and yield False.

    Tinybird => a fresh ``JinjaFormatter(TBMode())`` is used for the formatting, the tag
    depth is the sum of both components of ``node.depth``, and continuation lines of
    ``{% ... %}`` tags are re-indented with ``INDENT``.
    """
    if not node.is_jinja:
        return False

    jinja_formatter = JinjaFormatter(TBMode())
    tag = JinjaTag.from_string(node.value, node.depth[0] + node.depth[1])

    if jinja_formatter.use_black:
        code_formatter = jinja_formatter.code_formatter
        # Monkey patching black to not normalize strings
        old_format_str = code_formatter.black.format_str

        def format_str(string, mode=None):
            # Rebuild the black mode keeping the requested line length but
            # switching string normalization off.
            black_mode = jinja_formatter.code_formatter.black.Mode(
                line_length=mode.line_length, string_normalization=False
            )
            return old_format_str(string, mode=black_mode)

        # ugly way to monkeypatch the above function only once: the marker appended to
        # PY_RESERVED_WORDS records that the patch is already installed.
        if "add_formatter_only_once" not in code_formatter.PY_RESERVED_WORDS:
            code_formatter.black.format_str = format_str
            code_formatter.PY_RESERVED_WORDS.append("add_formatter_only_once")

    if tag.code and jinja_formatter.use_black:
        tag.code, tag.is_blackened = jinja_formatter.code_formatter.format_string(
            tag.code,
            max_length=tag.max_code_length(max_length),
        )

    if "{%" in node.value:
        code_lines = tag.code.split("\n")
        indent = INDENT * (node.depth[0] + node.depth[1])
        if len(code_lines) > 1:
            # The first line keeps its position; every following line gets the indent.
            tag.code = "\n".join([code_lines[0]] + [f"{indent}{line}" for line in code_lines[1:]])

    node.value = str(tag)
    return tag.is_blackened
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# Some monkey patching: install the Tinybird implementations on the sqlfmt classes.
# MethodType binds the class object itself as the first argument of each function,
# so `from_nodes` receives `Line` as `cls` and `_format_jinja_node` receives
# `JinjaFormatter` as `self`.
Line.from_nodes = MethodType(from_nodes, Line)
JinjaFormatter._format_jinja_node = MethodType(_format_jinja_node, JinjaFormatter)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class TinybirdNodeManager(NodeManager):
    """sqlfmt ``NodeManager`` that tolerates bracket errors and can keep keyword casing."""

    def __init__(self, case_sensitive_names: bool, lower_keywords: bool) -> None:
        """Store ``lower_keywords`` on top of the base manager's configuration."""
        super().__init__(case_sensitive_names)
        self.lower_keywords = lower_keywords

    def create_node(self, token: Token, previous_node: Optional[Node]) -> Node:
        """
        Create the Node for ``token``, falling back to a plain Node when the base
        implementation raises ``SqlfmtBracketError``.
        """
        try:
            return super().create_node(token, previous_node)
        except SqlfmtBracketError:
            # we already have tb check and the toolset to check the SQL, no need for this extra validation
            prev_token, extra_whitespace = get_previous_token(previous_node)
            node_prefix = self.whitespace(token, prev_token, extra_whitespace)
            node_value = self.standardize_value(token)
            return Node(
                token=token,
                previous_node=previous_node,
                prefix=node_prefix,
                value=node_value,
                formatting_disabled=False,
            )

    def raise_on_mismatched_bracket(self, token: Token, last_bracket: Node) -> None:
        """Do nothing: mismatched brackets are deliberately not reported here."""
        # we already have tb check and the toolset to check the SQL, no need for this extra validation
        pass

    def standardize_value(self, token: Token) -> str:
        """
        Tinybird => Patched to not lower keywords
        https://github.com/tconbeer/sqlfmt/blob/c11775b92d8a45f0e91d871b81a88a894d620bec/src/sqlfmt/node_manager.py#L215

        With ``lower_keywords`` set, the base implementation is used unchanged. Otherwise
        the token text is returned as written, except that runs of whitespace are
        collapsed to a single space for the keyword-like token types listed below.
        """
        if self.lower_keywords:
            return super().standardize_value(token)

        whitespace_normalized_types = (
            TokenType.UNTERM_KEYWORD,
            TokenType.STATEMENT_START,
            TokenType.STATEMENT_END,
            TokenType.WORD_OPERATOR,
            TokenType.ON,
            TokenType.BOOLEAN_OPERATOR,
            TokenType.SET_OPERATOR,
        )
        if token.type not in whitespace_normalized_types:
            return token.token
        return " ".join(token.token.split())
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class TinybirdDialect(ClickHouse):
    """
    This is an extension of the base rules.

    We might need to override the base `word_operator` and `unterm_keyword` rules with some custom ClickHouse terms.

    For now we are just overriding the end blocks for `if` and `for` to work with Tornado templates.

    https://github.com/tconbeer/sqlfmt/blob/c11775b92d8a45f0e91d871b81a88a894d620bec/src/sqlfmt/dialect.py#L55
    """

    def __init__(self, lower_keywords: bool = False) -> None:
        """Initialize the base dialect, then swap in the Tinybird rule overrides."""
        super().__init__()
        self.lower_keywords = lower_keywords

        quoted_name_rule = Rule(
            name="quoted_name",
            priority=200,
            pattern=group(
                # tripled single quotes (optionally raw/bytes)
                r"(rb?|b|br)?'''.*?'''",
                # tripled double quotes
                r'(rb?|b|br)?""".*?"""',
                # possibly escaped double quotes
                r'(rb?|b|br|u&|@)?"([^"\\]*(\\.[^"\\]*|""[^"\\]*)*)"',
                # possibly escaped single quotes
                r"(rb?|b|br|u&|x)?'([^'\\]*(\\.[^'\\]*|''[^'\\]*)*)'",
                r"\$\w*\$[^$]*?\$\w*\$",  # pg dollar-delimited strings
                # possibly escaped backtick
                r"`([^`\\]*(\\.[^`\\]*)*)`",
                r"((?<!\\)\$[\w$]+)",  # Words starting with $ for .incl files
            ),
            action=partial(actions.add_node_to_buffer, token_type=TokenType.QUOTED_NAME),
        )
        jinja_if_end_rule = Rule(
            name="jinja_if_block_end",
            priority=203,
            pattern=group(r"\{%-?\s*end\s*-?%\}"),
            action=actions.raise_sqlfmt_bracket_error,
        )
        jinja_for_end_rule = Rule(
            name="jinja_for_block_end",
            priority=211,
            pattern=group(r"\{%-?\s*end\s*-?%\}"),
            action=actions.raise_sqlfmt_bracket_error,
        )
        override_rules = {
            "main": [quoted_name_rule],
            "jinja": [jinja_if_end_rule, jinja_for_end_rule],
        }

        for section, overrides in override_rules.items():
            for override in overrides:
                registered = self.RULES[section]
                # Only the first registered rule with the same name is replaced; an
                # override without a same-named registered rule is not added.
                existing = next((rule for rule in registered if rule.name == override.name), None)
                if existing is not None:
                    registered.remove(existing)
                    registered.append(override)

    def initialize_analyzer(self, line_length: int) -> Analyzer:
        """
        Creates and returns an analyzer that uses the Dialect's rules for lexing
        Custom NodeManager for Tinybird
        """
        return Analyzer(
            line_length=line_length,
            rules=self.get_rules(),
            node_manager=TinybirdNodeManager(self.case_sensitive_names, self.lower_keywords),
        )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
@dataclass
class TBMode(Mode):
    """sqlfmt ``Mode`` whose dialect is always a ``TinybirdDialect``."""

    # Forwarded to TinybirdDialect when the dialect is created in __post_init__.
    lower_keywords: bool = False

    def __post_init__(self) -> None:
        """
        Tinybird => Overridden to use `TinybirdDialect`
        https://github.com/tconbeer/sqlfmt/blob/c11775b92d8a45f0e91d871b81a88a894d620bec/src/sqlfmt/mode.py#L31
        """
        self.dialect = TinybirdDialect(self.lower_keywords)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _calc_str(self) -> str:
|
|
252
|
+
if self.is_multiline:
|
|
253
|
+
return self.token.token + "\n"
|
|
254
|
+
else:
|
|
255
|
+
marker, comment_text = self._comment_parts()
|
|
256
|
+
if comment_text == "":
|
|
257
|
+
return marker + "\n"
|
|
258
|
+
return marker + " " + comment_text + "\n"
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# Monkey patch: expose the function above as the `_calc_str` property of sqlfmt's Comment.
Comment._calc_str = property(_calc_str)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def format_sql_template(sql: str, line_length: Optional[int] = None, lower_keywords: bool = False) -> str:
    """
    Format a SQL query or SQL template with the Tinybird-flavoured sqlfmt setup.

    Args:
        sql: Text to format. A leading ``%`` (after stripping whitespace) is removed
            before formatting and re-emitted on its own line as ``"%\\n"``.
        line_length: Maximum line length; ``DEFAULT_FMT_LINE_LENGTH`` when falsy.
        lower_keywords: Forwarded to ``TBMode``.

    Returns:
        The formatted text with surrounding whitespace stripped. Formatting is best
        effort: if anything raises, a warning is logged and the unformatted text is
        returned instead (stripped, when the failure happened after stripping).
    """
    try:
        # https://github.com/tconbeer/sqlfmt/blob/c11775b92d8a45f0e91d871b81a88a894d620bec/src/sqlfmt/mode.py#L16-L29
        config: Dict[str, Any] = {
            "line_length": line_length or DEFAULT_FMT_LINE_LENGTH,
            "lower_keywords": lower_keywords,
        }

        mode = TBMode(**config)
        sql = sql.strip()
        if not sql:
            # Nothing to format. Previously `sql[0]` raised IndexError here, which was
            # swallowed below and reported as a misleading "sqlfmt error" warning.
            return sql

        if sql.startswith("%"):
            return "%\n" + api.format_string(sql[1:], mode=mode).strip()
        return api.format_string(sql, mode=mode).strip()
    except Exception as e:
        # Deliberate best effort: a formatter failure must not break the caller.
        logging.warning(f"sqlfmt error: {str(e)}")
        return sql
|
tinybird/sql_toolset.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import logging
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from typing import Any, FrozenSet, List, Optional, Set, Tuple
|
|
7
|
+
|
|
8
|
+
from chtoolset import query as chquery
|
|
9
|
+
from toposort import toposort
|
|
10
|
+
|
|
11
|
+
from tinybird.ch_utils.constants import COPY_ENABLED_TABLE_FUNCTIONS, ENABLED_TABLE_FUNCTIONS
|
|
12
|
+
|
|
13
|
+
# Marker database name. In this module it is compared against the database component
# of replacements and resources (see replace_tables and
# is_invalid_resource_from_other_workspace).
VALID_REMOTE = "VALID_REMOTE"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class InvalidFunction(ValueError):
    """Raised when a query uses a function that is unknown or not allowed."""

    def __init__(self, msg: str = "", table_function_name: str = ""):
        """
        Build the error message.

        When ``table_function_name`` is given it determines the message and ``msg`` is
        ignored; otherwise ``msg`` is used, with "is restricted" expanded to
        "is restricted to Copy Pipes" if it mentions a Copy-enabled table function.
        The final text is kept on ``self.msg``.
        """
        if any(fn for fn in COPY_ENABLED_TABLE_FUNCTIONS if fn in msg):
            msg = msg.replace("is restricted", "is restricted to Copy Pipes")

        if not table_function_name:
            self.msg = msg
        elif table_function_name in COPY_ENABLED_TABLE_FUNCTIONS:
            self.msg = f"The {table_function_name} table function is only allowed in Copy Pipes"
        else:
            self.msg = f"The query uses disabled table functions: '{table_function_name}'"
        super().__init__(self.msg)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class InvalidResource(ValueError):
    """Raised when a query references a resource that cannot be found."""

    def __init__(self, database: str, table: str, default_database: str = ""):
        """
        Build a "Resource '<name>' not found" error.

        The database is dropped from the name (and stored as ``""``) when it equals a
        non-empty ``default_database``. The message is kept on ``self.msg``; the
        effective database and the table are kept on ``self.database`` / ``self.table``.
        """
        hide_database = bool(default_database) and database == default_database
        if hide_database:
            database = ""

        resource_name = table if not database else f"{database}.{table}"
        self.msg = f"Resource '{resource_name}' not found"
        super().__init__(self.msg)
        self.database = database
        self.table = table
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def format_sql(sql: str) -> str:
    """Return ``sql`` as formatted by ``chquery.format``."""
    formatted = chquery.format(sql)
    return formatted
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def format_where_for_mutation_command(where_clause: str) -> str:
    """
    Build the ``DELETE WHERE ...`` text for a mutation from a raw condition.

    The condition is normalized by formatting it as ``SELECT <condition>``, then
    escaped (backslashes and single quotes doubled) and formatted once more as a
    quoted string literal; the surrounding quotes of that literal are dropped.

    Only the leading ``SELECT `` added here is split off (``maxsplit=1``), so a
    condition that itself contains ``SELECT `` (for example a subquery) is no longer
    cut at its first inner ``SELECT``.

    >>> format_where_for_mutation_command("numnights = 99")
    'DELETE WHERE numnights = 99'
    >>> format_where_for_mutation_command("\\nnumnights = 99")
    'DELETE WHERE numnights = 99'
    >>> format_where_for_mutation_command("reservationid = 'foo'")
    "DELETE WHERE reservationid = \\\\'foo\\\\'"
    >>> format_where_for_mutation_command("reservationid = '''foo'")
    "DELETE WHERE reservationid = \\\\'\\\\\\\\\\\\'foo\\\\'"
    >>> format_where_for_mutation_command("reservationid = '\\\\'foo'")
    "DELETE WHERE reservationid = \\\\'\\\\\\\\\\\\'foo\\\\'"
    """
    formatted_condition = chquery.format(f"""SELECT {where_clause}""").split("SELECT ", 1)[1]
    formatted_condition = formatted_condition.replace("\\", "\\\\").replace("'", "''")
    quoted_condition = chquery.format(f"SELECT '{formatted_condition}'").split("SELECT ", 1)[1]
    # Strip the quotes that delimit the string literal.
    return f"DELETE WHERE {quoted_condition[1:-1]}"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@lru_cache(maxsize=2**13)
def sql_get_used_tables_cached(
    sql: str,
    raising: bool = False,
    default_database: str = "",
    table_functions: bool = True,
    function_allow_list: Optional[FrozenSet[str]] = None,
) -> List[Tuple[str, str, str]]:
    """Memoized variant of `sql_get_used_tables` (more like: get used sql names).

    Results are cached per argument combination, so the returned list is shared
    between calls and must not be mutated by callers.

    Returns a list of tuples: (database_or_namespace, table_name, table_func).
    With ``table_functions=False`` entries that are only a table function are dropped
    and the function component of the remaining entries is blanked.

    If the query cannot be analyzed (``ValueError``): with ``raising=False`` the whole
    input is returned as a single table name under ``default_database``; with
    ``raising=True`` the error is re-raised, converted to ``InvalidFunction`` for
    restricted functions and for unknown ``tb_secret`` / ``tb_var`` calls.

    >>> sql_get_used_tables("SELECT 1 FROM the_table")
    [('', 'the_table', '')]
    >>> sql_get_used_tables("SELECT 1 FROM the_database.the_table")
    [('the_database', 'the_table', '')]
    >>> sql_get_used_tables("SELECT * from numbers(100)")
    [('', '', 'numbers')]
    >>> sql_get_used_tables("SELECT * FROM table1, table2")
    [('', 'table1', ''), ('', 'table2', '')]
    >>> sql_get_used_tables("SELECT * FROM table1, table2", table_functions=False)
    [('', 'table1', ''), ('', 'table2', '')]
    >>> sql_get_used_tables("SELECT * FROM numbers(100)", table_functions=False)
    []
    >>> sql_get_used_tables("SELECT * FROM table1, numbers(100)", table_functions=False)
    [('', 'table1', '')]
    >>> sql_get_used_tables("SELECT * FROM `d_d3926a`.`t_976af08ec4b547419e729c63e754b17b`", table_functions=False)
    [('d_d3926a', 't_976af08ec4b547419e729c63e754b17b', '')]
    """
    try:
        allowed_functions = [] if function_allow_list is None else list(function_allow_list)

        used: List[Tuple[str, str, str]] = chquery.tables(
            sql, default_database=default_database, function_allow_list=allowed_functions
        )
        if table_functions:
            return used
        return [(entry[0], entry[1], "") for entry in used if entry[0] or entry[1]]
    except ValueError as e:
        if not raising:
            return [(default_database, sql, "")]

        error_text = str(e)
        if "is restricted. Contact support@tinybird.co" in error_text:
            raise InvalidFunction(msg=error_text) from e
        if "Unknown function tb_secret" in error_text:
            raise InvalidFunction(msg="Unknown function tb_secret. Usage: {{tb_secret('secret_name')}}") from e
        if "Unknown function tb_var" in error_text:
            raise InvalidFunction(msg="Unknown function tb_var. Usage: {{tb_var('var_name')}}") from e
        raise
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def sql_get_used_tables(
    sql: str,
    raising: bool = False,
    default_database: str = "",
    table_functions: bool = True,
    function_allow_list: Optional[FrozenSet[str]] = None,
) -> List[Tuple[str, str, str]]:
    """More like: get used sql names

    Returns a list of tuples: (database_or_namespace, table_name, table_func).

    The lookup goes through the memoized `sql_get_used_tables_cached`; a shallow copy
    is returned so callers can modify the list without touching the cached one.
    """
    # A missing allow list is passed to the cached function as an empty frozenset.
    allow_list_key = function_allow_list if function_allow_list is not None else frozenset()
    cached_tables = sql_get_used_tables_cached(
        sql, raising, default_database, table_functions, function_allow_list=allow_list_key
    )
    return copy.copy(cached_tables)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class ReplacementsDict(dict):
    """Dict whose values may be lazy: callables are resolved on first access and cached."""

    def __getitem__(self, key):
        """
        Return the value for ``key``, resolving it if it is still lazy.

        A two-item tuple value has its second item called when callable; any other
        callable value is called directly. The resolved value replaces the stored one,
        so each callable runs at most once per key.
        """
        stored = super().__getitem__(key)

        if isinstance(stored, tuple):
            first, second = stored
            if callable(second):
                second = second()
                super().__setitem__(key, (first, second))
            return first, second

        if not callable(stored):
            return stored

        resolved = stored()
        super().__setitem__(key, resolved)
        return resolved
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def tables_or_sql(replacement: Tuple[str, str], table_functions=False) -> set:
    """
    Return the names used by a replacement, or the replacement itself.

    Args:
        replacement: ``(database, sql_or_table_name)`` pair. It is indexed and stored
            in a set below, so it has to be a hashable sequence rather than a dict.
        table_functions: Forwarded to `sql_get_used_tables`.

    Returns:
        The set of ``(database, table, table_function)`` tuples found by
        `sql_get_used_tables`. If that lookup fails and the second item does not start
        with ``"("``, a set containing only ``replacement``.

    Raises:
        Whatever `sql_get_used_tables` raised, when the second item starts with ``"("``.
    """
    try:
        return set(
            sql_get_used_tables(
                replacement[1], default_database=replacement[0], raising=True, table_functions=table_functions
            )
        )
    except Exception:
        # Slicing (instead of indexing [0]) keeps an empty name from raising
        # IndexError here and hiding the original error.
        if replacement[1][:1] == "(":
            raise
        return {replacement}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _separate_as_tuple_if_contains_database_and_table(definition: str) -> Any:
|
|
162
|
+
if "." in definition:
|
|
163
|
+
database_and_table_separated = definition.split(".")
|
|
164
|
+
return database_and_table_separated[0], database_and_table_separated[1]
|
|
165
|
+
return definition
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def replacements_to_tuples(replacements: dict) -> dict:
    """
    Return a copy of ``replacements`` in which every key and value of the form
    ``"database.table"`` is turned into a ``(database, table)`` tuple; entries
    without a dot are kept as they are.
    """
    return {
        _separate_as_tuple_if_contains_database_and_table(
            source
        ): _separate_as_tuple_if_contains_database_and_table(target)
        for source, target in replacements.items()
    }
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@lru_cache(maxsize=2**13)
def replace_tables_chquery_cached(
    sql: str,
    sorted_replacements: Optional[tuple] = None,
    default_database: str = "",
    output_one_line: bool = False,
    timestamp: Optional[datetime] = None,
    function_allow_list: Optional[FrozenSet[str]] = None,
) -> str:
    """
    Memoized wrapper around ``chquery.replace_tables``.

    All arguments have to be hashable because they form the cache key, which is why
    the replacements arrive as a tuple of ``(key, value)`` pairs and the allow list as
    a frozenset; both are converted back (to a dict and a list) for the call.

    ``timestamp`` is not used in the body; it only takes part in the cache key.
    NOTE(review): presumably this lets callers force a fresh entry - confirm.
    """
    table_map = {} if not sorted_replacements else dict(sorted_replacements)
    allowed_functions = [] if function_allow_list is None else list(function_allow_list)

    return chquery.replace_tables(
        sql,
        table_map,
        default_database=default_database,
        one_line=output_one_line,
        function_allow_list=allowed_functions,
    )
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def replace_tables(
    sql: str,
    replacements: dict,
    default_database: str = "",
    check_functions: bool = False,
    only_replacements: bool = False,
    valid_tables: Optional[Set[Tuple[str, str]]] = None,
    output_one_line: bool = False,
    timestamp: Optional[datetime] = None,
    function_allow_list: Optional[FrozenSet[str]] = None,
) -> str:
    """
    Given a query and a list of table replacements, returns the query after applying the table replacements.
    It takes into account dependencies between replacement subqueries (if any)
    It also validates the sql to verify it's valid and doesn't use unknown or prohibited functions

    Outline:
      1. Normalize replacement keys and values to ``(database, name)`` tuples.
      2. Walk the tables used by the query and, transitively, by each replacement,
         recording which table depends on which.
      3. Apply the replacements group by group in reversed topological order.

    Raises:
        InvalidFunction: a table function outside the enabled set is used.
        InvalidResource: a non-replaced table is rejected by `is_invalid_resource`
            (only checked when ``only_replacements`` is False and ``default_database``
            is not empty).
    """
    allow_list_key = frozenset() if function_allow_list is None else function_allow_list

    def _validate_and_format(query: str) -> str:
        # Always call replace_tables to do validation and formatting
        return replace_tables_chquery_cached(
            query, None, output_one_line=output_one_line, timestamp=timestamp, function_allow_list=allow_list_key
        )

    if not replacements:
        return _validate_and_format(sql)

    # 1. Normalize the replacements; bare names get the default database.
    replaced_with = set()
    normalized = ReplacementsDict()
    for source, target in replacements.items():
        source_key = source if isinstance(source, tuple) else (default_database, source)
        normalized[source_key] = target if isinstance(target, tuple) else (default_database, target)
        replaced_with.add(target)

    # 2. Collect dependencies, starting from the tables used by the query itself.
    dependencies: defaultdict = defaultdict(set)
    pending = sql_get_used_tables(
        sql,
        default_database=default_database,
        raising=True,
        table_functions=check_functions,
        function_allow_list=function_allow_list,
    )
    visited = set()
    if function_allow_list is None:
        enabled_functions = ENABLED_TABLE_FUNCTIONS
    else:
        enabled_functions = ENABLED_TABLE_FUNCTIONS.union(set(function_allow_list))

    while pending:
        current = pending.pop()
        if len(current) == 3:
            current_database, current_name, current_function = current
            if current_function and current_function not in enabled_functions:
                raise InvalidFunction(table_function_name=current_function)
            if not (current_database or current_name):
                # Entry is only a table function: nothing to replace.
                continue
            current = (current_database, current_name)

        visited.add(current)
        if current not in normalized:
            # Register the table even though it has no dependencies.
            dependencies[current] |= set()
            continue

        replacement = normalized[current]
        referenced = tables_or_sql(replacement, table_functions=check_functions)
        dependencies[current] |= {(ref[0], ref[1]) for ref in referenced}
        for dependent in list(referenced):
            if len(dependent) == 3:
                if (
                    dependent[2]
                    and dependent[2] not in enabled_functions
                    and not (dependent[2] in ["cluster"] and replacement[0] == VALID_REMOTE)
                ):
                    raise InvalidFunction(table_function_name=dependent[2])
                if not (dependent[0] or dependent[1]):
                    continue
                dependent = (dependent[0], dependent[1])
            if dependent not in visited:
                pending.append(dependent)

    ordered_groups = list(reversed(list(toposort(dependencies))))
    if not ordered_groups:
        return _validate_and_format(sql)

    # 3. Apply the replacements one dependency group at a time.
    for group in ordered_groups:
        group_replacements = {}
        for resource in group:
            if resource in normalized:
                group_replacements[resource] = normalized[resource]
                continue
            if only_replacements:
                continue

            database, table_name = resource
            if (
                table_name
                and default_database != ""
                and is_invalid_resource(resource, database, default_database, replaced_with, valid_tables)
            ):
                logging.info(
                    "Resource not found in replace_tables in sql_toolset: %s",
                    {
                        "r": resource,
                        "default_database": default_database,
                        "_replaced_with": replaced_with,
                        "valid_tables": valid_tables,
                    },
                )
                raise InvalidResource(database, table_name, default_database=default_database)

        if not group_replacements:
            sql = _validate_and_format(sql)
            continue

        # We need to transform the dictionary into something cacheable, so a sorted tuple of tuples it is
        cacheable_replacements = tuple(sorted(group_replacements.items()))
        sql = replace_tables_chquery_cached(
            sql,
            cacheable_replacements,
            default_database=default_database,
            output_one_line=output_one_line,
            timestamp=timestamp,
            function_allow_list=allow_list_key,
        )

    return sql
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def is_invalid_resource(
    r: Tuple[str, str],
    database: str,
    default_database: str,
    _replaced_with: Set[Tuple[str, str]],
    valid_tables: Optional[Set[Tuple[str, str]]] = None,
) -> bool:
    """
    Tell whether resource ``r`` must be rejected.

    A resource is invalid when either the other-workspace check or the
    current-workspace check flags it; the second check only runs if the first passes.
    """
    if is_invalid_resource_from_other_workspace(r, database, default_database, _replaced_with):
        return True
    return is_invalid_resource_from_current_workspace(r, database, default_database, _replaced_with, valid_tables)
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def is_invalid_resource_from_other_workspace(
    r: Tuple[str, str], database: str, default_database: str, _replaced_with: Set[Tuple[str, str]]
) -> bool:
    """
    Return True when ``database`` is none of the default database, ``"tinybird"`` or
    ``VALID_REMOTE``, and ``r`` is not one of the replacement targets.
    """
    accepted_databases = [default_database, "tinybird", VALID_REMOTE]
    if database in accepted_databases:
        return False
    return r not in _replaced_with
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def is_invalid_resource_from_current_workspace(
    r: Tuple[str, str],
    database: str,
    default_database: str,
    _replaced_with: Set[Tuple[str, str]],
    valid_tables: Optional[Set[Tuple[str, str]]],
) -> bool:
    """
    Return True when ``r`` lives in the default database but is neither a known valid
    table nor a replacement target.

    Without a (non-empty) ``valid_tables`` set nothing is checked and the result is False.
    """
    if database != default_database:
        return False
    if not valid_tables:
        return False
    return r not in valid_tables and r not in _replaced_with
|