tinybird-0.0.1.dev0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic.
- tinybird/__cli__.py +8 -0
- tinybird/ch_utils/constants.py +244 -0
- tinybird/ch_utils/engine.py +855 -0
- tinybird/check_pypi.py +25 -0
- tinybird/client.py +1281 -0
- tinybird/config.py +117 -0
- tinybird/connectors.py +428 -0
- tinybird/context.py +23 -0
- tinybird/datafile.py +5589 -0
- tinybird/datatypes.py +434 -0
- tinybird/feedback_manager.py +1022 -0
- tinybird/git_settings.py +145 -0
- tinybird/sql.py +865 -0
- tinybird/sql_template.py +2343 -0
- tinybird/sql_template_fmt.py +281 -0
- tinybird/sql_toolset.py +350 -0
- tinybird/syncasync.py +682 -0
- tinybird/tb_cli.py +25 -0
- tinybird/tb_cli_modules/auth.py +252 -0
- tinybird/tb_cli_modules/branch.py +1043 -0
- tinybird/tb_cli_modules/cicd.py +434 -0
- tinybird/tb_cli_modules/cli.py +1571 -0
- tinybird/tb_cli_modules/common.py +2082 -0
- tinybird/tb_cli_modules/config.py +344 -0
- tinybird/tb_cli_modules/connection.py +803 -0
- tinybird/tb_cli_modules/datasource.py +900 -0
- tinybird/tb_cli_modules/exceptions.py +91 -0
- tinybird/tb_cli_modules/fmt.py +91 -0
- tinybird/tb_cli_modules/job.py +85 -0
- tinybird/tb_cli_modules/pipe.py +858 -0
- tinybird/tb_cli_modules/regions.py +9 -0
- tinybird/tb_cli_modules/tag.py +100 -0
- tinybird/tb_cli_modules/telemetry.py +310 -0
- tinybird/tb_cli_modules/test.py +107 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit.py +340 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit_lib.py +71 -0
- tinybird/tb_cli_modules/token.py +349 -0
- tinybird/tb_cli_modules/workspace.py +269 -0
- tinybird/tb_cli_modules/workspace_members.py +212 -0
- tinybird/tornado_template.py +1194 -0
- tinybird-0.0.1.dev0.dist-info/METADATA +2815 -0
- tinybird-0.0.1.dev0.dist-info/RECORD +45 -0
- tinybird-0.0.1.dev0.dist-info/WHEEL +5 -0
- tinybird-0.0.1.dev0.dist-info/entry_points.txt +2 -0
- tinybird-0.0.1.dev0.dist-info/top_level.txt +4 -0
tinybird/ch_utils/engine.py (new file)
@@ -0,0 +1,855 @@
import logging
import re
from collections import defaultdict
from dataclasses import asdict
from typing import Any, Callable, Dict, Iterable, List, Optional

from ..sql import (
    TableIndex,
    TableProjection,
    col_name,
    engine_replicated_to_local,
    parse_indexes_structure,
    parse_table_structure,
)

DEFAULT_EMPTY_PARAMETERS = ["ttl", "partition_key", "sorting_key"]
DEFAULT_JOIN_EMPTY_PARAMETERS = ["join_strictness", "join_type", "key_columns"]

# Currently we only support the simplest TTLs:
# f(X) + toIntervalZ(N)
# * `f()` is an optional CH function or chain of functions (we don't care)
# * `X` is a column. We also accept spaces and / since some TTLs do (column / 1000)
# * `+` is the exact char
# * `toInterval` are exact chars
# * `Z` is [A-Za-z]*
# * `N` is a number
SIMPLE_TTL_DEFINITION = re.compile(r"""^([a-zA-Z0-9_\-\.\(\)\ \/\*]*) \+ (toInterval[a-zA-Z]*\([0-9]+\))$""")
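
A quick sanity check of this pattern, in the module's own doctest style (an illustrative snippet, not part of the diff): the left-hand side may be any function chain over a column, but the right-hand side must be a literal toInterval*() call.

>>> m = SIMPLE_TTL_DEFINITION.search("toDate(timestamp) + toIntervalDay(1)")
>>> (m.group(1), m.group(2))
('toDate(timestamp)', 'toIntervalDay(1)')
>>> SIMPLE_TTL_DEFINITION.search("toDate(__timestamp) + 60") is None
True
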
class TableDetails:
    """
    >>> ed = TableDetails({})
    >>> ed.engine_full == None
    True
    >>> ed.engine == ''
    True
    >>> ed.to_json()
    {'engine_full': None, 'engine': ''}
    >>> ed.to_datafile()
    ''

    >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048", "engine": "MergeTree", "partition_key": "toYear(timestamp)", "sorting_key": "timestamp, cityHash64(location)", "primary_key": "timestamp, cityHash64(location)", "sampling_key": "cityHash64(location)", "settings": "index_granularity = 32, index_granularity_bytes = 2048", "ttl": None })
    >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(1)")
    True
    >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY", "engine": "MergeTree", "partition_key": "toYear(timestamp)", "sorting_key": "timestamp, cityHash64(location)", "primary_key": "timestamp, cityHash64(location)", "sampling_key": "cityHash64(location)", "settings": "index_granularity = 32, index_granularity_bytes = 2048", "ttl": "toDate(timestamp) + INTERVAL 1 DAY" })
    >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(1)")
    False
    >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(2)")
    True
    >>> ed.diff_ttl("toDate(timestamp) + INTERVAL DAY 2")
    True
    >>> ed.diff_ttl("toDate(timestamp) + INTERVAL 1 DAY")
    False
    >>> ed.diff_ttl("")
    True
    >>> ed.engine_full
    'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY'
    >>> ed.engine
    'MergeTree'
    >>> ed.to_json()
    {'engine_full': 'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY', 'engine': 'MergeTree', 'partition_key': 'toYear(timestamp)', 'sorting_key': 'timestamp, cityHash64(location)', 'sampling_key': 'cityHash64(location)', 'settings': 'index_granularity = 32, index_granularity_bytes = 2048', 'ttl': 'toDate(timestamp) + INTERVAL 1 DAY'}
    >>> ed.to_datafile()
    'ENGINE "MergeTree"\\nENGINE_PARTITION_KEY "toYear(timestamp)"\\nENGINE_SORTING_KEY "timestamp, cityHash64(location)"\\nENGINE_SAMPLING_KEY "cityHash64(location)"\\nENGINE_SETTINGS "index_granularity = 32, index_granularity_bytes = 2048"\\nENGINE_TTL "toDate(timestamp) + INTERVAL 1 DAY"'

    >>> ed = TableDetails({"engine_full": "Join(ANY, LEFT, id)", "engine": "Join", "partition_key": "", "sorting_key": "", "primary_key": "", "sampling_key": ""})
    >>> ed.engine_full
    'Join(ANY, LEFT, id)'
    >>> ed.engine
    'Join'
    >>> ed.to_json()
    {'engine_full': 'Join(ANY, LEFT, id)', 'engine': 'Join', 'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
    >>> ed.to_datafile()
    'ENGINE "Join"\\nENGINE_JOIN_STRICTNESS "ANY"\\nENGINE_JOIN_TYPE "LEFT"\\nENGINE_KEY_COLUMNS "id"'

    >>> ed = TableDetails({"database": "d_01", "name": "t_01", "engine": "Join", "join_strictness": "ANY", "join_type": "LEFT", "key_columns": "id"})
    >>> ed.engine_full == None
    True
    >>> ed.engine
    'Join'
    >>> ed.to_json()
    {'engine_full': None, 'engine': 'Join', 'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
    >>> ed.to_datafile()
    'ENGINE "Join"\\nENGINE_JOIN_STRICTNESS "ANY"\\nENGINE_JOIN_TYPE "LEFT"\\nENGINE_KEY_COLUMNS "id"'
    >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800 TTL toDate(timestamp) + INTERVAL 1 DAY"})
    >>> ed.engine_full
    'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800 TTL toDate(timestamp) + INTERVAL 1 DAY'

    >>> x = TableDetails({'database': 'd_01', 'name': 't_01', 'create_table_query': "CREATE TABLE d_01.t_01 (`project_id` String, `project_name` String, `project_repo` String, `owner_id` String, `updated_at` DateTime64(3)) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_test_1ad5e496b29246e1ade99117e9180f6f.t_1bac899a56b34b33921fbf468b4500f7', '{replica}', updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32", 'engine': 'ReplicatedReplacingMergeTree', 'partition_key': 'tuple()', 'sorting_key': 'project_id', 'primary_key': 'project_id', 'sampling_key': '', 'engine_full': "ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_01.t_01', '{replica}', updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32", 'settings': 'index_granularity = 32', 'ttl': ''})
    >>> x.primary_key

    """

    def __init__(self, details: Optional[Dict[str, Any]] = None):
        self.details = details or {}

    def __bool__(self):
        return bool(self.details)

    @property
    def engine_full(self) -> Optional[str]:
        _engine_full: Optional[str] = self.details.get("engine_full", None)
        if not _engine_full:
            return None

        settings = self.details.get("settings", None)
        # We cannot remove index_granularity = 8192 blindly because it might be followed by other settings
        if settings and settings.strip().lower() == "index_granularity = 8192":
            _engine_full = _engine_full.replace(" SETTINGS index_granularity = 8192", "")
        return engine_replicated_to_local(_engine_full)

    @property
    def original_engine_full(self) -> Optional[str]:
        return self.details.get("engine_full", None)

    @property
    def name(self) -> str:
        return self.details.get("name", "")

    @property
    def database(self) -> str:
        return self.details.get("database", "")

    @property
    def engine(self) -> str:
        _engine = self.details.get("engine", "")
        return _engine and _engine.replace("Replicated", "")

    @property
    def original_engine(self) -> Optional[str]:
        return self.details.get("engine", None)

    @property
    def version(self):
        _version = self.details.get("version", None)
        return _version

    def is_replicated(self):
        return "Replicated" in self.details.get("engine", None)

    def is_mergetree_family(self) -> bool:
        return self.engine is not None and "mergetree" in self.engine.lower()

    def supports_alter_add_column(self) -> bool:
        return self.is_mergetree_family() or (self.engine is not None and self.engine.lower() == "null")

    def is_replacing_engine(self) -> bool:
        if self.engine:
            engine_lower = self.engine.lower()
            is_aggregating = "aggregatingmergetree" in engine_lower
            is_replacing = "replacingmergetree" in engine_lower
            is_collapsing = "collapsingmergetree" in engine_lower
            return is_aggregating or is_replacing or is_collapsing
        return False

    def diff_ttl(self, new_ttl: str) -> bool:
        try:
            from tinybird.sql_toolset import format_sql

            current_ttl = format_sql(f"select {self.ttl}")[7:]
            new_ttl = format_sql(f"select {new_ttl}")[7:]
            return current_ttl != new_ttl
        except Exception:
            return self.ttl != new_ttl

    @property
    def partition_key(self) -> Optional[str]:
        return self.details.get("partition_key", None)

    @property
    def sorting_key(self) -> Optional[str]:
        _sorting_key = self.details.get("sorting_key", None)
        if self.is_replacing_engine() and not _sorting_key:
            raise ValueError(f"SORTING_KEY must be defined for the {self.engine} engine")
        if self.is_mergetree_family():
            return _sorting_key or "tuple()"
        return _sorting_key

    @property
    def primary_key(self) -> Optional[str]:
        _primary_key = self.details.get("primary_key", None)
        # When querying `system.tables`, it will return the `sorting_key` as `primary_key` even if it was not specified,
        # so we need to ignore it
        if self.sorting_key == _primary_key:
            return None
        return _primary_key

    @property
    def sampling_key(self) -> Optional[str]:
        return self.details.get("sampling_key", None)

    @property
    def settings(self):
        settings = self.details.get("settings", None)
        if settings and settings.strip().lower() != "index_granularity = 8192":
            return settings

    @property
    def ttl(self):
        return self.details.get("ttl", None)

    @property
    def ver(self):
        _ver = self.details.get("ver", None)
        return _ver

    @property
    def is_deleted(self):
        _is_deleted = self.details.get("is_deleted", None)
        return _is_deleted

    @property
    def columns(self):
        _columns = self.details.get("columns", None)
        return _columns

    @property
    def sign(self):
        _sign = self.details.get("sign", None)
        return _sign

    @property
    def join_strictness(self):
        _join_strictness = self.details.get("join_strictness", None)
        return _join_strictness

    @property
    def join_type(self):
        _join_type = self.details.get("join_type", None)
        return _join_type

    @property
    def key_columns(self):
        _key_columns = self.details.get("key_columns", None)
        return _key_columns

    @property
    def statistics(self) -> Dict[str, Any]:
        return {
            "bytes": self.details.get("total_bytes", None),
            "row_count": self.details.get("total_rows", None),
        }

    @property
    def indexes(self) -> List[TableIndex]:
        return _parse_indexes(str(self.details.get("create_table_query", "")))

    @property
    def projections(self) -> List[TableProjection]:
        return _parse_projections(self.details.get("create_table_query", ""))

    def to_json(self, exclude: Optional[List[str]] = None, include_empty_details: bool = False):
        # name, database are not exported since they are not part of the engine
        d: Dict[str, Any] = {
            "engine_full": self.engine_full,
            "engine": self.engine,
        }
        if self.partition_key:
            d["partition_key"] = self.partition_key
        if self.sorting_key:
            d["sorting_key"] = self.sorting_key
        if self.primary_key:
            d["primary_key"] = self.primary_key
        if self.sampling_key:
            d["sampling_key"] = self.sampling_key
        if self.settings:
            d["settings"] = self.settings
        if self.join_strictness:
            d["join_strictness"] = self.join_strictness
        if self.join_type:
            d["join_type"] = self.join_type
        if self.key_columns:
            d["key_columns"] = self.key_columns
        if self.ver:
            d["ver"] = self.ver
        if self.is_deleted:
            d["is_deleted"] = self.is_deleted
        if self.sign:
            d["sign"] = self.sign
        if self.version:
            d["version"] = self.version
        if self.ttl:
            d["ttl"] = self.ttl.strip()
        if self.indexes:
            d["indexes"] = [asdict(index) for index in self.indexes]

        if self.engine_full:
            engine_params = engine_params_from_engine_full(self.engine_full)
            d = {**d, **engine_params}

        if include_empty_details:
            if self.engine and self.engine.lower() == "join":
                d = set_empty_details(d, DEFAULT_JOIN_EMPTY_PARAMETERS)
            else:
                d = set_empty_details(d, DEFAULT_EMPTY_PARAMETERS)

        if exclude:
            for attr in exclude:
                if attr in d:
                    del d[attr]

        return d

    def to_datafile(self, include_empty_details: bool = False) -> str:
        d: Dict[str, Any] = self.to_json(
            exclude=["engine", "engine_full", "indexes"], include_empty_details=include_empty_details
        )
        engine: str = self.engine

        datafile: str = ""
        if engine:
            datafile += "\n".join(
                [f'ENGINE "{engine}"'] + [f'ENGINE_{k.upper()} "{v}"' for k, v in d.items() if v is not None]
            )

        return datafile
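
For reference, the datafile serialization the doctests above pin down: construct the details from datafile-style keys and render them back (an illustrative snippet, not part of the diff).

>>> ed = TableDetails({"engine": "Join", "join_strictness": "ANY", "join_type": "LEFT", "key_columns": "id"})
>>> print(ed.to_datafile())
ENGINE "Join"
ENGINE_JOIN_STRICTNESS "ANY"
ENGINE_JOIN_TYPE "LEFT"
ENGINE_KEY_COLUMNS "id"
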
def set_empty_details(details: Dict[str, str], parameters: Iterable[str]):
    for parameter in parameters:
        if parameter not in details:
            details[parameter] = ""

    return details


class EngineOption:
    def __init__(
        self,
        name: str,
        sql: str,
        required: Optional[bool] = None,
        default_value=None,
        is_valid: Optional[Callable[[List[Dict[str, Any]], str], Optional[str]]] = None,
        tb_param: Optional[str] = None,
    ):
        self.name = name
        self.sql = sql
        self.required = required
        self.default_value = default_value
        self.is_valid = is_valid
        self.tb_param = tb_param if tb_param else "_".join(["engine", name])


class EngineParam:
    def __init__(
        self,
        name: str,
        required: Optional[bool] = None,
        default_value=None,
        is_valid: Optional[Callable[[List[Dict[str, Any]], str], Optional[str]]] = None,
        tb_param: Optional[str] = None,
    ):
        self.name = name
        self.required = required
        self.default_value = default_value
        self.is_valid = is_valid
        self.tb_param = tb_param if tb_param else "_".join(["engine", name])


def engine_config(name: str, params: Optional[List[EngineParam]] = None, options: Optional[List[EngineOption]] = None):
    params = params or []
    options = options or []
    return (name, (params, options))


def column_is_valid(columns: List[Dict[str, Any]], column_name: str) -> str:
    schema_columns = [col_name(c["name"], backquotes=False) for c in columns]
    if column_name not in schema_columns:
        raise ValueError(f"'{column_name}' column is not present in schema")
    return col_name(column_name, backquotes=False)


def columns_are_valid(columns: List[Dict[str, Any]], column_names: str) -> str:
    schema_columns = [col_name(c["name"], backquotes=False) for c in columns]
    new_column_names = []
    for column_name in [x.strip() for x in column_names.split(",")]:
        if column_name not in schema_columns:
            raise ValueError(f"'{column_name}' column is not present in schema")
        new_column_names.append(col_name(column_name, backquotes=False))
    return ", ".join(new_column_names)


def sorting_key_is_valid(columns: List[Dict[str, Any]], value: Optional[str]) -> str:
    INVALID_SORTING_KEYS = ["tuple()"]

    if not value:
        raise ValueError("Sorting key can not be empty")
    if value in INVALID_SORTING_KEYS:
        raise ValueError(f"'{value}' is not a valid sorting key")
    return value


def case_insensitive_check(valid_values: List[str]) -> Callable[[List[Dict[str, Any]], str], Optional[str]]:
    """
    >>> valid_values = ['ANY', 'ALL']
    >>> checker = case_insensitive_check(valid_values)
    >>> checker([],'ALL')

    >>> valid_values = ['ANY', 'ALL']
    >>> checker = case_insensitive_check(valid_values)
    >>> checker([],'any')

    >>> valid_values = ['ANY', 'ALL']
    >>> checker = case_insensitive_check(valid_values)
    >>> checker([],'foo')
    Traceback (most recent call last):
    ...
    ValueError: valid values are ANY, ALL
    """

    def checker(columns: List[Dict[str, Any]], value: str):
        if value.upper() not in valid_values:
            raise ValueError(f"valid values are {', '.join(valid_values)}")

    return checker


# [PARTITION BY expr]
# [ORDER BY expr]
# [PRIMARY KEY expr]
# [SAMPLE BY expr]
# [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
# [SETTINGS name=value, ...]
MERGETREE_OPTIONS = [
    EngineOption(name="partition_key", sql="PARTITION BY"),
    EngineOption(name="sorting_key", sql="ORDER BY", default_value="tuple()"),
    EngineOption(name="primary_key", sql="PRIMARY KEY"),
    EngineOption(name="sampling_key", sql="SAMPLE BY"),
    EngineOption(name="ttl", sql="TTL"),
    EngineOption(name="settings", sql="SETTINGS"),
]
REPLACINGMERGETREE_OPTIONS = [
    EngineOption(name="partition_key", sql="PARTITION BY"),
    EngineOption(name="sorting_key", sql="ORDER BY", required=True, is_valid=sorting_key_is_valid),
    EngineOption(name="primary_key", sql="PRIMARY KEY"),
    EngineOption(name="sampling_key", sql="SAMPLE BY"),
    EngineOption(name="ttl", sql="TTL"),
    EngineOption(name="settings", sql="SETTINGS"),
]
ENABLED_ENGINES = [
    # MergeTree()
    engine_config("MergeTree", options=MERGETREE_OPTIONS),
    # ReplacingMergeTree([ver])
    engine_config(
        "ReplacingMergeTree",
        [EngineParam(name="ver", is_valid=column_is_valid), EngineParam(name="is_deleted", is_valid=column_is_valid)],
        REPLACINGMERGETREE_OPTIONS,
    ),
    # SummingMergeTree([columns])
    engine_config(
        "SummingMergeTree",
        [
            # This should check the columns are numeric ones
            EngineParam(name="columns", is_valid=columns_are_valid),
        ],
        MERGETREE_OPTIONS,
    ),
    # AggregatingMergeTree()
    engine_config("AggregatingMergeTree", options=REPLACINGMERGETREE_OPTIONS),
    # CollapsingMergeTree(sign)
    engine_config(
        "CollapsingMergeTree",
        [EngineParam(name="sign", required=True, is_valid=column_is_valid)],
        REPLACINGMERGETREE_OPTIONS,
    ),
    # VersionedCollapsingMergeTree(sign, version)
    engine_config(
        "VersionedCollapsingMergeTree",
        [
            EngineParam(name="sign", required=True, is_valid=column_is_valid),
            EngineParam(name="version", required=True, is_valid=column_is_valid),
        ],
        MERGETREE_OPTIONS,
    ),
    # Join(join_strictness, join_type, k1[, k2, ...])
    engine_config(
        "Join",
        [
            # https://github.com/ClickHouse/ClickHouse/blob/fa8e4e4735b932f08b6beffcb2d069b72de34401/src/Storages/StorageJoin.cpp
            EngineParam(
                name="join_strictness", required=True, is_valid=case_insensitive_check(["ANY", "ALL", "SEMI", "ANTI"])
            ),
            EngineParam(
                name="join_type", required=True, is_valid=case_insensitive_check(["LEFT", "INNER", "RIGHT", "FULL"])
            ),
            EngineParam(name="key_columns", required=True, is_valid=columns_are_valid),
        ],
    ),
    # Null()
    engine_config("Null"),
]


def get_engine_config(engine: str):
    for name, config in ENABLED_ENGINES:
        if engine.lower() == name.lower():
            return (name, config)
    raise ValueError(
        f"Engine {engine} is not supported, supported engines include: {', '.join([e[0] for e in ENABLED_ENGINES])}"
    )
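
Engine lookup is case-insensitive, and an unknown name fails with the list of supported engines (illustrative snippet, mirroring the doctest in engine_full_from_dict below):

>>> name, (params, options) = get_engine_config("replacingmergetree")
>>> name
'ReplacingMergeTree'
>>> get_engine_config("wadus")
Traceback (most recent call last):
...
ValueError: Engine wadus is not supported, supported engines include: MergeTree, ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree, VersionedCollapsingMergeTree, Join, Null
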
def engine_params(columns, params: List[EngineParam], args: Dict):
    params_values = []
    for p in params:
        if p.required and p.name not in args:
            raise ValueError(f"Missing required parameter '{p.name}'")
        param_value = args.get(p.name, None) or p.default_value
        if not param_value:
            continue
        if p.is_valid:
            check_is_valid(
                valid_check=p.is_valid, check_type="parameter", columns=columns, tb_param=p.tb_param, value=param_value
            )
        params_values.append(param_value)
    return params_values


def engine_options(columns, options: List[EngineOption], args: Dict):
    options_values = []
    engine_settings = ""

    for o in options:
        if o.required and o.name not in args:
            raise ValueError(f"Missing required option '{o.name}'")
        option_value = args.get(o.name) or o.default_value
        if o.is_valid:
            check_is_valid(
                valid_check=o.is_valid, check_type="option", columns=columns, tb_param=o.tb_param, value=option_value
            )

        if option_value:
            if o.sql.lower() == "settings":
                engine_settings = f"{o.sql} {option_value}"
            else:
                options_values.append(f"{o.sql} ({option_value})")

    if engine_settings:
        options_values.append(engine_settings)

    return options_values


def check_is_valid(
    valid_check: Callable[[List[Dict[str, Any]], str], Optional[str]],
    check_type: str,
    columns: List[Dict[str, Any]],
    tb_param: str,
    value: str,
):
    """
    >>> check_is_valid(sorting_key_is_valid, 'option', ['column-name'], 'sorting_key', 'date')

    >>> check_is_valid(sorting_key_is_valid, 'option', ['column-name'], 'sorting_key', 'tuple()')
    Traceback (most recent call last):
    ...
    ValueError: Invalid value 'tuple()' for option 'sorting_key', reason: 'tuple()' is not a valid sorting key
    """
    try:
        new_value = valid_check(columns, value)
        if new_value:
            value = new_value
    except Exception as e:
        raise ValueError(f"Invalid value '{value}' for {check_type} '{tb_param}', reason: {e}")


def build_engine(
    engine: str, columns: Optional[List], params: List[EngineParam], options: List[EngineOption], args: Dict
):
    return f"{engine}({', '.join(engine_params(columns, params, args))}) {' '.join(engine_options(columns, options, args))}".strip()


def engine_full_from_dict(
    engine: str, args: dict, schema: Optional[str] = None, columns: Optional[List[Dict[str, Any]]] = None
):
    """
    >>> schema = ''
    >>> engine_full_from_dict('wadus', {}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Engine wadus is not supported, supported engines include: MergeTree, ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree, VersionedCollapsingMergeTree, Join, Null
    >>> schema = ''
    >>> engine_full_from_dict('null', {}, schema=schema)
    'Null()'
    >>> schema = ''
    >>> engine_full_from_dict('null', {}, columns=[])
    'Null()'

    >>> schema = 'cid Int32'
    >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid'}, schema=schema)
    'Join(ANY, LEFT, cid)'
    >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid'}, columns=[{'name': 'cid', 'type': 'Int32', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'cid'}])
    'Join(ANY, LEFT, cid)'
    >>> schema = 'cid1 Int32, cid2 Int8'
    >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid1, cid2'}, schema=schema)
    'Join(ANY, LEFT, cid1, cid2)'
    >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'OUTER', 'key_columns': 'cid'}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Invalid value 'OUTER' for parameter 'engine_join_type', reason: valid values are LEFT, INNER, RIGHT, FULL

    >>> schema = ''
    >>> engine_full_from_dict('MergeTree', {}, schema=schema)
    'MergeTree() ORDER BY (tuple())'
    >>> engine_full_from_dict('MergeTree', {'sorting_key': 'local_date, cod_store'}, schema=schema)
    'MergeTree() ORDER BY (local_date, cod_store)'
    >>> engine_full_from_dict('MergeTree', {'partition_key': 'toDate(timestamp)', 'sorting_key': 'local_date, cod_store', 'settings': 'index_granularity = 32, index_granularity_bytes = 2048', 'ttl': 'toDate(local_date) + INTERVAL 1 DAY'}, schema=schema)
    'MergeTree() PARTITION BY (toDate(timestamp)) ORDER BY (local_date, cod_store) TTL (toDate(local_date) + INTERVAL 1 DAY) SETTINGS index_granularity = 32, index_granularity_bytes = 2048'

    >>> schema = ''
    >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column'}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Invalid value 'sign_column' for parameter 'engine_sign', reason: 'sign_column' column is not present in schema

    >>> schema = 'sign_column Int8'
    >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column'}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Missing required option 'sorting_key'

    >>> schema = 'sign_column Int8, key_column Int8'
    >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column', 'sorting_key': 'key_column'}, schema=schema)
    'CollapsingMergeTree(sign_column) ORDER BY (key_column)'

    >>> columns=[]
    >>> columns.append({'name': 'sign_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'sign_column'})
    >>> columns.append({'name': 'key_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'key_column'})
    >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column', 'sorting_key': 'key_column' }, columns=columns)
    'CollapsingMergeTree(sign_column) ORDER BY (key_column)'

    >>> schema = 'sign_column Int8'
    >>> engine_full_from_dict('AggregatingMergeTree', {}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Missing required option 'sorting_key'

    >>> columns=[]
    >>> columns.append({'name': 'key_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'key_column'})
    >>> engine_full_from_dict('AggregatingMergeTree', { 'sorting_key': 'key_column' }, columns=columns)
    'AggregatingMergeTree() ORDER BY (key_column)'

    >>> schema = 'ver_column Int8, key_column Int8'
    >>> engine_full_from_dict('ReplacingMergeTree', {}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Missing required option 'sorting_key'

    >>> engine_full_from_dict('ReplacingMergeTree', {'sorting_key': 'key_column'}, schema=schema)
    'ReplacingMergeTree() ORDER BY (key_column)'

    >>> engine_full_from_dict('ReplacingMergeTree', {'ver': 'ver_column', 'sorting_key': 'key_column'}, schema=schema)
    'ReplacingMergeTree(ver_column) ORDER BY (key_column)'

    >>> engine_full_from_dict('ReplacingMergeTree', {'ver': 'other_column'}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Invalid value 'other_column' for parameter 'engine_ver', reason: 'other_column' column is not present in schema

    >>> schema = 'col0 Int8, col1 Int8, col2 Int8'
    >>> engine_full_from_dict('SummingMergeTree', {}, schema=schema)
    'SummingMergeTree() ORDER BY (tuple())'
    >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col0'}, schema=schema)
    'SummingMergeTree(col0) ORDER BY (tuple())'
    >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col0, col2'}, schema=schema)
    'SummingMergeTree(col0, col2) ORDER BY (tuple())'
    >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col1, other_column'}, schema=schema)
    Traceback (most recent call last):
    ...
    ValueError: Invalid value 'col1, other_column' for parameter 'engine_columns', reason: 'other_column' column is not present in schema
    >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col1, other_column'}, schema=schema, columns=[])
    Traceback (most recent call last):
    ...
    ValueError: You can not use 'schema' and 'columns' at the same time
    >>> engine_full_from_dict('ReplacingMergeTree', {'partition_key': 'tuple()', 'sorting_key': 'project_id', 'settings': 'index_granularity = 32', 'ver': 'updated_at'}, "`project_id` String, `project_name` String, `project_repo` String, `owner_id` String, `updated_at` DateTime64(3)")
    'ReplacingMergeTree(updated_at) PARTITION BY (tuple()) ORDER BY (project_id) SETTINGS index_granularity = 32'
    """

    if schema is not None and columns is not None:
        raise ValueError("You can not use 'schema' and 'columns' at the same time")
    engine_config = get_engine_config(engine)
    name, (params, options) = engine_config
    if columns is None and schema is not None:
        columns = parse_table_structure(schema)

    engine_settings = {key.replace("engine_", ""): value for key, value in args.items()}

    for arg in engine_settings:
        if not hasattr(TableDetails, arg):
            raise ValueError(f"engine_{arg} is not a valid option")

    return build_engine(name, columns, params, options, engine_settings)
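
Callers can also pass datafile-style keys directly, since the engine_ prefix is stripped before validation. The following illustrative snippet is equivalent to the ReplacingMergeTree doctest above:

>>> engine_full_from_dict('ReplacingMergeTree', {'engine_ver': 'ver_column', 'engine_sorting_key': 'key_column'}, schema='ver_column Int8, key_column Int8')
'ReplacingMergeTree(ver_column) ORDER BY (key_column)'
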
def engine_params_from_engine_full(engine_full: str) -> Dict[str, Any]:
    """
    >>> engine_params_from_engine_full("Null()")
    {}
    >>> engine_params_from_engine_full("Join(ANY, LEFT, id)")
    {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
    >>> engine_params_from_engine_full("Join(ANY, LEFT, k1, k2)")
    {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'k1, k2'}
    >>> engine_params_from_engine_full("AggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/d_f837aa.sales_by_country_rt__v0_staging_t_00c3091e7530472caebda05e97288a1d', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY (purchase_location, cod_device, date) SETTINGS index_granularity = 8192")
    {}
    >>> engine_params_from_engine_full("ReplicatedSummingMergeTree('/clickhouse/tables/{layer}-{shard}/d_abcf3e.t_69f9da31f4524995b8911e1b24c80ab4', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY (date, purchase_location, sku_rank_lc) SETTINGS index_granularity = 8192")
    {}
    >>> engine_params_from_engine_full("ReplicatedSummingMergeTree('/clickhouse/tables/{layer}-{shard}/d_abcf3e.t_69f9da31f4524995b8911e1b24c80ab4', '{replica}', c1, c2) PARTITION BY toYYYYMM(date) ORDER BY (date, purchase_location, sku_rank_lc) SETTINGS index_granularity = 8192")
    {'columns': 'c1, c2'}
    >>> engine_params_from_engine_full("ReplacingMergeTree(insert_date) ORDER BY date")
    {'ver': 'insert_date'}
    >>> engine_params_from_engine_full("ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_f837aa.t_d3aaad001dee4d9e9e3067ccb789fb59_n1', '{replica}', insert_date) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    {'ver': 'insert_date'}
    >>> engine_params_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    {'sign': 'sign_c', 'version': 'version_c'}
    >>> engine_params_from_engine_full("ReplacingMergeTree(updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32")
    {'ver': 'updated_at'}
    """
    engine_full = engine_replicated_to_local(engine_full)
    for engine, (params, _options) in ENABLED_ENGINES:
        if engine_full.startswith(engine):
            m = re.search(rf"{engine}\(([^\)]*)\).*", engine_full)
            params_used = []
            if m:
                params_used = [x.strip() for x in m.group(1).split(",")]
            params_dict = defaultdict(list)
            param = None
            for i, v in enumerate(params_used):
                if i < len(params):
                    param = params[i]
                if param and v:
                    params_dict[param.name].append(v)

            return {k: ", ".join(v) for k, v in params_dict.items()}
    return {}
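
This is what TableDetails.to_json() relies on to recover datafile parameters from a live engine definition. An illustrative call (the ZooKeeper path and column name below are made up, following the doctest patterns above):

>>> engine_params_from_engine_full("ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/db.t', '{replica}', updated_at) ORDER BY pk")
{'ver': 'updated_at'}
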
def engine_local_to_replicated(engine: str, database: str, name: str) -> str:
    """
    Transforms an engine definition to a replicated one.

    >>> engine_local_to_replicated('MergeTree() order by (test)', 'test', 'foo')
    "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
    >>> engine_local_to_replicated('MergeTree order by (test)', 'test', 'foo')
    "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
    >>> engine_local_to_replicated('ReplacingMergeTree(timestamp) order by (test)', 'test', 'foo')
    "ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}',timestamp) order by (test)"
    >>> engine_local_to_replicated('AggregatingMergeTree order by (test)', 'test', 'foo')
    "ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
    >>> engine_local_to_replicated('AggregatingMergeTree order by (test) settings index_granularity = 8129', 'test', 'foo')
    "ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test) settings index_granularity = 8129"
    """

    def _replace(m: Any) -> str:
        parts = m.groups()

        engine_type = parts[0]
        engine_args = f",{parts[2]}" if parts[2] else ""
        engine_settings = parts[3]
        replication_args = f"'/clickhouse/tables/{{layer}}-{{shard}}/{database}.{name}','{{replica}}'"

        return f"Replicated{engine_type}MergeTree({replication_args}{engine_args}){engine_settings}"

    return re.sub(r"(.*)MergeTree(\(([^\)]*)\))*(.*)", _replace, engine.strip())
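
engine_replicated_to_local, imported from ..sql at the top of the module, goes in the opposite direction; together they let the CLI move between local and replicated definitions. An illustrative round trip (the database and table names are made up):

>>> engine_local_to_replicated('ReplacingMergeTree(updated_at) ORDER BY pk', 'db', 't')
"ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/db.t','{replica}',updated_at) ORDER BY pk"
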
def ttl_condition_from_engine_full(engine_full: Optional[str]) -> Optional[str]:
    """
    >>> ttl_condition_from_engine_full(None)

    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    'toDate(local_timeplaced) >= now() - toIntervalDay(3)'
    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL local_timeplaced + toIntervalDay(3) SETTINGS index_granularity = 8192")
    'local_timeplaced >= now() - toIntervalDay(3)'
    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced / 1000) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    'toDate(local_timeplaced / 1000) >= now() - toIntervalDay(3)'
    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toStartOfWeek(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    'toStartOfWeek(local_timeplaced) >= now() - toIntervalDay(3)'
    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) + toIntervalDay(3) SETTINGS index_granularity = 8192")
    'toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) >= now() - toIntervalDay(3)'
    >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk SETTINGS index_granularity = 8192")

    >>> ttl_condition_from_engine_full("ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/d_test_4683b7e9803547218ba5010eb0364233.t_42fc0805b31f4763991cccefa71eeda3', '{replica}') PARTITION BY toYear(date) ORDER BY date TTL toDate(date) + toIntervalDay(1) SETTINGS index_granularity = 8192")
    'toDate(date) >= now() - toIntervalDay(1)'
    >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYear(date) ORDER BY date TTL toDate(date) + toIntervalDay(1)")
    'toDate(date) >= now() - toIntervalDay(1)'
    >>> ttl_condition_from_engine_full("ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/d_03d680.t_ea9e3e784ef149caa1fcd5d772e61c42', '{replica}') PARTITION BY toStartOfHour(snapshot_id) ORDER BY (snapshot_id, ID_LOCALIZACION, ID_INSTALACION_RFID, COD_PRODUCTO_AS400, MODELO, CALIDAD, COLOR, TALLA, UBICACION_RFID) TTL snapshot_id + toIntervalHour(1) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800")
    'snapshot_id >= now() - toIntervalHour(1)'
    >>> ttl_condition_from_engine_full("ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/d_03d680.t_9ffe6f6790be4fae908685d3da4ee6f1', '{replica}') PARTITION BY snapshot_id ORDER BY (snapshot_id, COD_PRODUCTO_AS400, MODELO, CALIDAD) TTL snapshot_id + toIntervalHour(1) SETTINGS index_granularity = 8192")
    'snapshot_id >= now() - toIntervalHour(1)'
    >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYYYYMM(t) ORDER BY (t, c) TTL t + toIntervalDay(90) SETTINGS index_granularity = 8192")
    't >= now() - toIntervalDay(90)'
    >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYYYYMM(t) ORDER BY (t, c) TTL t + toIntervalDay(90)")
    't >= now() - toIntervalDay(90)'
    >>> ttl_condition_from_engine_full("MergeTree ORDER BY a TTL (toStartOfDay(a) + toIntervalSecond(b)) + toIntervalDay(1) SETTINGS index_granularity = 8192")

    >>> ttl_condition_from_engine_full("MergeTree ORDER BY a TTL (toStartOfDay(a) - toIntervalSecond(b)) + toIntervalSecond(1800) SETTINGS index_granularity = 8192")
    '(toStartOfDay(a) - toIntervalSecond(b)) >= now() - toIntervalSecond(1800)'
    >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL parseDateTimeBestEffortOrZero(toString(round(epoch / 1000))) + toIntervalDay(2)")
    'parseDateTimeBestEffortOrZero(toString(round(epoch / 1000))) >= now() - toIntervalDay(2)'
    >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) + toIntervalDay(10)")
    'toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) >= now() - toIntervalDay(10)'

    # Unsupported currently
    >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL toDate(__timestamp) + 60")

    >>> ttl_condition_from_engine_full("ttl")

    """
    if not engine_full:
        return None

    try:
        ttl_array = engine_full.split(" TTL ")
        if len(ttl_array) <= 1:
            return None
        settings_array = engine_full.split(" SETTINGS ")
        settings = " SETTINGS " + settings_array[1] if len(settings_array) > 1 else None
        ttl = ttl_array[1][: -(len(settings))] if settings else ttl_array[1]

        groups = SIMPLE_TTL_DEFINITION.search(ttl)
        if not groups:
            return None

        return f"{groups[1]} >= now() - {groups[2]}"

    except Exception as e:
        logging.error(str(e))
        return None


def _parse_indexes(create_table_query_expr: str) -> List[TableIndex]:
    if create_table_query_expr == "":
        return []
    try:
        from tinybird.sql_toolset import format_sql

        indexes = [
            line.strip()
            for line in format_sql(create_table_query_expr).splitlines()
            if line.strip().startswith("INDEX")
        ]
    except ModuleNotFoundError:
        # this is not needed from the CLI
        return []

    return parse_indexes_structure(indexes)


def _parse_projections(create_table_query_expr: str) -> List[TableProjection]:
    return [
        TableProjection(name, expr)
        for name, expr in re.findall(
            r"PROJECTION\s+(\w+)\s*\(((?:[^()]|\((?:[^()]|\([^()]*\))*\))*)\)", create_table_query_expr
        )
    ]