tinybird-0.0.1.dev0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tinybird might be problematic.

Files changed (45)
  1. tinybird/__cli__.py +8 -0
  2. tinybird/ch_utils/constants.py +244 -0
  3. tinybird/ch_utils/engine.py +855 -0
  4. tinybird/check_pypi.py +25 -0
  5. tinybird/client.py +1281 -0
  6. tinybird/config.py +117 -0
  7. tinybird/connectors.py +428 -0
  8. tinybird/context.py +23 -0
  9. tinybird/datafile.py +5589 -0
  10. tinybird/datatypes.py +434 -0
  11. tinybird/feedback_manager.py +1022 -0
  12. tinybird/git_settings.py +145 -0
  13. tinybird/sql.py +865 -0
  14. tinybird/sql_template.py +2343 -0
  15. tinybird/sql_template_fmt.py +281 -0
  16. tinybird/sql_toolset.py +350 -0
  17. tinybird/syncasync.py +682 -0
  18. tinybird/tb_cli.py +25 -0
  19. tinybird/tb_cli_modules/auth.py +252 -0
  20. tinybird/tb_cli_modules/branch.py +1043 -0
  21. tinybird/tb_cli_modules/cicd.py +434 -0
  22. tinybird/tb_cli_modules/cli.py +1571 -0
  23. tinybird/tb_cli_modules/common.py +2082 -0
  24. tinybird/tb_cli_modules/config.py +344 -0
  25. tinybird/tb_cli_modules/connection.py +803 -0
  26. tinybird/tb_cli_modules/datasource.py +900 -0
  27. tinybird/tb_cli_modules/exceptions.py +91 -0
  28. tinybird/tb_cli_modules/fmt.py +91 -0
  29. tinybird/tb_cli_modules/job.py +85 -0
  30. tinybird/tb_cli_modules/pipe.py +858 -0
  31. tinybird/tb_cli_modules/regions.py +9 -0
  32. tinybird/tb_cli_modules/tag.py +100 -0
  33. tinybird/tb_cli_modules/telemetry.py +310 -0
  34. tinybird/tb_cli_modules/test.py +107 -0
  35. tinybird/tb_cli_modules/tinyunit/tinyunit.py +340 -0
  36. tinybird/tb_cli_modules/tinyunit/tinyunit_lib.py +71 -0
  37. tinybird/tb_cli_modules/token.py +349 -0
  38. tinybird/tb_cli_modules/workspace.py +269 -0
  39. tinybird/tb_cli_modules/workspace_members.py +212 -0
  40. tinybird/tornado_template.py +1194 -0
  41. tinybird-0.0.1.dev0.dist-info/METADATA +2815 -0
  42. tinybird-0.0.1.dev0.dist-info/RECORD +45 -0
  43. tinybird-0.0.1.dev0.dist-info/WHEEL +5 -0
  44. tinybird-0.0.1.dev0.dist-info/entry_points.txt +2 -0
  45. tinybird-0.0.1.dev0.dist-info/top_level.txt +4 -0
tinybird/ch_utils/engine.py
@@ -0,0 +1,855 @@
+ import logging
+ import re
+ from collections import defaultdict
+ from dataclasses import asdict
+ from typing import Any, Callable, Dict, Iterable, List, Optional
+
+ from ..sql import (
+     TableIndex,
+     TableProjection,
+     col_name,
+     engine_replicated_to_local,
+     parse_indexes_structure,
+     parse_table_structure,
+ )
+
+ DEFAULT_EMPTY_PARAMETERS = ["ttl", "partition_key", "sorting_key"]
+ DEFAULT_JOIN_EMPTY_PARAMETERS = ["join_strictness", "join_type", "key_columns"]
+
+ # Currently we only support the simplest TTLs
+ # f(X) + toIntervalZ(N)
+ # * `f()` is an optional CH function or chain of functions (we don't care)
+ # * `X` is a column. We also accept spaces and / as some TTLs do (column / 1000)
+ # * `+` is the exact char
+ # * `toInterval` are exact chars
+ # * `Z` is [a-zA-Z]*
+ # * `N` is a number
+ SIMPLE_TTL_DEFINITION = re.compile(r"""^([a-zA-Z0-9_\-\.\(\)\ \/\*]*) \+ (toInterval[a-zA-Z]*\([0-9]+\))$""")
+
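For reference, a minimal sketch of how the pattern above decomposes a supported TTL expression (mirroring the ttl_condition_from_engine_full doctests further down):

    m = SIMPLE_TTL_DEFINITION.search("toDate(timestamp) + toIntervalDay(3)")
    # group 1 is the (optionally wrapped) column expression, group 2 the interval term
    assert m is not None
    assert m.group(1) == "toDate(timestamp)"
    assert m.group(2) == "toIntervalDay(3)"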
+
+ class TableDetails:
+     """
+     >>> ed = TableDetails({})
+     >>> ed.engine_full == None
+     True
+     >>> ed.engine == ''
+     True
+     >>> ed.to_json()
+     {'engine_full': None, 'engine': ''}
+     >>> ed.to_datafile()
+     ''
+
+     >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048", "engine": "MergeTree", "partition_key": "toYear(timestamp)", "sorting_key": "timestamp, cityHash64(location)", "primary_key": "timestamp, cityHash64(location)", "sampling_key": "cityHash64(location)", "settings": "index_granularity = 32, index_granularity_bytes = 2048", "ttl": None })
+     >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(1)")
+     True
+     >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY", "engine": "MergeTree", "partition_key": "toYear(timestamp)", "sorting_key": "timestamp, cityHash64(location)", "primary_key": "timestamp, cityHash64(location)", "sampling_key": "cityHash64(location)", "settings": "index_granularity = 32, index_granularity_bytes = 2048", "ttl": "toDate(timestamp) + INTERVAL 1 DAY" })
+     >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(1)")
+     False
+     >>> ed.diff_ttl("toDate(timestamp) + toIntervalDay(2)")
+     True
+     >>> ed.diff_ttl("toDate(timestamp) + INTERVAL DAY 2")
+     True
+     >>> ed.diff_ttl("toDate(timestamp) + INTERVAL 1 DAY")
+     False
+     >>> ed.diff_ttl("")
+     True
+     >>> ed.engine_full
+     'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY'
+     >>> ed.engine
+     'MergeTree'
+     >>> ed.to_json()
+     {'engine_full': 'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 32, index_granularity_bytes = 2048 TTL toDate(timestamp) + INTERVAL 1 DAY', 'engine': 'MergeTree', 'partition_key': 'toYear(timestamp)', 'sorting_key': 'timestamp, cityHash64(location)', 'sampling_key': 'cityHash64(location)', 'settings': 'index_granularity = 32, index_granularity_bytes = 2048', 'ttl': 'toDate(timestamp) + INTERVAL 1 DAY'}
+     >>> ed.to_datafile()
+     'ENGINE "MergeTree"\\nENGINE_PARTITION_KEY "toYear(timestamp)"\\nENGINE_SORTING_KEY "timestamp, cityHash64(location)"\\nENGINE_SAMPLING_KEY "cityHash64(location)"\\nENGINE_SETTINGS "index_granularity = 32, index_granularity_bytes = 2048"\\nENGINE_TTL "toDate(timestamp) + INTERVAL 1 DAY"'
+
+     >>> ed = TableDetails({"engine_full": "Join(ANY, LEFT, id)", "engine": "Join", "partition_key": "", "sorting_key": "", "primary_key": "", "sampling_key": ""})
+     >>> ed.engine_full
+     'Join(ANY, LEFT, id)'
+     >>> ed.engine
+     'Join'
+     >>> ed.to_json()
+     {'engine_full': 'Join(ANY, LEFT, id)', 'engine': 'Join', 'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
+     >>> ed.to_datafile()
+     'ENGINE "Join"\\nENGINE_JOIN_STRICTNESS "ANY"\\nENGINE_JOIN_TYPE "LEFT"\\nENGINE_KEY_COLUMNS "id"'
+
+     >>> ed = TableDetails({"database": "d_01", "name": "t_01", "engine": "Join", "join_strictness": "ANY", "join_type": "LEFT", "key_columns": "id"})
+     >>> ed.engine_full == None
+     True
+     >>> ed.engine
+     'Join'
+     >>> ed.to_json()
+     {'engine_full': None, 'engine': 'Join', 'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
+     >>> ed.to_datafile()
+     'ENGINE "Join"\\nENGINE_JOIN_STRICTNESS "ANY"\\nENGINE_JOIN_TYPE "LEFT"\\nENGINE_KEY_COLUMNS "id"'
+     >>> ed = TableDetails({ "engine_full": "MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800 TTL toDate(timestamp) + INTERVAL 1 DAY"})
+     >>> ed.engine_full
+     'MergeTree() PARTITION BY toYear(timestamp) ORDER BY (timestamp, cityHash64(location)) SAMPLE BY cityHash64(location) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800 TTL toDate(timestamp) + INTERVAL 1 DAY'
+
+     >>> x = TableDetails({'database': 'd_01', 'name': 't_01', 'create_table_query': "CREATE TABLE d_01.t_01 (`project_id` String, `project_name` String, `project_repo` String, `owner_id` String, `updated_at` DateTime64(3)) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_test_1ad5e496b29246e1ade99117e9180f6f.t_1bac899a56b34b33921fbf468b4500f7', '{replica}', updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32", 'engine': 'ReplicatedReplacingMergeTree', 'partition_key': 'tuple()', 'sorting_key': 'project_id', 'primary_key': 'project_id', 'sampling_key': '', 'engine_full': "ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_01.t_01', '{replica}', updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32", 'settings': 'index_granularity = 32', 'ttl': ''})
+     >>> x.primary_key
+
+     """
+
+     def __init__(self, details: Optional[Dict[str, Any]] = None):
+         self.details = details or {}
+
+     def __bool__(self):
+         return bool(self.details)
+
+     @property
+     def engine_full(self) -> Optional[str]:
+         _engine_full: Optional[str] = self.details.get("engine_full", None)
+         if not _engine_full:
+             return None
+
+         settings = self.details.get("settings", None)
+         # We cannot remove index_granularity = 8192 blindly because it might be followed by other settings
+         if settings and settings.strip().lower() == "index_granularity = 8192":
+             _engine_full = _engine_full.replace(" SETTINGS index_granularity = 8192", "")
+         return engine_replicated_to_local(_engine_full)
+
+     @property
+     def original_engine_full(self) -> Optional[str]:
+         return self.details.get("engine_full", None)
+
+     @property
+     def name(self) -> str:
+         return self.details.get("name", "")
+
+     @property
+     def database(self) -> str:
+         return self.details.get("database", "")
+
+     @property
+     def engine(self) -> str:
+         _engine = self.details.get("engine", "")
+         return _engine and _engine.replace("Replicated", "")
+
+     @property
+     def original_engine(self) -> Optional[str]:
+         return self.details.get("engine", None)
+
+     @property
+     def version(self):
+         _version = self.details.get("version", None)
+         return _version
+
+     def is_replicated(self):
+         return "Replicated" in self.details.get("engine", "")
+
+     def is_mergetree_family(self) -> bool:
+         return self.engine is not None and "mergetree" in self.engine.lower()
+
+     def supports_alter_add_column(self) -> bool:
+         return self.is_mergetree_family() or (self.engine is not None and self.engine.lower() == "null")
+
+     def is_replacing_engine(self) -> bool:
+         if self.engine:
+             engine_lower = self.engine.lower()
+             is_aggregating = "aggregatingmergetree" in engine_lower
+             is_replacing = "replacingmergetree" in engine_lower
+             is_collapsing = "collapsingmergetree" in engine_lower
+             return is_aggregating or is_replacing or is_collapsing
+         return False
+
+     def diff_ttl(self, new_ttl: str) -> bool:
+         try:
+             from tinybird.sql_toolset import format_sql
+
+             current_ttl = format_sql(f"select {self.ttl}")[7:]
+             new_ttl = format_sql(f"select {new_ttl}")[7:]
+             return current_ttl != new_ttl
+         except Exception:
+             return self.ttl != new_ttl
+
+     @property
+     def partition_key(self) -> Optional[str]:
+         return self.details.get("partition_key", None)
+
+     @property
+     def sorting_key(self) -> Optional[str]:
+         _sorting_key = self.details.get("sorting_key", None)
+         if self.is_replacing_engine() and not _sorting_key:
+             raise ValueError(f"SORTING_KEY must be defined for the {self.engine} engine")
+         if self.is_mergetree_family():
+             return _sorting_key or "tuple()"
+         return _sorting_key
+
+     @property
+     def primary_key(self) -> Optional[str]:
+         _primary_key = self.details.get("primary_key", None)
+         # When querying `system.tables`, it will return the `sorting_key` as `primary_key` even if it was not specified,
+         # so we need to ignore it
+         if self.sorting_key == _primary_key:
+             return None
+         return _primary_key
+
+     @property
+     def sampling_key(self) -> Optional[str]:
+         return self.details.get("sampling_key", None)
+
+     @property
+     def settings(self):
+         settings = self.details.get("settings", None)
+         if settings and settings.strip().lower() != "index_granularity = 8192":
+             return settings
+
+     @property
+     def ttl(self):
+         return self.details.get("ttl", None)
+
+     @property
+     def ver(self):
+         _ver = self.details.get("ver", None)
+         return _ver
+
+     @property
+     def is_deleted(self):
+         _is_deleted = self.details.get("is_deleted", None)
+         return _is_deleted
+
+     @property
+     def columns(self):
+         _columns = self.details.get("columns", None)
+         return _columns
+
+     @property
+     def sign(self):
+         _sign = self.details.get("sign", None)
+         return _sign
+
+     @property
+     def join_strictness(self):
+         _join_strictness = self.details.get("join_strictness", None)
+         return _join_strictness
+
+     @property
+     def join_type(self):
+         _join_type = self.details.get("join_type", None)
+         return _join_type
+
+     @property
+     def key_columns(self):
+         _key_columns = self.details.get("key_columns", None)
+         return _key_columns
+
+     @property
+     def statistics(self) -> Dict[str, Any]:
+         return {
+             "bytes": self.details.get("total_bytes", None),
+             "row_count": self.details.get("total_rows", None),
+         }
+
+     @property
+     def indexes(self) -> List[TableIndex]:
+         return _parse_indexes(str(self.details.get("create_table_query", "")))
+
+     @property
+     def projections(self) -> List[TableProjection]:
+         return _parse_projections(self.details.get("create_table_query", ""))
+
+     def to_json(self, exclude: Optional[List[str]] = None, include_empty_details: bool = False):
+         # name, database are not exported since they are not part of the engine
+         d: Dict[str, Any] = {
+             "engine_full": self.engine_full,
+             "engine": self.engine,
+         }
+         if self.partition_key:
+             d["partition_key"] = self.partition_key
+         if self.sorting_key:
+             d["sorting_key"] = self.sorting_key
+         if self.primary_key:
+             d["primary_key"] = self.primary_key
+         if self.sampling_key:
+             d["sampling_key"] = self.sampling_key
+         if self.settings:
+             d["settings"] = self.settings
+         if self.join_strictness:
+             d["join_strictness"] = self.join_strictness
+         if self.join_type:
+             d["join_type"] = self.join_type
+         if self.key_columns:
+             d["key_columns"] = self.key_columns
+         if self.ver:
+             d["ver"] = self.ver
+         if self.is_deleted:
+             d["is_deleted"] = self.is_deleted
+         if self.sign:
+             d["sign"] = self.sign
+         if self.version:
+             d["version"] = self.version
+         if self.ttl:
+             d["ttl"] = self.ttl.strip()
+         if self.indexes:
+             d["indexes"] = [asdict(index) for index in self.indexes]
+
+         if self.engine_full:
+             engine_params = engine_params_from_engine_full(self.engine_full)
+             d = {**d, **engine_params}
+
+         if include_empty_details:
+             if self.engine and self.engine.lower() == "join":
+                 d = set_empty_details(d, DEFAULT_JOIN_EMPTY_PARAMETERS)
+             else:
+                 d = set_empty_details(d, DEFAULT_EMPTY_PARAMETERS)
+
+         if exclude:
+             for attr in exclude:
+                 if attr in d:
+                     del d[attr]
+
+         return d
+
+     def to_datafile(self, include_empty_details: bool = False) -> str:
+         d: Dict[str, Any] = self.to_json(
+             exclude=["engine", "engine_full", "indexes"], include_empty_details=include_empty_details
+         )
+         engine: str = self.engine
+
+         datafile: str = ""
+         if engine:
+             datafile += "\n".join(
+                 [f'ENGINE "{engine}"'] + [f'ENGINE_{k.upper()} "{v}"' for k, v in d.items() if v is not None]
+             )
+
+         return datafile
+
+
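A short sketch of the typical flow, with an illustrative dict shaped like the system.tables rows used in the doctests above (the values here are made up):

    details = TableDetails(
        {
            "engine": "ReplacingMergeTree",
            "engine_full": "ReplacingMergeTree(updated_at) PARTITION BY tuple() ORDER BY project_id",
            "partition_key": "tuple()",
            "sorting_key": "project_id",
        }
    )
    details.is_replacing_engine()  # True
    details.to_datafile()          # ENGINE "ReplacingMergeTree" plus one ENGINE_* line per non-empty key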
+ def set_empty_details(details: Dict[str, str], parameters: Iterable[str]):
+     for parameter in parameters:
+         if parameter not in details:
+             details[parameter] = ""
+
+     return details
+
+
+ class EngineOption:
+     def __init__(
+         self,
+         name: str,
+         sql: str,
+         required: Optional[bool] = None,
+         default_value=None,
+         is_valid: Optional[Callable[[List[Dict[str, Any]], str], Optional[str]]] = None,
+         tb_param: Optional[str] = None,
+     ):
+         self.name = name
+         self.sql = sql
+         self.required = required
+         self.default_value = default_value
+         self.is_valid = is_valid
+         self.tb_param = tb_param if tb_param else "_".join(["engine", name])
+
+
+ class EngineParam:
+     def __init__(
+         self,
+         name: str,
+         required: Optional[bool] = None,
+         default_value=None,
+         is_valid: Optional[Callable[[List[Dict[str, Any]], str], Optional[str]]] = None,
+         tb_param: Optional[str] = None,
+     ):
+         self.name = name
+         self.required = required
+         self.default_value = default_value
+         self.is_valid = is_valid
+         self.tb_param = tb_param if tb_param else "_".join(["engine", name])
+
+
+ def engine_config(name: str, params: Optional[List[EngineParam]] = None, options: Optional[List[EngineOption]] = None):
+     params = params or []
+     options = options or []
+     return (name, (params, options))
+
+
+ def column_is_valid(columns: List[Dict[str, Any]], column_name: str) -> str:
+     schema_columns = [col_name(c["name"], backquotes=False) for c in columns]
+     if column_name not in schema_columns:
+         raise ValueError(f"'{column_name}' column is not present in schema")
+     return col_name(column_name, backquotes=False)
+
+
+ def columns_are_valid(columns: List[Dict[str, Any]], column_names: str) -> str:
+     schema_columns = [col_name(c["name"], backquotes=False) for c in columns]
+     new_column_names = []
+     for column_name in [x.strip() for x in column_names.split(",")]:
+         if column_name not in schema_columns:
+             raise ValueError(f"'{column_name}' column is not present in schema")
+         new_column_names.append(col_name(column_name, backquotes=False))
+     return ", ".join(new_column_names)
+
+
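For illustration, the two column validators above behave roughly like this (col_name comes from ..sql; the assumption here is that it returns the bare name when backquotes=False):

    cols = [{"name": "a"}, {"name": "b"}]
    columns_are_valid(cols, "a, b")    # -> 'a, b'
    columns_are_valid(cols, "a, zzz")  # raises ValueError: 'zzz' column is not present in schema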
+ def sorting_key_is_valid(columns: List[Dict[str, Any]], value: Optional[str]) -> str:
+     INVALID_SORTING_KEYS = ["tuple()"]
+
+     if not value:
+         raise ValueError("Sorting key can not be empty")
+     if value in INVALID_SORTING_KEYS:
+         raise ValueError(f"'{value}' is not a valid sorting key")
+     return value
+
+
+ def case_insensitive_check(valid_values: List[str]) -> Callable[[List[Dict[str, Any]], str], Optional[str]]:
+     """
+     >>> valid_values = ['ANY', 'ALL']
+     >>> checker = case_insensitive_check(valid_values)
+     >>> checker([],'ALL')
+
+     >>> valid_values = ['ANY', 'ALL']
+     >>> checker = case_insensitive_check(valid_values)
+     >>> checker([],'any')
+
+     >>> valid_values = ['ANY', 'ALL']
+     >>> checker = case_insensitive_check(valid_values)
+     >>> checker([],'foo')
+     Traceback (most recent call last):
+     ...
+     ValueError: valid values are ANY, ALL
+     """
+
+     def checker(columns: List[Dict[str, Any]], value: str):
+         if value.upper() not in valid_values:
+             raise ValueError(f"valid values are {', '.join(valid_values)}")
+
+     return checker
+
+
+ # [PARTITION BY expr]
+ # [ORDER BY expr]
+ # [PRIMARY KEY expr]
+ # [SAMPLE BY expr]
+ # [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
+ # [SETTINGS name=value, ...]
+ MERGETREE_OPTIONS = [
+     EngineOption(name="partition_key", sql="PARTITION BY"),
+     EngineOption(name="sorting_key", sql="ORDER BY", default_value="tuple()"),
+     EngineOption(name="primary_key", sql="PRIMARY KEY"),
+     EngineOption(name="sampling_key", sql="SAMPLE BY"),
+     EngineOption(name="ttl", sql="TTL"),
+     EngineOption(name="settings", sql="SETTINGS"),
+ ]
+ REPLACINGMERGETREE_OPTIONS = [
+     EngineOption(name="partition_key", sql="PARTITION BY"),
+     EngineOption(name="sorting_key", sql="ORDER BY", required=True, is_valid=sorting_key_is_valid),
+     EngineOption(name="primary_key", sql="PRIMARY KEY"),
+     EngineOption(name="sampling_key", sql="SAMPLE BY"),
+     EngineOption(name="ttl", sql="TTL"),
+     EngineOption(name="settings", sql="SETTINGS"),
+ ]
+ ENABLED_ENGINES = [
+     # MergeTree()
+     engine_config("MergeTree", options=MERGETREE_OPTIONS),
+     # ReplacingMergeTree([ver])
+     engine_config(
+         "ReplacingMergeTree",
+         [EngineParam(name="ver", is_valid=column_is_valid), EngineParam(name="is_deleted", is_valid=column_is_valid)],
+         REPLACINGMERGETREE_OPTIONS,
+     ),
+     # SummingMergeTree([columns])
+     engine_config(
+         "SummingMergeTree",
+         [
+             # This should check the columns are numeric ones
+             EngineParam(name="columns", is_valid=columns_are_valid),
+         ],
+         MERGETREE_OPTIONS,
+     ),
+     # AggregatingMergeTree()
+     engine_config("AggregatingMergeTree", options=REPLACINGMERGETREE_OPTIONS),
+     # CollapsingMergeTree(sign)
+     engine_config(
+         "CollapsingMergeTree",
+         [EngineParam(name="sign", required=True, is_valid=column_is_valid)],
+         REPLACINGMERGETREE_OPTIONS,
+     ),
+     # VersionedCollapsingMergeTree(sign, version)
+     engine_config(
+         "VersionedCollapsingMergeTree",
+         [
+             EngineParam(name="sign", required=True, is_valid=column_is_valid),
+             EngineParam(name="version", required=True, is_valid=column_is_valid),
+         ],
+         MERGETREE_OPTIONS,
+     ),
+     # Join(join_strictness, join_type, k1[, k2, ...])
+     engine_config(
+         "Join",
+         [
+             # https://github.com/ClickHouse/ClickHouse/blob/fa8e4e4735b932f08b6beffcb2d069b72de34401/src/Storages/StorageJoin.cpp
+             EngineParam(
+                 name="join_strictness", required=True, is_valid=case_insensitive_check(["ANY", "ALL", "SEMI", "ANTI"])
+             ),
+             EngineParam(
+                 name="join_type", required=True, is_valid=case_insensitive_check(["LEFT", "INNER", "RIGHT", "FULL"])
+             ),
+             EngineParam(name="key_columns", required=True, is_valid=columns_are_valid),
+         ],
+     ),
+     # Null()
+     engine_config("Null"),
+ ]
+
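Engine lookup over this table (via get_engine_config, defined just below) is case-insensitive; a quick sketch:

    name, (params, options) = get_engine_config("mergetree")
    name                      # -> 'MergeTree'
    [o.sql for o in options]  # -> ['PARTITION BY', 'ORDER BY', 'PRIMARY KEY', 'SAMPLE BY', 'TTL', 'SETTINGS']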
+
+ def get_engine_config(engine: str):
+     for name, config in ENABLED_ENGINES:
+         if engine.lower() == name.lower():
+             return (name, config)
+     raise ValueError(
+         f"Engine {engine} is not supported, supported engines include: {', '.join([e[0] for e in ENABLED_ENGINES])}"
+     )
+
+
+ def engine_params(columns, params: List[EngineParam], args: Dict):
+     params_values = []
+     for p in params:
+         if p.required and p.name not in args:
+             raise ValueError(f"Missing required parameter '{p.name}'")
+         param_value = args.get(p.name, None) or p.default_value
+         if not param_value:
+             continue
+         if p.is_valid:
+             check_is_valid(
+                 valid_check=p.is_valid, check_type="parameter", columns=columns, tb_param=p.tb_param, value=param_value
+             )
+         params_values.append(param_value)
+     return params_values
+
+
+ def engine_options(columns, options: List[EngineOption], args: Dict):
+     options_values = []
+     engine_settings = ""
+
+     for o in options:
+         if o.required and o.name not in args:
+             raise ValueError(f"Missing required option '{o.name}'")
+         option_value = args.get(o.name) or o.default_value
+         if o.is_valid:
+             check_is_valid(
+                 valid_check=o.is_valid, check_type="option", columns=columns, tb_param=o.tb_param, value=option_value
+             )
+
+         if option_value:
+             if o.sql.lower() == "settings":
+                 engine_settings = f"{o.sql} {option_value}"
+             else:
+                 options_values.append(f"{o.sql} ({option_value})")
+
+     if engine_settings:
+         options_values.append(engine_settings)
+
+     return options_values
+
+
+ def check_is_valid(
+     valid_check: Callable[[List[Dict[str, Any]], str], Optional[str]],
+     check_type: str,
+     columns: List[Dict[str, Any]],
+     tb_param: str,
+     value: str,
+ ):
+     """
+     >>> check_is_valid(sorting_key_is_valid, 'option', ['column-name'], 'sorting_key', 'date')
+
+     >>> check_is_valid(sorting_key_is_valid, 'option', ['column-name'], 'sorting_key', 'tuple()')
+     Traceback (most recent call last):
+     ...
+     ValueError: Invalid value 'tuple()' for option 'sorting_key', reason: 'tuple()' is not a valid sorting key
+     """
+     try:
+         new_value = valid_check(columns, value)
+         if new_value:
+             value = new_value
+     except Exception as e:
+         raise ValueError(f"Invalid value '{value}' for {check_type} '{tb_param}', reason: {e}")
+
+
+ def build_engine(
+     engine: str, columns: Optional[List], params: List[EngineParam], options: List[EngineOption], args: Dict
+ ):
+     return f"{engine}({', '.join(engine_params(columns, params, args))}) {' '.join(engine_options(columns, options, args))}".strip()
+
+
+ def engine_full_from_dict(
+     engine: str, args: dict, schema: Optional[str] = None, columns: Optional[List[Dict[str, Any]]] = None
+ ):
+     """
+     >>> schema = ''
+     >>> engine_full_from_dict('wadus', {}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Engine wadus is not supported, supported engines include: MergeTree, ReplacingMergeTree, SummingMergeTree, AggregatingMergeTree, CollapsingMergeTree, VersionedCollapsingMergeTree, Join, Null
+     >>> schema = ''
+     >>> engine_full_from_dict('null', {}, schema=schema)
+     'Null()'
+     >>> schema = ''
+     >>> engine_full_from_dict('null', {}, columns=[])
+     'Null()'
+
+     >>> schema = 'cid Int32'
+     >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid'}, schema=schema)
+     'Join(ANY, LEFT, cid)'
+     >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid'}, columns=[{'name': 'cid', 'type': 'Int32', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'cid'}])
+     'Join(ANY, LEFT, cid)'
+     >>> schema = 'cid1 Int32, cid2 Int8'
+     >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'cid1, cid2'}, schema=schema)
+     'Join(ANY, LEFT, cid1, cid2)'
+     >>> engine_full_from_dict('Join', {'join_strictness': 'ANY', 'join_type': 'OUTER', 'key_columns': 'cid'}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Invalid value 'OUTER' for parameter 'engine_join_type', reason: valid values are LEFT, INNER, RIGHT, FULL
+
+     >>> schema = ''
+     >>> engine_full_from_dict('MergeTree', {}, schema=schema)
+     'MergeTree() ORDER BY (tuple())'
+     >>> engine_full_from_dict('MergeTree', {'sorting_key': 'local_date, cod_store'}, schema=schema)
+     'MergeTree() ORDER BY (local_date, cod_store)'
+     >>> engine_full_from_dict('MergeTree', {'partition_key': 'toDate(timestamp)', 'sorting_key': 'local_date, cod_store', 'settings': 'index_granularity = 32, index_granularity_bytes = 2048', 'ttl': 'toDate(local_date) + INTERVAL 1 DAY'}, schema=schema)
+     'MergeTree() PARTITION BY (toDate(timestamp)) ORDER BY (local_date, cod_store) TTL (toDate(local_date) + INTERVAL 1 DAY) SETTINGS index_granularity = 32, index_granularity_bytes = 2048'
+
+     >>> schema = ''
+     >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column'}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Invalid value 'sign_column' for parameter 'engine_sign', reason: 'sign_column' column is not present in schema
+
+     >>> schema = 'sign_column Int8'
+     >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column'}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Missing required option 'sorting_key'
+
+     >>> schema = 'sign_column Int8, key_column Int8'
+     >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column', 'sorting_key': 'key_column'}, schema=schema)
+     'CollapsingMergeTree(sign_column) ORDER BY (key_column)'
+
+     >>> columns=[]
+     >>> columns.append({'name': 'sign_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'sign_column'})
+     >>> columns.append({'name': 'key_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'key_column'})
+     >>> engine_full_from_dict('CollapsingMergeTree', {'sign': 'sign_column', 'sorting_key': 'key_column' }, columns=columns)
+     'CollapsingMergeTree(sign_column) ORDER BY (key_column)'
+
+     >>> schema = 'sign_column Int8'
+     >>> engine_full_from_dict('AggregatingMergeTree', {}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Missing required option 'sorting_key'
+
+     >>> columns=[]
+     >>> columns.append({'name': 'key_column', 'type': 'Int8', 'codec': None, 'default_value': None, 'nullable': False, 'normalized_name': 'key_column'})
+     >>> engine_full_from_dict('AggregatingMergeTree', { 'sorting_key': 'key_column' }, columns=columns)
+     'AggregatingMergeTree() ORDER BY (key_column)'
+
+     >>> schema = 'ver_column Int8, key_column Int8'
+     >>> engine_full_from_dict('ReplacingMergeTree', {}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Missing required option 'sorting_key'
+
+     >>> engine_full_from_dict('ReplacingMergeTree', {'sorting_key': 'key_column'}, schema=schema)
+     'ReplacingMergeTree() ORDER BY (key_column)'
+
+     >>> engine_full_from_dict('ReplacingMergeTree', {'ver': 'ver_column', 'sorting_key': 'key_column'}, schema=schema)
+     'ReplacingMergeTree(ver_column) ORDER BY (key_column)'
+
+     >>> engine_full_from_dict('ReplacingMergeTree', {'ver': 'other_column'}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Invalid value 'other_column' for parameter 'engine_ver', reason: 'other_column' column is not present in schema
+
+     >>> schema = 'col0 Int8, col1 Int8, col2 Int8'
+     >>> engine_full_from_dict('SummingMergeTree', {}, schema=schema)
+     'SummingMergeTree() ORDER BY (tuple())'
+     >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col0'}, schema=schema)
+     'SummingMergeTree(col0) ORDER BY (tuple())'
+     >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col0, col2'}, schema=schema)
+     'SummingMergeTree(col0, col2) ORDER BY (tuple())'
+     >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col1, other_column'}, schema=schema)
+     Traceback (most recent call last):
+     ...
+     ValueError: Invalid value 'col1, other_column' for parameter 'engine_columns', reason: 'other_column' column is not present in schema
+     >>> engine_full_from_dict('SummingMergeTree', {'columns': 'col1, other_column'}, schema=schema, columns=[])
+     Traceback (most recent call last):
+     ...
+     ValueError: You can not use 'schema' and 'columns' at the same time
+     >>> engine_full_from_dict('ReplacingMergeTree', {'partition_key': 'tuple()', 'sorting_key': 'project_id', 'settings': 'index_granularity = 32', 'ver': 'updated_at'}, "`project_id` String, `project_name` String, `project_repo` String, `owner_id` String, `updated_at` DateTime64(3)")
+     'ReplacingMergeTree(updated_at) PARTITION BY (tuple()) ORDER BY (project_id) SETTINGS index_granularity = 32'
+     """
+
+     if schema is not None and columns is not None:
+         raise ValueError("You can not use 'schema' and 'columns' at the same time")
+     engine_config = get_engine_config(engine)
+     name, (params, options) = engine_config
+     if columns is None and schema is not None:
+         columns = parse_table_structure(schema)
+
+     engine_settings = {key.replace("engine_", ""): value for key, value in args.items()}
+
+     for arg in engine_settings:
+         if not hasattr(TableDetails, arg):
+             raise ValueError(f"engine_{arg} is not a valid option")
+
+     return build_engine(name, columns, params, options, engine_settings)
+
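Note the key-normalization step above: args may also arrive with the datafile-style engine_ prefix, which is stripped before validation. A minimal sketch:

    engine_full_from_dict("MergeTree", {"engine_sorting_key": "t"}, schema="t DateTime")
    # -> 'MergeTree() ORDER BY (t)'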
+
+ def engine_params_from_engine_full(engine_full: str) -> Dict[str, Any]:
+     """
+     >>> engine_params_from_engine_full("Null()")
+     {}
+     >>> engine_params_from_engine_full("Join(ANY, LEFT, id)")
+     {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'id'}
+     >>> engine_params_from_engine_full("Join(ANY, LEFT, k1, k2)")
+     {'join_strictness': 'ANY', 'join_type': 'LEFT', 'key_columns': 'k1, k2'}
+     >>> engine_params_from_engine_full("AggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/d_f837aa.sales_by_country_rt__v0_staging_t_00c3091e7530472caebda05e97288a1d', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY (purchase_location, cod_device, date) SETTINGS index_granularity = 8192")
+     {}
+     >>> engine_params_from_engine_full("ReplicatedSummingMergeTree('/clickhouse/tables/{layer}-{shard}/d_abcf3e.t_69f9da31f4524995b8911e1b24c80ab4', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY (date, purchase_location, sku_rank_lc) SETTINGS index_granularity = 8192")
+     {}
+     >>> engine_params_from_engine_full("ReplicatedSummingMergeTree('/clickhouse/tables/{layer}-{shard}/d_abcf3e.t_69f9da31f4524995b8911e1b24c80ab4', '{replica}', c1, c2) PARTITION BY toYYYYMM(date) ORDER BY (date, purchase_location, sku_rank_lc) SETTINGS index_granularity = 8192")
+     {'columns': 'c1, c2'}
+     >>> engine_params_from_engine_full("ReplacingMergeTree(insert_date) ORDER BY date")
+     {'ver': 'insert_date'}
+     >>> engine_params_from_engine_full("ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/d_f837aa.t_d3aaad001dee4d9e9e3067ccb789fb59_n1', '{replica}', insert_date) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     {'ver': 'insert_date'}
+     >>> engine_params_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     {'sign': 'sign_c', 'version': 'version_c'}
+     >>> engine_params_from_engine_full("ReplacingMergeTree(updated_at) PARTITION BY tuple() PRIMARY KEY project_id ORDER BY project_id SETTINGS index_granularity = 32")
+     {'ver': 'updated_at'}
+     """
+     engine_full = engine_replicated_to_local(engine_full)
+     for engine, (params, _options) in ENABLED_ENGINES:
+         if engine_full.startswith(engine):
+             m = re.search(rf"{engine}\(([^\)]*)\).*", engine_full)
+             params_used = []
+             if m:
+                 params_used = [x.strip() for x in m.group(1).split(",")]
+             params_dict = defaultdict(list)
+             param = None
+             for i, v in enumerate(params_used):
+                 if i < len(params):
+                     param = params[i]
+                 if param and v:
+                     params_dict[param.name].append(v)
+
+             return {k: ", ".join(v) for k, v in params_dict.items()}
+     return {}
+
+
+ def engine_local_to_replicated(engine: str, database: str, name: str) -> str:
+     """
+     Transforms an engine definition to a replicated one
+
+     >>> engine_local_to_replicated('MergeTree() order by (test)', 'test', 'foo')
+     "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
+     >>> engine_local_to_replicated('MergeTree order by (test)', 'test', 'foo')
+     "ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
+     >>> engine_local_to_replicated('ReplacingMergeTree(timestamp) order by (test)', 'test', 'foo')
+     "ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}',timestamp) order by (test)"
+     >>> engine_local_to_replicated('AggregatingMergeTree order by (test)', 'test', 'foo')
+     "ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test)"
+     >>> engine_local_to_replicated('AggregatingMergeTree order by (test) settings index_granularity = 8129', 'test', 'foo')
+     "ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo','{replica}') order by (test) settings index_granularity = 8129"
+     """
+
+     def _replace(m: Any) -> str:
+         parts = m.groups()
+
+         engine_type = parts[0]
+         engine_args = f",{parts[2]}" if parts[2] else ""
+         engine_settings = parts[3]
+         replication_args = f"'/clickhouse/tables/{{layer}}-{{shard}}/{database}.{name}','{{replica}}'"
+
+         return f"Replicated{engine_type}MergeTree({replication_args}{engine_args}){engine_settings}"
+
+     return re.sub(r"(.*)MergeTree(\(([^\)]*)\))*(.*)", _replace, engine.strip())
+
+
+ def ttl_condition_from_engine_full(engine_full: Optional[str]) -> Optional[str]:
+     """
+     >>> ttl_condition_from_engine_full(None)
+
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     'toDate(local_timeplaced) >= now() - toIntervalDay(3)'
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL local_timeplaced + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     'local_timeplaced >= now() - toIntervalDay(3)'
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDate(local_timeplaced / 1000) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     'toDate(local_timeplaced / 1000) >= now() - toIntervalDay(3)'
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toStartOfWeek(local_timeplaced) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     'toStartOfWeek(local_timeplaced) >= now() - toIntervalDay(3)'
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk TTL toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) + toIntervalDay(3) SETTINGS index_granularity = 8192")
+     'toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) >= now() - toIntervalDay(3)'
+     >>> ttl_condition_from_engine_full("ReplicatedVersionedCollapsingMergeTree('/clickhouse/tables/{layer}-{shard}/test.foo', '{replica}', sign_c,version_c) ORDER BY pk SETTINGS index_granularity = 8192")
+
+     >>> ttl_condition_from_engine_full("ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/d_test_4683b7e9803547218ba5010eb0364233.t_42fc0805b31f4763991cccefa71eeda3', '{replica}') PARTITION BY toYear(date) ORDER BY date TTL toDate(date) + toIntervalDay(1) SETTINGS index_granularity = 8192")
+     'toDate(date) >= now() - toIntervalDay(1)'
+     >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYear(date) ORDER BY date TTL toDate(date) + toIntervalDay(1)")
+     'toDate(date) >= now() - toIntervalDay(1)'
+     >>> ttl_condition_from_engine_full("ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/d_03d680.t_ea9e3e784ef149caa1fcd5d772e61c42', '{replica}') PARTITION BY toStartOfHour(snapshot_id) ORDER BY (snapshot_id, ID_LOCALIZACION, ID_INSTALACION_RFID, COD_PRODUCTO_AS400, MODELO, CALIDAD, COLOR, TALLA, UBICACION_RFID) TTL snapshot_id + toIntervalHour(1) SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1, merge_with_ttl_timeout = 1800")
+     'snapshot_id >= now() - toIntervalHour(1)'
+     >>> ttl_condition_from_engine_full("ReplicatedAggregatingMergeTree('/clickhouse/tables/{layer}-{shard}/d_03d680.t_9ffe6f6790be4fae908685d3da4ee6f1', '{replica}') PARTITION BY snapshot_id ORDER BY (snapshot_id, COD_PRODUCTO_AS400, MODELO, CALIDAD) TTL snapshot_id + toIntervalHour(1) SETTINGS index_granularity = 8192")
+     'snapshot_id >= now() - toIntervalHour(1)'
+     >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYYYYMM(t) ORDER BY (t, c) TTL t + toIntervalDay(90) SETTINGS index_granularity = 8192")
+     't >= now() - toIntervalDay(90)'
+     >>> ttl_condition_from_engine_full("MergeTree() PARTITION BY toYYYYMM(t) ORDER BY (t, c) TTL t + toIntervalDay(90)")
+     't >= now() - toIntervalDay(90)'
+     >>> ttl_condition_from_engine_full("MergeTree ORDER BY a TTL (toStartOfDay(a) + toIntervalSecond(b)) + toIntervalDay(1) SETTINGS index_granularity = 8192")
+
+     >>> ttl_condition_from_engine_full("MergeTree ORDER BY a TTL (toStartOfDay(a) - toIntervalSecond(b)) + toIntervalSecond(1800) SETTINGS index_granularity = 8192")
+     '(toStartOfDay(a) - toIntervalSecond(b)) >= now() - toIntervalSecond(1800)'
+     >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL parseDateTimeBestEffortOrZero(toString(round(epoch / 1000))) + toIntervalDay(2)")
+     'parseDateTimeBestEffortOrZero(toString(round(epoch / 1000))) >= now() - toIntervalDay(2)'
+     >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) + toIntervalDay(10)")
+     'toDateTime(fromUnixTimestamp64Milli(-sortingNegativeTS)) >= now() - toIntervalDay(10)'
+
+     # Unsupported currently
+     >>> ttl_condition_from_engine_full("MergeTree ORDER BY col TTL toDate(__timestamp) + 60")
+
+     >>> ttl_condition_from_engine_full("ttl")
+
+     """
+     if not engine_full:
+         return None
+
+     try:
+         ttl_array = engine_full.split(" TTL ")
+         if len(ttl_array) <= 1:
+             return None
+         settings_array = engine_full.split(" SETTINGS ")
+         settings = " SETTINGS " + settings_array[1] if len(settings_array) > 1 else None
+         ttl = ttl_array[1][: -(len(settings))] if settings else ttl_array[1]
+
+         groups = SIMPLE_TTL_DEFINITION.search(ttl)
+         if not groups:
+             return None
+
+         return f"{groups[1]} >= now() - {groups[2]}"
+
+     except Exception as e:
+         logging.error(str(e))
+         return None
+
+
+ def _parse_indexes(create_table_query_expr: str) -> List[TableIndex]:
+     if create_table_query_expr == "":
+         return []
+     try:
+         from tinybird.sql_toolset import format_sql
+
+         indexes = [
+             line.strip()
+             for line in format_sql(create_table_query_expr).splitlines()
+             if line.strip().startswith("INDEX")
+         ]
+     except ModuleNotFoundError:
+         # this is not needed from the CLI
+         return []
+
+     return parse_indexes_structure(indexes)
+
+
+ def _parse_projections(create_table_query_expr: str) -> List[TableProjection]:
+     return [
+         TableProjection(name, expr)
+         for name, expr in re.findall(
+             r"PROJECTION\s+(\w+)\s*\(((?:[^()]|\((?:[^()]|\([^()]*\))*\))*)\)", create_table_query_expr
+         )
+     ]
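As a final illustration, the projection regex in _parse_projections tolerates a few levels of nested parentheses; a rough sketch of what it extracts (TableProjection is imported from ..sql, and a dataclass-style repr is assumed here):

    q = "CREATE TABLE t (a Int32, PROJECTION p1 (SELECT a, count() GROUP BY a)) ENGINE = MergeTree ORDER BY a"
    _parse_projections(q)
    # -> roughly [TableProjection(name='p1', expr='SELECT a, count() GROUP BY a')]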