tinybird 0.0.1.dev6__py3-none-any.whl → 0.0.1.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tinybird might be problematic. See the advisory on the package registry for more details.
- tinybird/tb/modules/branch.py +0 -21
- tinybird/tb/modules/build.py +7 -18
- tinybird/tb/modules/cli.py +11 -131
- tinybird/tb/modules/common.py +14 -2
- tinybird/tb/modules/create.py +10 -14
- tinybird/tb/modules/datafile/build.py +2136 -0
- tinybird/tb/modules/datafile/build_common.py +118 -0
- tinybird/tb/modules/datafile/build_datasource.py +413 -0
- tinybird/tb/modules/datafile/build_pipe.py +648 -0
- tinybird/tb/modules/datafile/common.py +898 -0
- tinybird/tb/modules/datafile/diff.py +197 -0
- tinybird/tb/modules/datafile/exceptions.py +23 -0
- tinybird/tb/modules/datafile/format_common.py +66 -0
- tinybird/tb/modules/datafile/format_datasource.py +160 -0
- tinybird/tb/modules/datafile/format_pipe.py +195 -0
- tinybird/tb/modules/datafile/parse_datasource.py +41 -0
- tinybird/tb/modules/datafile/parse_pipe.py +69 -0
- tinybird/tb/modules/datafile/pipe_checker.py +560 -0
- tinybird/tb/modules/datafile/pull.py +157 -0
- tinybird/tb/modules/datasource.py +1 -1
- tinybird/tb/modules/fmt.py +4 -1
- tinybird/tb/modules/local.py +3 -0
- tinybird/tb/modules/pipe.py +8 -2
- tinybird/tb/modules/prompts.py +1 -1
- tinybird/tb/modules/workspace.py +1 -1
- {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/METADATA +1 -1
- {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/RECORD +30 -17
- tinybird/tb/modules/datafile.py +0 -6122
- {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import difflib
|
|
2
|
+
import os
|
|
3
|
+
from os import getcwd
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Generator, Iterable, List, Optional
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from colorama import Back, Fore, Style, init
|
|
9
|
+
|
|
10
|
+
init()
|
|
11
|
+
except ImportError: # fallback so that the imported classes always exist
|
|
12
|
+
|
|
13
|
+
class ColorFallback:
|
|
14
|
+
def __getattr__(self, name):
|
|
15
|
+
return ""
|
|
16
|
+
|
|
17
|
+
Fore = Back = Style = ColorFallback()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
import shutil
|
|
21
|
+
import sys
|
|
22
|
+
|
|
23
|
+
import click
|
|
24
|
+
|
|
25
|
+
from tinybird.client import TinyB
|
|
26
|
+
from tinybird.feedback_manager import FeedbackManager
|
|
27
|
+
from tinybird.sql_template_fmt import DEFAULT_FMT_LINE_LENGTH
|
|
28
|
+
from tinybird.tb.modules.datafile.common import get_name_version, get_project_filenames, is_file_a_datasource, peek
|
|
29
|
+
from tinybird.tb.modules.datafile.format_datasource import format_datasource
|
|
30
|
+
from tinybird.tb.modules.datafile.format_pipe import format_pipe
|
|
31
|
+
from tinybird.tb.modules.datafile.pull import folder_pull
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def diff_files(
    from_file: str,
    to_file: str,
    from_file_suffix: str = "[remote]",
    to_file_suffix: str = "[local]",
    with_format: bool = True,
    with_color: bool = False,
    client: Optional[TinyB] = None,
    for_deploy: bool = False,
):
    """Produce a unified diff between two datafiles.

    Both files are optionally normalized through the datasource/pipe
    formatters before diffing so cosmetic differences don't show up.

    Returns an iterator of unified-diff lines (optionally ANSI-colored),
    or ``None`` when either side could not be parsed / is empty.
    Raises ``click.ClickException`` when a file is missing.
    """

    def file_lines(filename):
        # Raw, unformatted content; used when with_format is False.
        with open(filename) as file:
            return file.readlines()

    async def parse(filename, with_format=True, unroll_includes=False):
        # Returns a list of lines, or None for unsupported file types.
        extensions = Path(filename).suffixes
        lines = None
        if is_file_a_datasource(filename):
            lines = (
                await format_datasource(
                    filename,
                    unroll_includes=unroll_includes,
                    for_diff=True,
                    client=client,
                    replace_includes=True,
                    for_deploy_diff=for_deploy,
                )
                if with_format
                else file_lines(filename)
            )
        elif (".pipe" in extensions) or (".incl" in extensions):
            lines = (
                await format_pipe(
                    filename,
                    DEFAULT_FMT_LINE_LENGTH,
                    unroll_includes=unroll_includes,
                    replace_includes=True,
                    for_deploy_diff=for_deploy,
                )
                if with_format
                else file_lines(filename)
            )
        else:
            # Fix: the message previously contained no placeholder, so the
            # offending filename was never shown.
            click.echo(f"Unsupported file type: {filename}")
        if lines:
            # Formatters return a single string; re-split into newline-terminated lines.
            return [f"{line}\n" for line in lines.split("\n")] if with_format else lines

    try:
        lines1 = await parse(from_file, with_format)
        lines2 = await parse(to_file, with_format, unroll_includes=True)
    except FileNotFoundError as e:
        filename = os.path.basename(str(e)).strip("'")
        raise click.ClickException(FeedbackManager.error_diff_file(filename=filename)) from e

    if not lines1 or not lines2:
        return

    diff = difflib.unified_diff(
        lines1, lines2, fromfile=f"{Path(from_file).name} {from_file_suffix}", tofile=f"{to_file} {to_file_suffix}"
    )

    if with_color:
        diff = color_diff(diff)

    return diff
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
async def diff_command(
    filenames: Optional[List[str]],
    fmt: bool,
    client: TinyB,
    no_color: Optional[bool] = False,
    with_print: Optional[bool] = True,
    verbose: Optional[bool] = None,
    clean_up: Optional[bool] = False,
    progress_bar: bool = False,
    for_deploy: bool = False,
):
    """Diff local project datafiles against the workspace's remote resources.

    Pulls remote resources into a ``.diff_tmp`` folder, diffs each one
    against its local counterpart, and reports resources that exist only
    remotely or only locally.

    Returns a dict mapping resource name -> one of:
      truthy/falsy diff marker (changed / unchanged),
      "shared" (remote shared datasource), "remote" (remote-only), "local" (local-only).
    """

    def is_shared_datasource(name):
        # Shared datasources are qualified as "workspace.datasource".
        return "." in name

    # Remember whether the caller passed explicit files: remote-only
    # resources are only reported in the whole-project case.
    with_explicit_filenames = filenames
    verbose = True if verbose is None else verbose

    target_dir = getcwd() + os.path.sep + ".diff_tmp"
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    if filenames:
        if len(filenames) == 1:
            # A single filename is treated as a project root: expand it.
            filenames = [filenames[0], *get_project_filenames(filenames[0])]
        await folder_pull(client, target_dir, False, None, True, verbose=False)
    else:
        filenames = get_project_filenames(".")
        if verbose:
            click.echo("Saving remote resources in .diff_tmp folder.\n")
        await folder_pull(client, target_dir, False, None, True, verbose=verbose, progress_bar=progress_bar)

    remote_datasources: List[Dict[str, Any]] = await client.datasources()
    remote_pipes: List[Dict[str, Any]] = await client.pipes()

    # Map resource stem -> local file path (datasources and pipes only; .incl excluded).
    local_resources = {
        Path(file).resolve().stem: file
        for file in filenames
        if (".datasource" in file or ".pipe" in file) and ".incl" not in file
    }

    changed = {}
    for resource in remote_datasources + remote_pipes:
        properties: Dict[str, Any] = get_name_version(resource["name"])
        name = properties.get("name", None)
        if name:
            # Find the local file matching this remote resource, if any.
            (rfilename, file) = next(
                ((rfilename, file) for (rfilename, file) in local_resources.items() if name == rfilename),
                ("", None),
            )
            if not file:
                if not with_explicit_filenames:
                    if with_print:
                        click.echo(f"{resource['name']} only exists remotely\n")
                    if is_shared_datasource(resource["name"]):
                        changed[resource["name"]] = "shared"
                    else:
                        changed[resource["name"]] = "remote"
                continue

            suffix = ".datasource" if ".datasource" in file else ".pipe"
            target = target_dir + os.path.sep + rfilename + suffix

            diff_lines = await diff_files(
                target, file, with_format=fmt, with_color=(not no_color), client=client, for_deploy=for_deploy
            )
            # peek() tells us whether the (possibly lazy) diff produced anything
            # without consuming it.
            not_empty, diff_lines = peek(diff_lines)
            changed[rfilename] = not_empty
            if not_empty and with_print:
                sys.stdout.writelines(diff_lines)
                click.echo("")

    # Anything local that never matched a remote resource exists only locally.
    for rfilename, _ in local_resources.items():
        if rfilename not in changed:
            for resource in remote_datasources + remote_pipes:
                properties = get_name_version(resource["name"])
                name = properties.get("name", None)
                if name and name == rfilename:
                    break

            if with_print and rfilename not in changed:
                click.echo(f"{rfilename} only exists locally\n")
            changed[rfilename] = "local"
    if clean_up:
        shutil.rmtree(target_dir)

    return changed
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def color_diff(diff: Iterable[str]) -> Generator[str, Any, None]:
    """Yield each unified-diff line wrapped in an ANSI color chosen by its marker.

    Additions (+) are green, removals (-) red, intra-line markers (^) blue;
    everything else passes through unchanged.
    """
    marker_colors = {"+": Fore.GREEN, "-": Fore.RED, "^": Fore.BLUE}
    for entry in diff:
        color = marker_colors.get(entry[:1])
        if color is None:
            yield entry
        else:
            yield color + entry + Fore.RESET
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import click
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AlreadyExistsException(click.ClickException):
    """Raised when attempting to create a resource that already exists."""

    pass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ParseException(Exception):
    """Raised when a datafile cannot be parsed.

    Carries the 1-based line number of the offending line, or -1 when unknown.
    """

    def __init__(self, err: str, lineno: int = -1):
        super().__init__(err)
        self.lineno: int = lineno
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class IncludeFileNotFoundException(Exception):
    """Raised when an INCLUDE directive references a file that does not exist.

    Carries the line number of the INCLUDE, or -1 when unknown.
    """

    def __init__(self, err: str, lineno: int = -1):
        super().__init__(err)
        self.lineno: int = lineno
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ValidationException(Exception):
    """Raised when a datafile parses but fails semantic validation.

    Carries the offending line number, or -1 when unknown.
    """

    def __init__(self, err: str, lineno: int = -1) -> None:
        super().__init__(err)
        self.lineno: int = lineno
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from typing import Any, List
|
|
2
|
+
|
|
3
|
+
from tinybird.tb.modules.datafile.common import Datafile
|
|
4
|
+
|
|
5
|
+
# Newline token used when serializing datafiles back to text.
DATAFILE_NEW_LINE = "\n"
# Indentation for continuation blocks (DESCRIPTION / SHARED_WITH bodies): four spaces.
DATAFILE_INDENT = " " * 4
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def format_description(file_parts: List[str], doc: Any) -> List[str]:
    """Append a ``DESCRIPTION >`` block for *doc* to *file_parts*.

    Blank description lines are dropped; each kept line is indented.
    *file_parts* is mutated in place and also returned.
    """
    description = doc.description if doc.description is not None else ""
    if description:
        file_parts.append("DESCRIPTION >")
        file_parts.append(DATAFILE_NEW_LINE)
        # Plain loop instead of a list comprehension used only for its
        # side effects (the comprehension built and discarded a list).
        for chunk in description.split(DATAFILE_NEW_LINE):
            stripped = chunk.strip()
            if stripped:
                file_parts.append(f"{DATAFILE_INDENT}{stripped}\n")
        file_parts.append(DATAFILE_NEW_LINE)
    return file_parts
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def format_sources(file_parts: List[str], doc: Datafile) -> List[str]:
    """Append one ``SOURCE <value>`` line per source declared in *doc*."""
    for origin in doc.sources:
        file_parts.extend((f"SOURCE {origin}", DATAFILE_NEW_LINE))
    return file_parts
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def format_maintainer(file_parts: List[str], doc: Datafile) -> List[str]:
    """Append a ``MAINTAINER`` line followed by a blank line, when one is set."""
    owner = doc.maintainer if doc.maintainer is not None else ""
    if owner:
        file_parts.extend((f"MAINTAINER {owner}", DATAFILE_NEW_LINE, DATAFILE_NEW_LINE))
    return file_parts
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def format_tokens(file_parts: List[str], doc: Datafile) -> List[str]:
    """Append one ``TOKEN "<name>" <permissions>`` line per token in *doc*.

    A blank separator line is added only when at least one token was written.
    """
    for token in doc.tokens:
        file_parts.append(f'TOKEN "{token["token_name"]}" {token["permissions"]}')
        file_parts.append(DATAFILE_NEW_LINE)
    # Idiom: truthiness instead of len() for the emptiness check.
    if doc.tokens:
        file_parts.append(DATAFILE_NEW_LINE)
    return file_parts
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def format_tags(file_parts: List[str], doc: Datafile) -> List[str]:
    """Append a ``TAGS "a, b"`` line plus a blank line when filtering tags exist."""
    if doc.filtering_tags:
        joined = ", ".join(doc.filtering_tags)
        file_parts.extend((f'TAGS "{joined}"', DATAFILE_NEW_LINE, DATAFILE_NEW_LINE))
    return file_parts
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def format_include(file_parts: List[str], doc: Datafile, unroll_includes: bool = False) -> List[str]:
    """Re-emit the datafile's ``INCLUDE *.incl`` line, unless includes are unrolled."""
    if unroll_includes:
        return file_parts

    assert doc.raw is not None  # the parser fills .raw for on-disk datafiles

    includes = [line for line in doc.raw if "INCLUDE" in line and ".incl" in line]
    # Idiom: truthiness instead of len().
    if includes:
        # NOTE(review): only the first INCLUDE line is preserved — presumably a
        # datasource carries at most one; confirm against the parser.
        file_parts.append(includes[0])
        file_parts.append(DATAFILE_NEW_LINE)
    return file_parts
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from tinybird.client import TinyB
|
|
4
|
+
from tinybird.sql import schema_to_sql_columns
|
|
5
|
+
from tinybird.tb.modules.datafile.common import Datafile
|
|
6
|
+
from tinybird.tb.modules.datafile.format_common import (
|
|
7
|
+
DATAFILE_INDENT,
|
|
8
|
+
DATAFILE_NEW_LINE,
|
|
9
|
+
format_description,
|
|
10
|
+
format_include,
|
|
11
|
+
format_maintainer,
|
|
12
|
+
format_sources,
|
|
13
|
+
format_tags,
|
|
14
|
+
format_tokens,
|
|
15
|
+
)
|
|
16
|
+
from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def format_datasource(
    filename: str,
    unroll_includes: bool = False,
    for_diff: bool = False,
    client: Optional[TinyB] = None,
    replace_includes: bool = False,
    datafile: Optional[Datafile] = None,
    for_deploy_diff: bool = False,
    skip_eval: bool = False,
    content: Optional[str] = None,
) -> str:
    """Render a datasource datafile back to canonical text.

    When *for_diff* is set, only the diff-relevant sections are emitted
    (and Kafka metadata columns are stripped) so formatting noise doesn't
    pollute remote/local comparisons. Returns the formatted text with a
    single trailing newline.
    """
    if datafile:
        doc = datafile
    else:
        doc = parse_datasource(filename, replace_includes=replace_includes, skip_eval=skip_eval, content=content)

    file_parts: List[str] = []
    if for_diff:
        # Kafka datasources get auto-added metadata columns server-side;
        # drop them so they don't show up as spurious diffs.
        is_kafka = "kafka_connection_name" in doc.nodes[0]
        if is_kafka:
            kafka_metadata_columns = [
                "__value",
                "__headers",
                "__topic",
                "__partition",
                "__offset",
                "__timestamp",
                "__key",
            ]
            columns = [c for c in doc.nodes[0]["columns"] if c["name"] not in kafka_metadata_columns]
            doc.nodes[0].update(
                {
                    "columns": columns,
                }
            )
        if for_deploy_diff:
            format_description(file_parts, doc)
            format_tags(file_parts, doc)
        format_schema(file_parts, doc.nodes[0])
        format_indices(file_parts, doc.nodes[0])
        # Plain diffs only compare the TTL engine arg; deploy diffs compare everything.
        await format_engine(file_parts, doc.nodes[0], only_ttl=bool(not for_deploy_diff), client=client)
        if for_deploy_diff:
            format_import_settings(file_parts, doc.nodes[0])
        format_shared_with(file_parts, doc)
    else:
        # Full formatting: every section, in canonical order.
        format_sources(file_parts, doc)
        format_maintainer(file_parts, doc)
        format_description(file_parts, doc)
        format_tokens(file_parts, doc)
        format_tags(file_parts, doc)
        format_schema(file_parts, doc.nodes[0])
        format_indices(file_parts, doc.nodes[0])
        await format_engine(file_parts, doc.nodes[0])
        format_include(file_parts, doc, unroll_includes=unroll_includes)
        format_data_connector(file_parts, doc.nodes[0])
        format_import_settings(file_parts, doc.nodes[0])
        format_shared_with(file_parts, doc)
    result = "".join(file_parts)
    # Normalize: exactly one trailing newline.
    result = result.rstrip("\n") + "\n"
    return result
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def format_schema(file_parts: List[str], node: Dict[str, Any]) -> List[str]:
    """Append the ``SCHEMA >`` block: indented, comma-separated SQL column lines."""
    if node.get("schema"):
        sql_columns = schema_to_sql_columns(node["columns"])
        body = f",{DATAFILE_NEW_LINE}".join(f"    {col}" for col in sql_columns)
        file_parts.extend(("SCHEMA >", DATAFILE_NEW_LINE, body, DATAFILE_NEW_LINE, DATAFILE_NEW_LINE))

    return file_parts
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def format_indices(file_parts: List[str], node: Dict[str, Any]) -> List[str]:
    """Append the ``INDEXES >`` block, one indented index definition per line."""
    if node.get("indexes"):
        index_rows = [f"    {idx.to_datafile()}" for idx in node["indexes"]]
        body = f"{DATAFILE_NEW_LINE}".join(index_rows)
        file_parts.extend(("INDEXES >", DATAFILE_NEW_LINE, body, DATAFILE_NEW_LINE, DATAFILE_NEW_LINE))

    return file_parts
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def format_data_connector(file_parts: List[str], node: Dict[str, Any]) -> List[str]:
    """Append ``KEY value`` lines for every kafka-related node setting.

    Empty values are rendered as ``''``; a trailing blank line is added
    only when at least one setting was written.
    """
    before = len(file_parts)
    quotes = "''"
    # Plain loop instead of a list comprehension used only for side effects.
    for key, value in node.items():
        if "kafka" in key:
            file_parts.append(f"{key.upper()} {value or quotes}{DATAFILE_NEW_LINE}")
    if before < len(file_parts):
        file_parts.append(DATAFILE_NEW_LINE)
    return file_parts
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def format_import_settings(file_parts: List[str], node: Dict[str, Any]) -> List[str]:
    """Append ``KEY value`` lines for every ``import_``-prefixed node setting.

    A trailing blank line is added only when at least one setting was written.
    """
    before = len(file_parts)
    # Plain loop instead of a list comprehension used only for side effects.
    for key, value in node.items():
        if "import_" in key:
            file_parts.append(f"{key.upper()} {value}{DATAFILE_NEW_LINE}")
    if before < len(file_parts):
        file_parts.append(DATAFILE_NEW_LINE)
    return file_parts
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def format_shared_with(file_parts: List[str], doc: Datafile) -> List[str]:
    """Append a ``SHARED_WITH >`` block listing one workspace per indented line."""
    if doc.shared_with:
        file_parts.append("SHARED_WITH >")
        file_parts.append(DATAFILE_NEW_LINE)
        rows = [f"{DATAFILE_INDENT}{workspace}" for workspace in doc.shared_with]
        file_parts.append("\n".join(rows))
    return file_parts
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
async def format_engine(
    file_parts: List[str], node: Dict[str, Any], only_ttl: bool = False, client: Optional[TinyB] = None
) -> List[str]:
    """Append ENGINE / ENGINE_* lines for the node's engine configuration.

    With *only_ttl* set (plain diffs), only the TTL engine argument is
    emitted, optionally normalized through the server's SQL formatter.
    Otherwise every engine argument is emitted verbatim.
    """
    if only_ttl:
        if node.get("engine", None):
            for arg in sorted(node["engine"].get("args", [])):
                if arg[0].upper() == "TTL":
                    # Normalize whitespace around comma-separated TTL expressions.
                    elem = ", ".join([x.strip() for x in arg[1].split(",")])
                    try:
                        if client:
                            ttl_sql = await client.sql_get_format(f"select {elem}", with_clickhouse_format=True)
                            # Drop the leading "SELECT " (7 chars) — presumably
                            # what the formatter prepends; confirm against the API.
                            formatted_ttl = ttl_sql[7:]
                        else:
                            formatted_ttl = elem
                    except Exception:
                        # Best effort: fall back to the unformatted expression.
                        formatted_ttl = elem
                    file_parts.append(f"ENGINE_{arg[0].upper()} {formatted_ttl}")
                    file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(DATAFILE_NEW_LINE)
        return file_parts
    else:
        if node.get("engine", None):
            empty = '""'
            file_parts.append(f'ENGINE {node["engine"]["type"]}' if node.get("engine", {}).get("type") else empty)
            file_parts.append(DATAFILE_NEW_LINE)
            # Sorted for a stable, diff-friendly ordering of engine args.
            for arg in sorted(node["engine"].get("args", [])):
                elem = ", ".join([x.strip() for x in arg[1].split(",")])
                file_parts.append(f"ENGINE_{arg[0].upper()} {elem if elem else empty}")
                file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(DATAFILE_NEW_LINE)
        return file_parts
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from tinybird.sql_template_fmt import DEFAULT_FMT_LINE_LENGTH, format_sql_template
|
|
6
|
+
from tinybird.tb.modules.datafile.common import (
|
|
7
|
+
ON_DEMAND,
|
|
8
|
+
CopyParameters,
|
|
9
|
+
Datafile,
|
|
10
|
+
ExportReplacements,
|
|
11
|
+
PipeNodeTypes,
|
|
12
|
+
_unquote,
|
|
13
|
+
eval_var,
|
|
14
|
+
)
|
|
15
|
+
from tinybird.tb.modules.datafile.format_common import (
|
|
16
|
+
DATAFILE_INDENT,
|
|
17
|
+
DATAFILE_NEW_LINE,
|
|
18
|
+
format_description,
|
|
19
|
+
format_maintainer,
|
|
20
|
+
format_sources,
|
|
21
|
+
format_tags,
|
|
22
|
+
format_tokens,
|
|
23
|
+
)
|
|
24
|
+
from tinybird.tb.modules.datafile.format_datasource import format_engine
|
|
25
|
+
from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def format_node_sql(
    file_parts: List[str], node: Dict[str, Any], line_length: Optional[int] = None, lower_keywords: bool = False
) -> List[str]:
    """Append the node's ``SQL >`` section, formatted and indented, plus a blank line."""
    rendered = format_sql(node["sql"], DATAFILE_INDENT, line_length=line_length, lower_keywords=lower_keywords)
    file_parts.extend(("SQL >", DATAFILE_NEW_LINE, rendered, DATAFILE_NEW_LINE, DATAFILE_NEW_LINE))
    return file_parts
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def format_node_type(file_parts: List[str], node: Dict[str, Any]) -> List[str]:
    """Append the TYPE section for a pipe node (materialized, copy, or export).

    A node with no recognized type contributes nothing. Exactly one of the
    three branches is expected to apply per node.
    """
    node_type = node.get("type", "").lower()
    node_type_upper = f"TYPE {node_type.upper()}"
    # Materialized pipe: TYPE + target DATASOURCE + engine settings.
    if node_type == PipeNodeTypes.MATERIALIZED:
        file_parts.append(node_type_upper)
        file_parts.append(DATAFILE_NEW_LINE)
        file_parts.append(f'DATASOURCE {node["datasource"]}')
        file_parts.append(DATAFILE_NEW_LINE)
        await format_engine(file_parts, node)

    # Copy pipe: TYPE + TARGET_DATASOURCE (+ COPY_MODE) + COPY_SCHEDULE.
    if node_type == PipeNodeTypes.COPY:
        file_parts.append(node_type_upper)
        file_parts.append(DATAFILE_NEW_LINE)
        file_parts.append(f'TARGET_DATASOURCE {node["target_datasource"]}')
        if node.get("mode"):
            file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(f'COPY_MODE {node.get("mode")}')

        if node.get(CopyParameters.COPY_SCHEDULE):
            # Normalize "@on-demand" spellings to the canonical constant.
            is_ondemand = node[CopyParameters.COPY_SCHEDULE].lower() == ON_DEMAND
            file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(
                f"{CopyParameters.COPY_SCHEDULE.upper()} {ON_DEMAND if is_ondemand else node[CopyParameters.COPY_SCHEDULE]}"
            )
        else:
            # No schedule configured: default to on-demand.
            file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(f"{CopyParameters.COPY_SCHEDULE.upper()} {ON_DEMAND}")
        file_parts.append(DATAFILE_NEW_LINE)

    # Sink or Stream pipe: TYPE + one line per export parameter.
    if ExportReplacements.is_export_node(node):
        file_parts.append(node_type_upper)
        export_params = ExportReplacements.get_params_from_datafile(node)
        file_parts.append(DATAFILE_NEW_LINE)
        for param, value in export_params.items():
            if param == "schedule_cron" and not value:
                value = ON_DEMAND
            datafile_key = ExportReplacements.get_datafile_key(param, node)
            # Skip params with no datafile representation or no value.
            if datafile_key and value:
                file_parts.append(f"{datafile_key} {value}")
                file_parts.append(DATAFILE_NEW_LINE)

    return file_parts
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def format_pipe_include(file_parts: List[str], node: Dict[str, Any], includes: Dict[str, Any]) -> List[str]:
    """Emit ``INCLUDE <file>`` lines for includes whose first node matches *node*.

    Matched entries are removed from *includes* so each INCLUDE is emitted once.
    """
    if includes:
        # Snapshot the items so we can delete from the dict while looping.
        for include_key, first_node in list(includes.items()):
            if node["name"] in first_node:
                file_parts.append(f"INCLUDE {include_key}")
                file_parts.append(DATAFILE_NEW_LINE)
                file_parts.append(DATAFILE_NEW_LINE)
                del includes[include_key]
    return file_parts
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def format_sql(sql: str, DATAFILE_INDENT: str, line_length: Optional[int] = None, lower_keywords: bool = False) -> str:
    """Format *sql* with the template formatter, indenting every non-blank line.

    NOTE(review): the second parameter shadows the module-level constant of the
    same name; kept as-is because renaming would break keyword callers.
    """
    formatted = format_sql_template(sql.strip(), line_length=line_length, lower_keywords=lower_keywords)
    indented = [f"{DATAFILE_INDENT}{row}" for row in formatted.split("\n") if row.strip()]
    return "\n".join(indented)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def format_node(
    file_parts: List[str],
    node: Dict[str, Any],
    includes: Dict[str, Any],
    line_length: Optional[int] = None,
    unroll_includes: bool = False,
    lower_keywords: bool = False,
) -> None:
    """Render one pipe NODE (include markers, name, description, SQL, type).

    When includes are not being unrolled, nodes that belong to an include
    are represented by their INCLUDE line and skipped here.
    """
    if not unroll_includes:
        format_pipe_include(file_parts, node, includes)
        # Skip nodes that came from an include file; the INCLUDE line stands in.
        matching = [key for key in includes if node["name"].strip() in key]
        if matching:
            return

    file_parts.append(f'NODE {node["name"].strip()}')
    file_parts.append(DATAFILE_NEW_LINE)

    from collections import namedtuple

    # Minimal adapter: format_description expects an object with .description.
    Doc = namedtuple("Doc", ["description"])
    format_description(file_parts, Doc(node.get("description", "")))
    format_node_sql(file_parts, node, line_length=line_length, lower_keywords=lower_keywords)
    await format_node_type(file_parts, node)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
async def format_pipe(
    filename: str,
    line_length: Optional[int] = DEFAULT_FMT_LINE_LENGTH,
    unroll_includes: bool = False,
    replace_includes: bool = False,
    datafile: Optional[Datafile] = None,
    for_deploy_diff: bool = False,
    skip_eval: bool = False,
    content: Optional[str] = None,
) -> str:
    """Render a pipe datafile back to canonical text.

    Unless *unroll_includes* is set, nodes that originate from INCLUDE
    files are collapsed back into their INCLUDE line. Returns the
    formatted text with a single trailing newline.
    """
    if datafile:
        doc = datafile
    else:
        doc = parse_pipe(filename, replace_includes=replace_includes, skip_eval=skip_eval, content=content)

    file_parts: List[str] = []
    format_sources(file_parts, doc)
    format_maintainer(file_parts, doc)
    format_description(file_parts, doc)
    format_tokens(file_parts, doc)
    format_tags(file_parts, doc)
    if doc.includes and not unroll_includes:
        for k in doc.includes:
            # We filter only the include files as we currently have 2 items for each include
            # { 'include_file.incl': 'First node of the include" }
            # { 'first node of the pipe after the include': }
            if ".incl" not in k:
                continue

            # We get all the nodes inside the include and remove them from the unrolled pipe
            include_parameters = _unquote(k)

            # If they use an include with parameters like
            # `INCLUDE "xxx.incl" "GROUP_COL=path" "MATERIALIZED_VIEW=speed_insights_path_daily_mv"`
            # we just want the file name to take nodes.
            include_file = include_parameters.split('"')[0]
            include_file = (
                Path(os.path.dirname(filename)) / eval_var(include_file)
                if "." in include_file
                else eval_var(include_file)
            )
            included_pipe = parse_pipe(include_file, skip_eval=skip_eval)
            pipe_nodes = doc.nodes.copy()
            for included_node in included_pipe.nodes.copy():
                unrolled_included_node = next(
                    (node for node in pipe_nodes if node["name"] == included_node["name"]), None
                )
                if unrolled_included_node:
                    doc.nodes.remove(unrolled_included_node)
    for node in doc.nodes:
        await format_node(
            file_parts,
            node,
            doc.includes,
            line_length=line_length,
            unroll_includes=unroll_includes,
            lower_keywords=bool(for_deploy_diff),
        )

    # Emit any INCLUDE lines not already consumed by format_node.
    if not unroll_includes:
        for k, _ in doc.includes.items():
            if ".incl" not in k:
                continue
            file_parts.append(f"INCLUDE {k}")
            file_parts.append(DATAFILE_NEW_LINE)
            file_parts.append(DATAFILE_NEW_LINE)

    result = "".join(file_parts)
    # Normalize: exactly one trailing newline.
    result = result.rstrip("\n") + "\n"
    return result
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from tinybird.feedback_manager import FeedbackManager
|
|
7
|
+
from tinybird.tb.modules.datafile.common import (
|
|
8
|
+
Datafile,
|
|
9
|
+
format_filename,
|
|
10
|
+
parse,
|
|
11
|
+
)
|
|
12
|
+
from tinybird.tb.modules.datafile.exceptions import ParseException
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def parse_datasource(
    filename: str,
    replace_includes: bool = True,
    content: Optional[str] = None,
    skip_eval: bool = False,
    hide_folders: bool = False,
) -> Datafile:
    """Parse a ``.datasource`` file (or raw *content*) into a Datafile.

    Raises ``click.ClickException`` on parse errors and ``ValueError``
    when the datasource defines more than one node.
    """
    basepath = ""
    if not content:
        with open(filename) as file:
            s = file.read()
        basepath = os.path.dirname(filename)
    else:
        s = content

    # Optionally strip directory components from user-facing messages.
    filename = format_filename(filename, hide_folders)
    try:
        doc = parse(s, "default", basepath, replace_includes=replace_includes, skip_eval=skip_eval)
    except ParseException as e:
        raise click.ClickException(
            FeedbackManager.error_parsing_file(filename=filename, lineno=e.lineno, error=e)
        ) from None

    if len(doc.nodes) > 1:
        # Fix: the f-string previously contained no placeholder, so the
        # offending filename was never reported.
        raise ValueError(f"{filename}: datasources can't have more than one node")

    return doc
|