tinybird 0.0.1.dev71__py3-none-any.whl → 0.0.1.dev73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic.
- tinybird/client.py +7 -5
- tinybird/feedback_manager.py +2 -2
- tinybird/prompts.py +56 -8
- tinybird/tb/__cli__.py +2 -2
- tinybird/tb/cli.py +1 -0
- tinybird/tb/modules/build.py +2 -2
- tinybird/tb/modules/cicd.py +1 -1
- tinybird/tb/modules/cli.py +12 -12
- tinybird/tb/modules/common.py +3 -4
- tinybird/tb/modules/config.py +3 -3
- tinybird/tb/modules/copy.py +1 -1
- tinybird/tb/modules/datafile/build_pipe.py +35 -186
- tinybird/tb/modules/datafile/playground.py +1400 -0
- tinybird/tb/modules/datasource.py +1 -1
- tinybird/tb/modules/deployment.py +10 -1
- tinybird/tb/modules/endpoint.py +10 -6
- tinybird/tb/modules/feedback_manager.py +2 -2
- tinybird/tb/modules/fmt.py +2 -2
- tinybird/tb/modules/local.py +2 -2
- tinybird/tb/modules/local_common.py +3 -3
- tinybird/tb/modules/materialization.py +1 -1
- tinybird/tb/modules/mock.py +1 -1
- tinybird/tb/modules/pipe.py +1 -1
- tinybird/tb/modules/playground.py +135 -0
- tinybird/tb/modules/shell.py +6 -5
- tinybird/tb/modules/token.py +17 -3
- tinybird/tb/modules/watch.py +105 -1
- tinybird/tb/modules/workspace.py +1 -1
- {tinybird-0.0.1.dev71.dist-info → tinybird-0.0.1.dev73.dist-info}/METADATA +1 -1
- {tinybird-0.0.1.dev71.dist-info → tinybird-0.0.1.dev73.dist-info}/RECORD +33 -31
- {tinybird-0.0.1.dev71.dist-info → tinybird-0.0.1.dev73.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev71.dist-info → tinybird-0.0.1.dev73.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev71.dist-info → tinybird-0.0.1.dev73.dist-info}/top_level.txt +0 -0
tinybird/tb/modules/datafile/playground.py
@@ -0,0 +1,1400 @@
import datetime
import os
import os.path
import re
import urllib
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union

import click
from toposort import toposort

from tinybird.client import TinyB
from tinybird.sql import parse_table_structure, schema_to_sql_columns
from tinybird.sql_template import get_used_tables_in_template, render_sql_template
from tinybird.tb.modules.common import get_ca_pem_content
from tinybird.tb.modules.datafile.build_datasource import is_datasource
from tinybird.tb.modules.datafile.build_pipe import (
    get_target_materialized_data_source_name,
    is_endpoint,
    is_endpoint_with_no_dependencies,
    is_materialized,
    new_pipe,
)
from tinybird.tb.modules.datafile.common import (
    DEFAULT_CRON_PERIOD,
    INTERNAL_TABLES,
    ON_DEMAND,
    PREVIEW_CONNECTOR_SERVICES,
    CopyModes,
    CopyParameters,
    DataFileExtensions,
    ExportReplacements,
    ImportReplacements,
    PipeNodeTypes,
    find_file_by_name,
    get_name_version,
    get_project_filenames,
    pp,
)
from tinybird.tb.modules.datafile.exceptions import AlreadyExistsException, IncludeFileNotFoundException
from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
from tinybird.tb.modules.feedback_manager import FeedbackManager
from tinybird.tb.modules.project import Project


async def folder_playground(
    project: Project,
    tb_client: TinyB,
    filenames: Optional[List[str]] = None,
    is_internal: bool = False,
    current_ws: Optional[Dict[str, Any]] = None,
    local_ws: Optional[Dict[str, Any]] = None,
):
    build = True
    dry_run = False
    force = True
    only_changes = True
    debug = False
    run_tests = False
    verbose = False
    raise_on_exists = False
    fork_downstream = True
    fork = False
    release_created = False
    folder = str(project.path)
    datasources: List[Dict[str, Any]] = await tb_client.datasources()
    pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
    build = True
    dry_run = False
    force = True
    only_changes = True
    debug = False
    check = True
    populate = False
    populate_subset = None
    populate_condition = None
    tests_to_run = 0
    override_datasource = False
    skip_confirmation = True
    wait = False
    unlink_on_populate_error = False
    only_response_times = False
    run_tests = False
    verbose = False
    as_standard = False
    raise_on_exists = False
    fork_downstream = True
    fork = False
    release_created = False
    tests_relative_change = 0.01
    tests_sample_by_params = 0
    tests_filter_by = None
    tests_failfast = False
    tests_ignore_order = False
    tests_validate_processed_bytes = False
    tests_check_requests_from_branch = False
    user_token = None
    ignore_sql_errors = False
    is_vendor = False
    current_ws = current_ws or local_ws

    existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
    remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]

    if not filenames:
        filenames = get_project_filenames(folder)

    # build graph to get new versions for all the files involved in the query
    # dependencies need to be processed always to get the versions
    dependencies_graph = await build_graph(
        filenames,
        tb_client,
        dir_path=folder,
        process_dependencies=True,
        skip_connectors=True,
        vendor_paths=[],
        current_ws=current_ws,
        only_changes=only_changes,
        fork_downstream=fork_downstream,
        is_internal=is_internal,
        build=build,
    )

    if debug:
        pp.pprint(dependencies_graph.to_run)

    def should_push_file(
        name: str,
        remote_resource_names: List[str],
        force: bool,
        run_tests: bool,
    ) -> bool:
        """
        Function to know if we need to run a file or not
        """
        if name not in remote_resource_names:
            return True
        # When we need to try to push a file when it doesn't exist and the version is different that the existing one
        resource_full_name = name
        if resource_full_name not in existing_resources:
            return True
        if force or run_tests:
            return True
        return False

    async def push(
        name: str,
        to_run: Dict[str, Dict[str, Any]],
        dry_run: bool,
        fork_downstream: Optional[bool] = False,
        fork: Optional[bool] = False,
    ):
        if name in to_run:
            resource = to_run[name]["resource"]
            if resource == "datasources":
                return
            if not dry_run:
                if should_push_file(name, remote_resource_names, force, run_tests):
                    click.echo(FeedbackManager.info_processing_new_resource(name=name, version=""))
                    try:
                        await exec_file(
                            to_run[name],
                            tb_client,
                            force,
                            check,
                            debug and verbose,
                            populate,
                            populate_subset,
                            populate_condition,
                            unlink_on_populate_error,
                            wait,
                            user_token,
                            override_datasource,
                            ignore_sql_errors,
                            skip_confirmation,
                            only_response_times,
                            run_tests,
                            as_standard,
                            tests_to_run,
                            tests_relative_change,
                            tests_sample_by_params,
                            tests_filter_by,
                            tests_failfast,
                            tests_ignore_order,
                            tests_validate_processed_bytes,
                            tests_check_requests_from_branch,
                            current_ws,
                            local_ws,
                            fork_downstream,
                            fork,
                            build,
                            is_vendor,
                        )
                        if not run_tests:
                            click.echo(
                                FeedbackManager.success_create(
                                    name=(
                                        name
                                        if to_run[name]["version"] is None
                                        else f"{name}__v{to_run[name]['version']}"
                                    )
                                )
                            )
                    except Exception as e:
                        filename = to_run[name]["filename"]
                        exception = FeedbackManager.error_push_file_exception(
                            filename=filename,
                            error=e,
                        )
                        raise click.ClickException(exception)
                else:
                    if raise_on_exists:
                        raise AlreadyExistsException(
                            FeedbackManager.warning_name_already_exists(
                                name=name if to_run[name]["version"] is None else f"{name}__v{to_run[name]['version']}"
                            )
                        )
                    else:
                        if await name_matches_existing_resource(resource, name, tb_client):
                            if resource == "pipes":
                                click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
                            else:
                                click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
                        else:
                            click.echo(
                                FeedbackManager.warning_name_already_exists(
                                    name=(
                                        name
                                        if to_run[name]["version"] is None
                                        else f"{name}__v{to_run[name]['version']}"
                                    )
                                )
                            )
            else:
                if should_push_file(name, remote_resource_names, force, run_tests):
                    extension = "pipe" if resource == "pipes" else "datasource"
                    click.echo(FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=""))
                else:
                    if await name_matches_existing_resource(resource, name, tb_client):
                        if resource == "pipes":
                            click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
                        else:
                            click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
                    else:
                        click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))

    async def push_files(
        dependency_graph: GraphDependencies,
        dry_run: bool = False,
    ):
        endpoints_dep_map = dict()
        processed = set()

        resources_to_run = dependency_graph.to_run

        # This will generate the graph from right to left and will fill the gaps of the dependencies
        # If we have a graph like this:
        # A -> B -> C
        # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
        # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
        dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
            dependency_graph.all_dep_map,
            dependency_graph.all_resources,
            resources_to_run,
            list(dependency_graph.dep_map.keys()),
        )

        # First, we will deploy the datasources that need to be deployed.
        # We need to deploy the datasources from left to right as some datasources might have MV that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
        groups = [group for group in toposort(dependencies_graph_fork_downstream)]

        groups.reverse()
        for group in groups:
            for name in group:
                if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
                    continue

                # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
                kafka_connection_name = (
                    resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
                )
                service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
                if release_created and (kafka_connection_name or service):
                    connector = "Kafka" if kafka_connection_name else service
                    error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
                    raise click.ClickException(error_msg)

                await push(
                    name,
                    resources_to_run_fork_downstream,
                    dry_run,
                    fork_downstream,
                    fork,
                )
                processed.add(name)

        # Now, we will create a map of all the endpoints and there dependencies
        # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
        # But does not include the missing gaps
        # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
        # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
        # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
        groups = [group for group in toposort(dependencies_graph_fork_downstream)]
        for group in groups:
            for name in group:
                if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
                    continue

                endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]

        # Now that we have the dependencies of the endpoints, we need to check that the resources has not been deployed yet and only care about the endpoints that depend on endpoints
        groups = [group for group in toposort(endpoints_dep_map)]

        # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
        # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
        # So we need to reverse the groups
        groups.reverse()
        for group in groups:
            for name in group:
                if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
                    continue

                await push(
                    name,
                    resources_to_run_fork_downstream,
                    dry_run,
                    fork_downstream,
                    fork,
                )
                processed.add(name)

        # Now we should have the endpoints and datasources deployed, we can deploy the rest of the pipes (copy & sinks)
        # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
        # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
        groups = [group for group in toposort(dependencies_graph_fork_downstream)]
        for group in groups:
            for name in group:
                if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
                    continue

                await push(
                    name,
                    resources_to_run_fork_downstream,
                    dry_run,
                    fork_downstream,
                    fork,
                )
                processed.add(name)

        # Finally, we need to deploy the materialized views from right to left.
        # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
        # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
        groups = [group for group in toposort(dependencies_graph_fork_downstream)]
        for group in groups:
            for name in group:
                if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
                    continue

                await push(
                    name,
                    resources_to_run_fork_downstream,
                    dry_run,
                    fork_downstream,
                    fork,
                )
                processed.add(name)

    await push_files(dependencies_graph, dry_run)

    if not dry_run and not run_tests and verbose:
        click.echo(FeedbackManager.info_not_pushing_fixtures())

    return dependencies_graph.to_run


async def name_matches_existing_resource(resource: str, name: str, tb_client: TinyB):
    if resource == "datasources":
        current_pipes: List[Dict[str, Any]] = await tb_client.pipes()
        if name in [x["name"] for x in current_pipes]:
            return True
    else:
        current_datasources: List[Dict[str, Any]] = await tb_client.datasources()
        if name in [x["name"] for x in current_datasources]:
            return True
    return False


async def exec_file(
    r: Dict[str, Any],
    tb_client: TinyB,
    force: bool,
    check: bool,
    debug: bool,
    populate: bool,
    populate_subset,
    populate_condition,
    unlink_on_populate_error,
    wait_populate,
    user_token: Optional[str],
    override_datasource: bool = False,
    ignore_sql_errors: bool = False,
    skip_confirmation: bool = False,
    only_response_times: bool = False,
    run_tests=False,
    as_standard=False,
    tests_to_run: int = 0,
    tests_relative_change: float = 0.01,
    tests_to_sample_by_params: int = 0,
    tests_filter_by: Optional[List[str]] = None,
    tests_failfast: bool = False,
    tests_ignore_order: bool = False,
    tests_validate_processed_bytes: bool = False,
    tests_check_requests_from_branch: bool = False,
    current_ws: Optional[Dict[str, Any]] = None,
    local_ws: Optional[Dict[str, Any]] = None,
    fork_downstream: Optional[bool] = False,
    fork: Optional[bool] = False,
    build: Optional[bool] = False,
    is_vendor: Optional[bool] = False,
):
    if debug:
        click.echo(FeedbackManager.debug_running_file(file=pp.pformat(r)))
    if r["resource"] == "pipes":
        await new_pipe(
            r,
            tb_client,
            force,
            check,
            populate,
            populate_subset,
            populate_condition,
            unlink_on_populate_error,
            wait_populate,
            ignore_sql_errors=ignore_sql_errors,
            only_response_times=only_response_times,
            run_tests=run_tests,
            as_standard=as_standard,
            tests_to_run=tests_to_run,
            tests_relative_change=tests_relative_change,
            tests_to_sample_by_params=tests_to_sample_by_params,
            tests_filter_by=tests_filter_by,
            tests_failfast=tests_failfast,
            tests_ignore_order=tests_ignore_order,
            tests_validate_processed_bytes=tests_validate_processed_bytes,
            override_datasource=override_datasource,
            tests_check_requests_from_branch=tests_check_requests_from_branch,
            fork_downstream=fork_downstream,
            fork=fork,
        )

    elif r["resource"] == "datasources":
        pass
    else:
        raise click.ClickException(FeedbackManager.error_unknown_resource(resource=r["resource"]))


def get_remote_resource_name_without_version(remote_resource_name: str) -> str:
    """
    >>> get_remote_resource_name_without_version("r__datasource")
    'r__datasource'
    >>> get_remote_resource_name_without_version("r__datasource__v0")
    'r__datasource'
    >>> get_remote_resource_name_without_version("datasource")
    'datasource'
    """
    parts = get_name_version(remote_resource_name)
    return parts["name"]


def create_downstream_dependency_graph(dependency_graph: Dict[str, Set[str]], all_resources: Dict[str, Dict[str, Any]]):
    """
    This function reverses the dependency graph obtained from build_graph so you have downstream dependencies for each node in the graph.

    Additionally takes into account target_datasource of materialized views
    """
    downstream_dependency_graph: Dict[str, Set[str]] = {node: set() for node in dependency_graph}

    for node, dependencies in dependency_graph.items():
        for dependency in dependencies:
            if dependency not in downstream_dependency_graph:
                # a shared data source, we can skip it
                continue
            downstream_dependency_graph[dependency].add(node)

    for key in dict(downstream_dependency_graph):
        target_datasource = get_target_materialized_data_source_name(all_resources[key])
        if target_datasource:
            downstream_dependency_graph[key].update({target_datasource})
            try:
                downstream_dependency_graph[target_datasource].remove(key)
            except KeyError:
                pass

    return downstream_dependency_graph


def update_dep_map_recursively(
    dep_map: Dict[str, Set[str]],
    downstream_dep_map: Dict[str, Set[str]],
    all_resources: Dict[str, Dict[str, Any]],
    to_run: Dict[str, Dict[str, Any]],
    dep_map_keys: List[str],
    key: Optional[str] = None,
    visited: Optional[List[str]] = None,
):
    """
    Given a downstream_dep_map obtained from create_downstream_dependency_graph this function updates each node recursively to complete the downstream dependency graph for each node
    """
    if not visited:
        visited = list()
    if not key and len(dep_map_keys) == 0:
        return
    if not key:
        key = dep_map_keys.pop()
    if key not in dep_map:
        dep_map[key] = set()
    else:
        visited.append(key)
        return

    for dep in downstream_dep_map.get(key, {}):
        if dep not in downstream_dep_map:
            continue
        to_run[dep] = all_resources.get(dep, {})
        update_dep_map_recursively(
            dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=dep, visited=visited
        )
        dep_map[key].update(downstream_dep_map[dep])
        dep_map[key].update({dep})
        try:
            dep_map[key].remove(key)
        except KeyError:
            pass

    to_run[key] = all_resources.get(key, {})
    update_dep_map_recursively(
        dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=None, visited=visited
    )


def generate_forkdownstream_graph(
    all_dep_map: Dict[str, Set[str]],
    all_resources: Dict[str, Dict[str, Any]],
    to_run: Dict[str, Dict[str, Any]],
    dep_map_keys: List[str],
) -> Tuple[Dict[str, Set[str]], Dict[str, Dict[str, Any]]]:
    """
    This function for a given graph of dependencies from left to right. It will generate a new graph with the dependencies from right to left, but taking into account that even if some nodes are not inside to_run, they are still dependencies that need to be deployed.

    >>> deps, _ = generate_forkdownstream_graph(
    ...     {
    ...         'a': {'b'},
    ...         'b': {'c'},
    ...         'c': set(),
    ...     },
    ...     {
    ...         'a': {'resource_name': 'a'},
    ...         'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
    ...         'c': {'resource_name': 'c'},
    ...     },
    ...     {
    ...         'a': {'resource_name': 'a'},
    ...     },
    ...     ['a', 'b', 'c'],
    ... )
    >>> {k: sorted(v) for k, v in deps.items()}
    {'c': [], 'b': ['a', 'c'], 'a': []}

    >>> deps, _ = generate_forkdownstream_graph(
    ...     {
    ...         'a': {'b'},
    ...         'b': {'c'},
    ...         'c': set(),
    ...     },
    ...     {
    ...         'a': {'resource_name': 'a'},
    ...         'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
    ...         'c': {'resource_name': 'c'},
    ...     },
    ...     {
    ...         'b': {'resource_name': 'b'},
    ...     },
    ...     ['a', 'b', 'c'],
    ... )
    >>> {k: sorted(v) for k, v in deps.items()}
    {'c': [], 'b': ['a', 'c'], 'a': []}

    >>> deps, _ = generate_forkdownstream_graph(
    ...     {
    ...         'migrated__a': {'a'},
    ...         'a': {'b'},
    ...         'b': {'c'},
    ...         'c': set(),
    ...     },
    ...     {
    ...         'migrated__a': {'resource_name': 'migrated__a', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'a'}}]},
    ...         'a': {'resource_name': 'a'},
    ...         'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
    ...         'c': {'resource_name': 'c'},
    ...     },
    ...     {
    ...         'migrated__a': {'resource_name': 'migrated__a'},
    ...         'a': {'resource_name': 'a'},
    ...     },
    ...     ['migrated_a', 'a', 'b', 'c'],
    ... )
    >>> {k: sorted(v) for k, v in deps.items()}
    {'c': [], 'b': ['a', 'c'], 'a': [], 'migrated_a': []}
    """
    downstream_dep_map = create_downstream_dependency_graph(all_dep_map, all_resources)
    new_dep_map: Dict[str, Set[str]] = {}
    new_to_run = deepcopy(to_run)
    update_dep_map_recursively(new_dep_map, downstream_dep_map, all_resources, new_to_run, dep_map_keys)
    return new_dep_map, new_to_run


@dataclass
class GraphDependencies:
    """
    This class is used to store the dependencies graph and the resources that are going to be deployed
    """

    dep_map: Dict[str, Set[str]]
    to_run: Dict[str, Dict[str, Any]]

    # The same as above but for the whole project, not just the resources affected by the current deployment
    all_dep_map: Dict[str, Set[str]]
    all_resources: Dict[str, Dict[str, Any]]


async def process(
    filename: str,
    tb_client: TinyB,
    deps: List[str],
    dep_map: Dict[str, Any],
    to_run: Dict[str, Any],
    vendor_paths: Optional[List[Tuple[str, str]]] = None,
    skip_connectors: bool = False,
    current_ws: Optional[Dict[str, Any]] = None,
    changed: Optional[Dict[str, Any]] = None,
    fork_downstream: Optional[bool] = False,
    is_internal: Optional[bool] = False,
    dir_path: Optional[str] = None,
    verbose: bool = False,
    embedded_datasources: Optional[Dict[str, Any]] = None,
):
    name, kind = filename.rsplit(".", 1)
    warnings = []
    embedded_datasources = {} if embedded_datasources is None else embedded_datasources

    try:
        res = await process_file(
            filename,
            tb_client,
            skip_connectors=skip_connectors,
            current_ws=current_ws,
        )
    except click.ClickException as e:
        raise e
    except IncludeFileNotFoundException as e:
        raise click.ClickException(FeedbackManager.error_deleted_include(include_file=str(e), filename=filename))
    except Exception as e:
        raise click.ClickException(str(e))

    # datasource
    # {
    #     "resource": "datasources",
    #     "resource_name": name,
    #     "version": doc.version,
    #     "params": params,
    #     "filename": filename,
    #     "deps": deps,
    #     "tokens": doc.tokens,
    #     "shared_with": doc.shared_with,
    #     "filtering_tags": doc.filtering_tags,
    # }
    # pipe
    # {
    #     "resource": "pipes",
    #     "resource_name": name,
    #     "version": doc.version,
    #     "filename": filename,
    #     "name": name + version,
    #     "nodes": nodes,
    #     "deps": [x for x in set(deps)],
    #     "tokens": doc.tokens,
    #     "description": description,
    #     "warnings": doc.warnings,
    #     "filtering_tags": doc.filtering_tags,
    # }

    # r is essentially a Datasource or a Pipe in dict shape, like in the comment above
    for r in res:
        resource_name = r["resource_name"]
        warnings = r.get("warnings", [])
        if (
            changed
            and resource_name in changed
            and (not changed[resource_name] or changed[resource_name] in ["shared", "remote"])
        ):
            continue

        if (
            fork_downstream
            and r.get("resource", "") == "pipes"
            and any(["engine" in x.get("params", {}) for x in r.get("nodes", [])])
        ):
            raise click.ClickException(FeedbackManager.error_forkdownstream_pipes_with_engine(pipe=resource_name))

        to_run[resource_name] = r
        file_deps: List[str] = r.get("deps", [])
        deps += file_deps
        # calculate and look for deps
        dep_list = []
        for x in file_deps:
            if x not in INTERNAL_TABLES or is_internal:
                f, ds = find_file_by_name(dir_path or ".", x, verbose, vendor_paths=vendor_paths, resource=r)
                if f:
                    dep_list.append(f.rsplit(".", 1)[0])
                if ds:
                    ds_fn = ds["resource_name"]
                    prev = to_run.get(ds_fn, {})
                    to_run[ds_fn] = deepcopy(r)
                    try:
                        to_run[ds_fn]["deps"] = list(
                            set(to_run[ds_fn].get("deps", []) + prev.get("deps", []) + [resource_name])
                        )
                    except ValueError:
                        pass
                    embedded_datasources[x] = to_run[ds_fn]
                else:
                    e_ds = embedded_datasources.get(x, None)
                    if e_ds:
                        dep_list.append(e_ds["resource_name"])

        dep_map[resource_name] = set(dep_list)
    return os.path.basename(name), warnings


async def get_processed(
    filenames: Iterable[str],
    changed: Optional[Dict[str, Any]] = None,
    verbose: bool = False,
    deps: Optional[List[str]] = None,
    dep_map: Optional[Dict[str, Any]] = None,
    to_run: Optional[Dict[str, Any]] = None,
    vendor_paths: Optional[List[Tuple[str, str]]] = None,
    processed: Optional[Set[str]] = None,
    tb_client: Optional[TinyB] = None,
    skip_connectors: bool = False,
    current_ws: Optional[Dict[str, Any]] = None,
    fork_downstream: Optional[bool] = False,
    is_internal: Optional[bool] = False,
    dir_path: Optional[str] = None,
    embedded_datasources: Optional[Dict[str, Dict[str, Any]]] = None,
):
    # Initialize with proper type annotations
    deps_list: List[str] = [] if deps is None else deps
    dep_map_dict: Dict[str, Any] = {} if dep_map is None else dep_map
    to_run_dict: Dict[str, Any] = {} if to_run is None else to_run
    processed_set: Set[str] = set() if processed is None else processed
    embedded_ds: Dict[str, Dict[str, Any]] = {} if embedded_datasources is None else embedded_datasources

    for filename in filenames:
        # just process changed filenames (tb deploy and --only-changes)
        if changed is not None:
            resource = Path(filename).resolve().stem
            if resource in changed and (not changed[resource] or changed[resource] in ["shared", "remote"]):
                continue
        if os.path.isdir(filename):
            await get_processed(
                filenames=get_project_filenames(filename),
                changed=changed,
                verbose=verbose,
                deps=deps_list,
                dep_map=dep_map_dict,
                to_run=to_run_dict,
                vendor_paths=vendor_paths,
                processed=processed_set,
                tb_client=tb_client,
                skip_connectors=skip_connectors,
                current_ws=current_ws,
                fork_downstream=fork_downstream,
                is_internal=is_internal,
                dir_path=dir_path,
                embedded_datasources=embedded_ds,
            )
        else:
            if verbose:
                click.echo(FeedbackManager.info_processing_file(filename=filename))

            if ".incl" in filename:
                click.echo(FeedbackManager.warning_skipping_include_file(file=filename))

            if tb_client is None:
                raise ValueError("tb_client cannot be None")

            name, warnings = await process(
                filename=filename,
                tb_client=tb_client,
                deps=deps_list,
                dep_map=dep_map_dict,
                to_run=to_run_dict,
                vendor_paths=vendor_paths,
                skip_connectors=skip_connectors,
                current_ws=current_ws,
                changed=changed,
                fork_downstream=fork_downstream,
                is_internal=is_internal,
                dir_path=dir_path,
                verbose=verbose,
                embedded_datasources=embedded_ds,
            )
            processed_set.add(name)

            if verbose:
                if len(warnings) == 1:
                    click.echo(FeedbackManager.warning_pipe_restricted_param(word=warnings[0]))
                elif len(warnings) > 1:
                    click.echo(
                        FeedbackManager.warning_pipe_restricted_params(
                            words=", ".join(["'{}'".format(param) for param in warnings[:-1]]),
                            last_word=warnings[-1],
                        )
                    )


async def build_graph(
    filenames: Iterable[str],
    tb_client: TinyB,
    dir_path: Optional[str] = None,
    process_dependencies: bool = False,
    verbose: bool = False,
    skip_connectors: bool = False,
    vendor_paths: Optional[List[Tuple[str, str]]] = None,
    current_ws: Optional[Dict[str, Any]] = None,
    changed: Optional[Dict[str, Any]] = None,
    only_changes: bool = False,
    fork_downstream: Optional[bool] = False,
    is_internal: Optional[bool] = False,
    build: Optional[bool] = False,
) -> GraphDependencies:
    """
    This method will generate a dependency graph for the given files. It will also return a map of all the resources that are going to be deployed.
    By default it will generate the graph from left to right, but if fork-downstream, it will generate the graph from right to left.
    """
    to_run: Dict[str, Any] = {}
    deps: List[str] = []
    dep_map: Dict[str, Any] = {}
    embedded_datasources: Dict[str, Dict[str, Any]] = {}

    # These dictionaries are used to store all the resources and there dependencies for the whole project
    # This is used for the downstream dependency graph
    all_dep_map: Dict[str, Set[str]] = {}
    all_resources: Dict[str, Dict[str, Any]] = {}

    if dir_path is None:
        dir_path = os.getcwd()

    # When using fork-downstream or --only-changes, we need to generate all the graph of all the resources and their dependencies
    # This way we can add more resources into the to_run dictionary if needed.
    if process_dependencies and only_changes:
        all_dependencies_graph = await build_graph(
            get_project_filenames(dir_path),
            tb_client,
            dir_path=dir_path,
            process_dependencies=True,
            skip_connectors=True,
            vendor_paths=vendor_paths,
            current_ws=current_ws,
            changed=None,
            only_changes=False,
            is_internal=is_internal,
            build=build,
        )
        all_dep_map = all_dependencies_graph.dep_map
        all_resources = all_dependencies_graph.to_run

    processed: Set[str] = set()

    await get_processed(
        filenames=filenames,
        changed=changed,
        verbose=verbose,
        deps=deps,
        dep_map=dep_map,
        to_run=to_run,
        vendor_paths=vendor_paths,
        processed=processed,
        tb_client=tb_client,
        skip_connectors=skip_connectors,
        current_ws=current_ws,
        fork_downstream=fork_downstream,
        is_internal=is_internal,
        dir_path=dir_path,
        embedded_datasources=embedded_datasources,
    )

    if process_dependencies:
        if only_changes:
            for key in dict(to_run):
                # look for deps that are the target data source of a materialized node
                target_datasource = get_target_materialized_data_source_name(to_run[key])
                if target_datasource:
                    # look in all_dep_map items that have as a dependency the target data source and are an endpoint
                    for _key, _deps in all_dep_map.items():
                        for dep in _deps:
                            if (
                                dep == target_datasource
                                or (dep == key and target_datasource not in all_dep_map.get(key, []))
                            ) and is_endpoint_with_no_dependencies(
                                all_resources.get(_key, {}), all_dep_map, all_resources
                            ):
                                dep_map[_key] = _deps
                                to_run[_key] = all_resources.get(_key)
        else:
            while len(deps) > 0:
                dep = deps.pop()
                if dep not in processed:
                    processed.add(dep)
                    f = full_path_by_name(dir_path, dep, vendor_paths)
                    if f:
                        if verbose:
                            try:
                                processed_filename = f.relative_to(os.getcwd())
                            except ValueError:
                                processed_filename = f
                            # This is to avoid processing shared data sources
                            if "vendor/" in str(processed_filename):
                                click.echo(FeedbackManager.info_skipping_resource(resource=processed_filename))
                                continue
                            click.echo(FeedbackManager.info_processing_file(filename=processed_filename))
                        await process(
                            filename=str(f),
                            tb_client=tb_client,
                            deps=deps,
                            dep_map=dep_map,
                            to_run=to_run,
                            vendor_paths=vendor_paths,
                            skip_connectors=skip_connectors,
                            current_ws=current_ws,
                            fork_downstream=fork_downstream,
                            is_internal=is_internal,
                            dir_path=dir_path,
                            verbose=verbose,
                            embedded_datasources=embedded_datasources,
                        )

    return GraphDependencies(dep_map, to_run, all_dep_map, all_resources)


async def process_file(
    filename: str,
    tb_client: TinyB,
    skip_connectors: bool = False,
    current_ws: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Returns a list of resources

    For both datasources and pipes, a list of just one item is returned"""

    def get_engine_params(node: Dict[str, Any]) -> Dict[str, Any]:
        params = {}

        if "engine" in node:
            engine = node["engine"]["type"]
            params["engine"] = engine
            args = node["engine"]["args"]
            for k, v in args:
                params[f"engine_{k}"] = v
        return params

    async def get_kafka_params(node: Dict[str, Any]):
        params = {key: value for key, value in node.items() if key.startswith("kafka")}

        if not skip_connectors:
            try:
                connector_params = {
                    "kafka_bootstrap_servers": params.get("kafka_bootstrap_servers", None),
                    "kafka_key": params.get("kafka_key", None),
                    "kafka_secret": params.get("kafka_secret", None),
                    "kafka_connection_name": params.get("kafka_connection_name", None),
                    "kafka_auto_offset_reset": params.get("kafka_auto_offset_reset", None),
                    "kafka_schema_registry_url": params.get("kafka_schema_registry_url", None),
                    "kafka_ssl_ca_pem": get_ca_pem_content(params.get("kafka_ssl_ca_pem", None), filename),
                    "kafka_sasl_mechanism": params.get("kafka_sasl_mechanism", None),
                }

                connector = await tb_client.get_connection(**connector_params)
                if not connector:
                    click.echo(
                        FeedbackManager.info_creating_kafka_connection(connection_name=params["kafka_connection_name"])
                    )
                    required_params = [
                        connector_params["kafka_bootstrap_servers"],
                        connector_params["kafka_key"],
                        connector_params["kafka_secret"],
                    ]

                    if not all(required_params):
                        raise click.ClickException(FeedbackManager.error_unknown_kafka_connection(datasource=name))

                    connector = await tb_client.connection_create_kafka(**connector_params)
            except Exception as e:
                raise click.ClickException(
                    FeedbackManager.error_connection_create(
                        connection_name=params["kafka_connection_name"], error=str(e)
                    )
                )

            click.echo(FeedbackManager.success_connection_using(connection_name=connector["name"]))

            params.update(
                {
                    "connector": connector["id"],
                    "service": "kafka",
                }
            )

        return params

    async def get_import_params(datasource: Dict[str, Any], node: Dict[str, Any]) -> Dict[str, Any]:
        params: Dict[str, Any] = {key: value for key, value in node.items() if key.startswith("import_")}

        if len(params) == 0 or skip_connectors:
            return params

        service: Optional[str] = node.get("import_service", None)

        if service and service.lower() == "bigquery":
            if not await tb_client.check_gcp_read_permissions():
                raise click.ClickException(FeedbackManager.error_unknown_bq_connection(datasource=datasource["name"]))

            # Bigquery doesn't have a datalink, so we can stop here
            return params

        # Rest of connectors

        connector_id: Optional[str] = node.get("import_connector", None)
        connector_name: Optional[str] = node.get("import_connection_name", None)
        if not connector_name and not connector_id:
            raise click.ClickException(FeedbackManager.error_missing_connection_name(datasource=datasource["name"]))

        if not connector_id:
            assert isinstance(connector_name, str)

            connector: Optional[Dict[str, Any]] = await tb_client.get_connector(connector_name, service)

            if not connector:
                raise Exception(
                    FeedbackManager.error_unknown_connection(datasource=datasource["name"], connection=connector_name)
                )
            connector_id = connector["id"]
            service = connector["service"]

        # The API needs the connector ID to create the datasource.
        params["import_connector"] = connector_id
        if service:
            params["import_service"] = service

        if import_from_timestamp := params.get("import_from_timestamp", None):
            try:
                str(datetime.datetime.fromisoformat(import_from_timestamp).isoformat())
            except ValueError:
                raise click.ClickException(
                    FeedbackManager.error_invalid_import_from_timestamp(datasource=datasource["name"])
                )

        if service in PREVIEW_CONNECTOR_SERVICES:
            if not params.get("import_bucket_uri", None):
                raise click.ClickException(FeedbackManager.error_missing_bucket_uri(datasource=datasource["name"]))
        elif service == "dynamodb":
            if not params.get("import_table_arn", None):
                raise click.ClickException(FeedbackManager.error_missing_table_arn(datasource=datasource["name"]))
            if not params.get("import_export_bucket", None):
                raise click.ClickException(FeedbackManager.error_missing_export_bucket(datasource=datasource["name"]))
        else:
            if not params.get("import_external_datasource", None):
                raise click.ClickException(
                    FeedbackManager.error_missing_external_datasource(datasource=datasource["name"])
                )

        return params

    if DataFileExtensions.DATASOURCE in filename:
        doc = parse_datasource(filename)
        node = doc.nodes[0]
        deps: List[str] = []
        # reemplace tables on materialized columns
        columns = parse_table_structure(node["schema"])

        _format = "csv"
        for x in columns:
            if x["default_value"] and x["default_value"].lower().startswith("materialized"):
                # turn expression to a select query to sql_get_used_tables can get the used tables
                q = "select " + x["default_value"][len("materialized") :]
                tables = await tb_client.sql_get_used_tables(q)
                # materialized columns expressions could have joins so we need to add them as a dep
                deps += tables
                # generate replacements and replace the query
                replacements = {t: t for t in tables}

                replaced_results = await tb_client.replace_tables(q, replacements)
                x["default_value"] = replaced_results.replace("SELECT", "materialized", 1)
            if x.get("jsonpath", None):
                _format = "ndjson"

        schema = ",".join(schema_to_sql_columns(columns))

        name = os.path.basename(filename).rsplit(".", 1)[0]

        version = f"__v{doc.version}" if doc.version is not None else ""

        def append_version_to_name(name: str, version: str) -> str:
            if version != "":
                name = name.replace(".", "_")
                return name + version
            return name

        description = node.get("description", "")
        indexes_list = node.get("indexes", [])
        indexes = None
        if indexes_list:
            indexes = "\n".join([index.to_sql() for index in indexes_list])
        # Here is where we lose the columns
        # I don't know why we don't return something more similar to the parsed doc
        params = {
            "name": append_version_to_name(name, version),
            "description": description,
            "schema": schema,
            "indexes": indexes,
            "indexes_list": indexes_list,
            "format": _format,
        }

        params.update(get_engine_params(node))

        if "import_service" in node or "import_connection_name" in node:
            VALID_SERVICES: Tuple[str, ...] = ("bigquery", "snowflake", "s3", "s3_iamrole", "gcs", "dynamodb")

            import_params = await get_import_params(params, node)

            service = import_params.get("import_service", None)
            if service and service not in VALID_SERVICES:
                raise Exception(f"Unknown import service: {service}")

            if service in PREVIEW_CONNECTOR_SERVICES:
                ON_DEMAND_CRON = ON_DEMAND
                AUTO_CRON = "@auto"
                ON_DEMAND_CRON_EXPECTED_BY_THE_API = "@once"
                VALID_CRONS: Tuple[str, ...] = (ON_DEMAND_CRON, AUTO_CRON)
                cron = node.get("import_schedule", ON_DEMAND_CRON)

                if cron not in VALID_CRONS:
                    valid_values = ", ".join(VALID_CRONS)
                    raise Exception(f"Invalid import schedule: '{cron}'. Valid values are: {valid_values}")

                if cron == ON_DEMAND_CRON:
                    if import_params is None:
                        import_params = {}
                    import_params["import_schedule"] = ON_DEMAND_CRON_EXPECTED_BY_THE_API

                if cron == AUTO_CRON:
                    period: int = DEFAULT_CRON_PERIOD

                    if current_ws is not None:
                        workspaces = (await tb_client.user_workspaces()).get("workspaces", [])
                        workspace_rate_limits: Dict[str, Dict[str, int]] = next(
                            (w.get("rate_limits", {}) for w in workspaces if w["id"] == current_ws["id"]), {}
                        )
                        if workspace_rate_limits:
                            rate_limit_config = workspace_rate_limits.get("api_datasources_create_append_replace", {})
                            if rate_limit_config:
                                period = rate_limit_config.get("period", DEFAULT_CRON_PERIOD)

                    def seconds_to_cron_expression(seconds: int) -> str:
                        minutes = seconds // 60
                        hours = minutes // 60
                        days = hours // 24
                        if days > 0:
                            return f"0 0 */{days} * *"
                        if hours > 0:
                            return f"0 */{hours} * * *"
                        if minutes > 0:
                            return f"*/{minutes} * * * *"
                        return f"*/{seconds} * * * *"

                    if import_params is None:
                        import_params = {}
                    import_params["import_schedule"] = seconds_to_cron_expression(period)

            # Include all import_ parameters in the datasource params
            if import_params is not None:
                params.update(import_params)

            # Substitute the import parameters with the ones used by the
            # import API:
            #   - If an import parameter is not present and there's a default
            #     value, use the default value.
            #   - If the resulting value is None, do not add the parameter.
            #
            # Note: any unknown import_ parameter is leaved as is.
            for key in ImportReplacements.get_datafile_parameter_keys():
                replacement, default_value = ImportReplacements.get_api_param_for_datafile_param(service, key)
                if not replacement:
                    continue  # We should not reach this never, but just in case...

                value: Any
                try:
                    value = params[key]
                    del params[key]
                except KeyError:
                    value = default_value

                if value:
                    params[replacement] = value

        if "kafka_connection_name" in node:
            kafka_params = await get_kafka_params(node)
            params.update(kafka_params)
            del params["format"]

        if "tags" in node:
            tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
            params.update(tags)

        resources: List[Dict[str, Any]] = []

        resources.append(
            {
                "resource": "datasources",
                "resource_name": name,
                "version": doc.version,
                "params": params,
                "filename": filename,
                "deps": deps,
                "tokens": doc.tokens,
                "shared_with": doc.shared_with,
                "filtering_tags": doc.filtering_tags,
            }
        )

        return resources

    elif DataFileExtensions.PIPE in filename:
        doc = parse_pipe(filename)
        version = f"__v{doc.version}" if doc.version is not None else ""
        name = os.path.basename(filename).split(".")[0]
        description = doc.description if doc.description is not None else ""

        deps = []
        nodes: List[Dict[str, Any]] = []

        is_copy = any([node for node in doc.nodes if node.get("type", "standard").lower() == PipeNodeTypes.COPY])
        for node in doc.nodes:
            sql = node["sql"]
            node_type = node.get("type", "standard").lower()
            params = {
                "name": node["name"],
                "type": node_type,
                "description": node.get("description", ""),
                "target_datasource": node.get("target_datasource", None),
                "copy_schedule": node.get(CopyParameters.COPY_SCHEDULE, None),
                "mode": node.get("mode", CopyModes.APPEND),
            }

            is_export_node = ExportReplacements.is_export_node(node)
            export_params = ExportReplacements.get_params_from_datafile(node) if is_export_node else None

            sql = sql.strip()
            is_template = False
            if sql[0] == "%":
                try:
                    sql_rendered, _, _ = render_sql_template(sql[1:], test_mode=True)
                except Exception as e:
                    raise click.ClickException(
                        FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
                    )
                is_template = True
            else:
                sql_rendered = sql

            try:
                dependencies = await tb_client.sql_get_used_tables(sql_rendered, raising=True, is_copy=is_copy)
                deps += [t for t in dependencies if t not in [n["name"] for n in doc.nodes]]

            except Exception as e:
                raise click.ClickException(
                    FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
                )

            if is_template:
                deps += get_used_tables_in_template(sql[1:])

            is_neither_copy_nor_materialized = "datasource" not in node and "target_datasource" not in node
            if "engine" in node and is_neither_copy_nor_materialized:
                raise ValueError("Defining ENGINE options in a node requires a DATASOURCE")

            if "datasource" in node:
                params["datasource"] = node["datasource"]
                deps += [node["datasource"]]

            if "target_datasource" in node:
                params["target_datasource"] = node["target_datasource"]
                deps += [node["target_datasource"]]

            params.update(get_engine_params(node))

            replacements = {x: x for x in deps if x not in [n["name"] for n in doc.nodes]}

            # FIXME: Ideally we should use await tb_client.replace_tables(sql, replacements)
            for old, new in replacements.items():
                sql = re.sub("([\t \\n']+|^)" + old + "([\t \\n'\\)]+|$)", "\\1" + new + "\\2", sql)

            if "tags" in node:
                tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
                params.update(tags)

            nodes.append(
                {
                    "sql": sql,
                    "params": params,
                    "export_params": export_params,
                }
            )

        return [
            {
                "resource": "pipes",
                "resource_name": name,
                "version": doc.version,
                "filename": filename,
                "name": name + version,
                "nodes": nodes,
                "deps": [x for x in set(deps)],
                "tokens": doc.tokens,
                "description": description,
                "warnings": doc.warnings,
                "filtering_tags": doc.filtering_tags,
            }
        ]
    else:
        raise click.ClickException(FeedbackManager.error_file_extension(filename=filename))


def sizeof_fmt(num: Union[int, float], suffix: str = "b") -> str:
    """Readable file size
    :param num: Bytes value
    :type num: int
    :param suffix: Unit suffix (optionnal) default = o
    :type suffix: str
    :rtype: str
    """
    for unit in ["", "k", "M", "G", "T", "P", "E", "Z"]:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, "Yi", suffix)


def full_path_by_name(folder: str, name: str, vendor_paths: Optional[List[Tuple[str, str]]] = None) -> Optional[Path]:
    f = Path(folder)
    ds = name + ".datasource"
    if os.path.isfile(os.path.join(folder, ds)):
        return f / ds
    if os.path.isfile(f / "datasources" / ds):
        return f / "datasources" / ds

    pipe = name + ".pipe"
    if os.path.isfile(os.path.join(folder, pipe)):
        return f / pipe

    if os.path.isfile(f / "endpoints" / pipe):
        return f / "endpoints" / pipe

    if os.path.isfile(f / "pipes" / pipe):
        return f / "pipes" / pipe

    if os.path.isfile(f / "sinks" / pipe):
        return f / "sinks" / pipe

    if os.path.isfile(f / "copies" / pipe):
        return f / "copies" / pipe

    if os.path.isfile(f / "playgrounds" / pipe):
        return f / "playgrounds" / pipe

    if os.path.isfile(f / "materializations" / pipe):
        return f / "materializations" / pipe

    if vendor_paths:
        for wk_name, wk_path in vendor_paths:
            if name.startswith(f"{wk_name}."):
                r = full_path_by_name(wk_path, name.replace(f"{wk_name}.", ""))
                if r:
                    return r
    return None