tinybird 0.0.1.dev71__py3-none-any.whl → 0.0.1.dev73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of tinybird might be problematic.

@@ -0,0 +1,1400 @@
1
+ import datetime
2
+ import os
3
+ import os.path
4
+ import re
5
+ import urllib
6
+ from copy import deepcopy
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
10
+
11
+ import click
12
+ from toposort import toposort
13
+
14
+ from tinybird.client import TinyB
15
+ from tinybird.sql import parse_table_structure, schema_to_sql_columns
16
+ from tinybird.sql_template import get_used_tables_in_template, render_sql_template
17
+ from tinybird.tb.modules.common import get_ca_pem_content
18
+ from tinybird.tb.modules.datafile.build_datasource import is_datasource
19
+ from tinybird.tb.modules.datafile.build_pipe import (
20
+ get_target_materialized_data_source_name,
21
+ is_endpoint,
22
+ is_endpoint_with_no_dependencies,
23
+ is_materialized,
24
+ new_pipe,
25
+ )
26
+ from tinybird.tb.modules.datafile.common import (
27
+ DEFAULT_CRON_PERIOD,
28
+ INTERNAL_TABLES,
29
+ ON_DEMAND,
30
+ PREVIEW_CONNECTOR_SERVICES,
31
+ CopyModes,
32
+ CopyParameters,
33
+ DataFileExtensions,
34
+ ExportReplacements,
35
+ ImportReplacements,
36
+ PipeNodeTypes,
37
+ find_file_by_name,
38
+ get_name_version,
39
+ get_project_filenames,
40
+ pp,
41
+ )
42
+ from tinybird.tb.modules.datafile.exceptions import AlreadyExistsException, IncludeFileNotFoundException
43
+ from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
44
+ from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
45
+ from tinybird.tb.modules.feedback_manager import FeedbackManager
46
+ from tinybird.tb.modules.project import Project
47
+
48
+
49
+ async def folder_playground(
50
+ project: Project,
51
+ tb_client: TinyB,
52
+ filenames: Optional[List[str]] = None,
53
+ is_internal: bool = False,
54
+ current_ws: Optional[Dict[str, Any]] = None,
55
+ local_ws: Optional[Dict[str, Any]] = None,
56
+ ):
57
+ build = True
58
+ dry_run = False
59
+ force = True
60
+ only_changes = True
61
+ debug = False
62
+ run_tests = False
63
+ verbose = False
64
+ raise_on_exists = False
65
+ fork_downstream = True
66
+ fork = False
67
+ release_created = False
68
+ folder = str(project.path)
69
+ datasources: List[Dict[str, Any]] = await tb_client.datasources()
70
+ pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
71
+ build = True
72
+ dry_run = False
73
+ force = True
74
+ only_changes = True
75
+ debug = False
76
+ check = True
77
+ populate = False
78
+ populate_subset = None
79
+ populate_condition = None
80
+ tests_to_run = 0
81
+ override_datasource = False
82
+ skip_confirmation = True
83
+ wait = False
84
+ unlink_on_populate_error = False
85
+ only_response_times = False
86
+ run_tests = False
87
+ verbose = False
88
+ as_standard = False
89
+ raise_on_exists = False
90
+ fork_downstream = True
91
+ fork = False
92
+ release_created = False
93
+ tests_relative_change = 0.01
94
+ tests_sample_by_params = 0
95
+ tests_filter_by = None
96
+ tests_failfast = False
97
+ tests_ignore_order = False
98
+ tests_validate_processed_bytes = False
99
+ tests_check_requests_from_branch = False
100
+ user_token = None
101
+ ignore_sql_errors = False
102
+ is_vendor = False
103
+ current_ws = current_ws or local_ws
104
+
105
+ existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
106
+ remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
107
+
108
+ if not filenames:
109
+ filenames = get_project_filenames(folder)
110
+
111
+ # build graph to get new versions for all the files involved in the query
112
+ # dependencies need to be processed always to get the versions
113
+ dependencies_graph = await build_graph(
114
+ filenames,
115
+ tb_client,
116
+ dir_path=folder,
117
+ process_dependencies=True,
118
+ skip_connectors=True,
119
+ vendor_paths=[],
120
+ current_ws=current_ws,
121
+ only_changes=only_changes,
122
+ fork_downstream=fork_downstream,
123
+ is_internal=is_internal,
124
+ build=build,
125
+ )
126
+
127
+ if debug:
128
+ pp.pprint(dependencies_graph.to_run)
129
+
130
+ def should_push_file(
131
+ name: str,
132
+ remote_resource_names: List[str],
133
+ force: bool,
134
+ run_tests: bool,
135
+ ) -> bool:
136
+ """
137
+ Function to know if we need to run a file or not
138
+ """
139
+ if name not in remote_resource_names:
140
+ return True
141
+         # We also need to push the file when its full name doesn't exist, i.e. its version differs from the existing one
142
+ resource_full_name = name
143
+ if resource_full_name not in existing_resources:
144
+ return True
145
+ if force or run_tests:
146
+ return True
147
+ return False
148
+
149
+ async def push(
150
+ name: str,
151
+ to_run: Dict[str, Dict[str, Any]],
152
+ dry_run: bool,
153
+ fork_downstream: Optional[bool] = False,
154
+ fork: Optional[bool] = False,
155
+ ):
156
+ if name in to_run:
157
+ resource = to_run[name]["resource"]
158
+ if resource == "datasources":
159
+ return
160
+ if not dry_run:
161
+ if should_push_file(name, remote_resource_names, force, run_tests):
162
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=""))
163
+ try:
164
+ await exec_file(
165
+ to_run[name],
166
+ tb_client,
167
+ force,
168
+ check,
169
+ debug and verbose,
170
+ populate,
171
+ populate_subset,
172
+ populate_condition,
173
+ unlink_on_populate_error,
174
+ wait,
175
+ user_token,
176
+ override_datasource,
177
+ ignore_sql_errors,
178
+ skip_confirmation,
179
+ only_response_times,
180
+ run_tests,
181
+ as_standard,
182
+ tests_to_run,
183
+ tests_relative_change,
184
+ tests_sample_by_params,
185
+ tests_filter_by,
186
+ tests_failfast,
187
+ tests_ignore_order,
188
+ tests_validate_processed_bytes,
189
+ tests_check_requests_from_branch,
190
+ current_ws,
191
+ local_ws,
192
+ fork_downstream,
193
+ fork,
194
+ build,
195
+ is_vendor,
196
+ )
197
+ if not run_tests:
198
+ click.echo(
199
+ FeedbackManager.success_create(
200
+ name=(
201
+ name
202
+ if to_run[name]["version"] is None
203
+ else f"{name}__v{to_run[name]['version']}"
204
+ )
205
+ )
206
+ )
207
+ except Exception as e:
208
+ filename = to_run[name]["filename"]
209
+ exception = FeedbackManager.error_push_file_exception(
210
+ filename=filename,
211
+ error=e,
212
+ )
213
+ raise click.ClickException(exception)
214
+ else:
215
+ if raise_on_exists:
216
+ raise AlreadyExistsException(
217
+ FeedbackManager.warning_name_already_exists(
218
+ name=name if to_run[name]["version"] is None else f"{name}__v{to_run[name]['version']}"
219
+ )
220
+ )
221
+ else:
222
+ if await name_matches_existing_resource(resource, name, tb_client):
223
+ if resource == "pipes":
224
+ click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
225
+ else:
226
+ click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
227
+ else:
228
+ click.echo(
229
+ FeedbackManager.warning_name_already_exists(
230
+ name=(
231
+ name
232
+ if to_run[name]["version"] is None
233
+ else f"{name}__v{to_run[name]['version']}"
234
+ )
235
+ )
236
+ )
237
+ else:
238
+ if should_push_file(name, remote_resource_names, force, run_tests):
239
+ extension = "pipe" if resource == "pipes" else "datasource"
240
+ click.echo(FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=""))
241
+ else:
242
+ if await name_matches_existing_resource(resource, name, tb_client):
243
+ if resource == "pipes":
244
+ click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
245
+ else:
246
+ click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
247
+ else:
248
+ click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
249
+
250
+ async def push_files(
251
+ dependency_graph: GraphDependencies,
252
+ dry_run: bool = False,
253
+ ):
254
+ endpoints_dep_map = dict()
255
+ processed = set()
256
+
257
+ resources_to_run = dependency_graph.to_run
258
+
259
+         # This will generate the graph from right to left and fill in the gaps in the dependencies
260
+ # If we have a graph like this:
261
+ # A -> B -> C
262
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
263
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
264
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
265
+ dependency_graph.all_dep_map,
266
+ dependency_graph.all_resources,
267
+ resources_to_run,
268
+ list(dependency_graph.dep_map.keys()),
269
+ )
270
+
271
+ # First, we will deploy the datasources that need to be deployed.
272
+         # We need to deploy the datasources from left to right, as some datasources might have materialized views that depend on the column types of previous datasources. Ex: the `test_change_column_type_landing_datasource` test
273
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
274
+
275
+ groups.reverse()
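        # Illustrative note (hypothetical names): given a downstream map such as
        #   {"landing_ds": {"mv_pipe"}, "mv_pipe": {"target_ds"}, "target_ds": set()}
        # toposort() yields the rightmost resources first ([{"target_ds"}, {"mv_pipe"}, {"landing_ds"}]),
        # so reversing the groups deploys landing_ds, then mv_pipe, then target_ds, i.e. left to right.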
276
+ for group in groups:
277
+ for name in group:
278
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
279
+ continue
280
+
281
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
282
+ kafka_connection_name = (
283
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
284
+ )
285
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
286
+ if release_created and (kafka_connection_name or service):
287
+ connector = "Kafka" if kafka_connection_name else service
288
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
289
+ raise click.ClickException(error_msg)
290
+
291
+ await push(
292
+ name,
293
+ resources_to_run_fork_downstream,
294
+ dry_run,
295
+ fork_downstream,
296
+ fork,
297
+ )
298
+ processed.add(name)
299
+
300
+         # Now, we will create a map of all the endpoints and their dependencies
301
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
302
+         # but does not include the resources in between (the gaps)
303
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
304
+         # where ENDPOINT_A is used by MV_PIPE_B: if we only modify ENDPOINT_A,
305
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
306
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
307
+ for group in groups:
308
+ for name in group:
309
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
310
+ continue
311
+
312
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
313
+
314
+         # Now that we have the dependencies of the endpoints, we need to check that the resources have not been deployed yet and only care about the endpoints that depend on other endpoints
315
+ groups = [group for group in toposort(endpoints_dep_map)]
316
+
317
+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
318
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
319
+ # So we need to reverse the groups
320
+ groups.reverse()
321
+ for group in groups:
322
+ for name in group:
323
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
324
+ continue
325
+
326
+ await push(
327
+ name,
328
+ resources_to_run_fork_downstream,
329
+ dry_run,
330
+ fork_downstream,
331
+ fork,
332
+ )
333
+ processed.add(name)
334
+
335
+         # Now that the endpoints and datasources are deployed, we can deploy the rest of the pipes (copies & sinks)
336
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
337
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
338
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
339
+ for group in groups:
340
+ for name in group:
341
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
342
+ continue
343
+
344
+ await push(
345
+ name,
346
+ resources_to_run_fork_downstream,
347
+ dry_run,
348
+ fork_downstream,
349
+ fork,
350
+ )
351
+ processed.add(name)
352
+
353
+ # Finally, we need to deploy the materialized views from right to left.
354
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
355
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
356
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
357
+ for group in groups:
358
+ for name in group:
359
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
360
+ continue
361
+
362
+ await push(
363
+ name,
364
+ resources_to_run_fork_downstream,
365
+ dry_run,
366
+ fork_downstream,
367
+ fork,
368
+ )
369
+ processed.add(name)
370
+
371
+ await push_files(dependencies_graph, dry_run)
372
+
373
+ if not dry_run and not run_tests and verbose:
374
+ click.echo(FeedbackManager.info_not_pushing_fixtures())
375
+
376
+ return dependencies_graph.to_run
377
+
378
+
379
+ async def name_matches_existing_resource(resource: str, name: str, tb_client: TinyB):
380
+ if resource == "datasources":
381
+ current_pipes: List[Dict[str, Any]] = await tb_client.pipes()
382
+ if name in [x["name"] for x in current_pipes]:
383
+ return True
384
+ else:
385
+ current_datasources: List[Dict[str, Any]] = await tb_client.datasources()
386
+ if name in [x["name"] for x in current_datasources]:
387
+ return True
388
+ return False
389
+
390
+
391
+ async def exec_file(
392
+ r: Dict[str, Any],
393
+ tb_client: TinyB,
394
+ force: bool,
395
+ check: bool,
396
+ debug: bool,
397
+ populate: bool,
398
+ populate_subset,
399
+ populate_condition,
400
+ unlink_on_populate_error,
401
+ wait_populate,
402
+ user_token: Optional[str],
403
+ override_datasource: bool = False,
404
+ ignore_sql_errors: bool = False,
405
+ skip_confirmation: bool = False,
406
+ only_response_times: bool = False,
407
+ run_tests=False,
408
+ as_standard=False,
409
+ tests_to_run: int = 0,
410
+ tests_relative_change: float = 0.01,
411
+ tests_to_sample_by_params: int = 0,
412
+ tests_filter_by: Optional[List[str]] = None,
413
+ tests_failfast: bool = False,
414
+ tests_ignore_order: bool = False,
415
+ tests_validate_processed_bytes: bool = False,
416
+ tests_check_requests_from_branch: bool = False,
417
+ current_ws: Optional[Dict[str, Any]] = None,
418
+ local_ws: Optional[Dict[str, Any]] = None,
419
+ fork_downstream: Optional[bool] = False,
420
+ fork: Optional[bool] = False,
421
+ build: Optional[bool] = False,
422
+ is_vendor: Optional[bool] = False,
423
+ ):
424
+ if debug:
425
+ click.echo(FeedbackManager.debug_running_file(file=pp.pformat(r)))
426
+ if r["resource"] == "pipes":
427
+ await new_pipe(
428
+ r,
429
+ tb_client,
430
+ force,
431
+ check,
432
+ populate,
433
+ populate_subset,
434
+ populate_condition,
435
+ unlink_on_populate_error,
436
+ wait_populate,
437
+ ignore_sql_errors=ignore_sql_errors,
438
+ only_response_times=only_response_times,
439
+ run_tests=run_tests,
440
+ as_standard=as_standard,
441
+ tests_to_run=tests_to_run,
442
+ tests_relative_change=tests_relative_change,
443
+ tests_to_sample_by_params=tests_to_sample_by_params,
444
+ tests_filter_by=tests_filter_by,
445
+ tests_failfast=tests_failfast,
446
+ tests_ignore_order=tests_ignore_order,
447
+ tests_validate_processed_bytes=tests_validate_processed_bytes,
448
+ override_datasource=override_datasource,
449
+ tests_check_requests_from_branch=tests_check_requests_from_branch,
450
+ fork_downstream=fork_downstream,
451
+ fork=fork,
452
+ )
453
+
454
+ elif r["resource"] == "datasources":
455
+ pass
456
+ else:
457
+ raise click.ClickException(FeedbackManager.error_unknown_resource(resource=r["resource"]))
458
+
459
+
460
+ def get_remote_resource_name_without_version(remote_resource_name: str) -> str:
461
+ """
462
+ >>> get_remote_resource_name_without_version("r__datasource")
463
+ 'r__datasource'
464
+ >>> get_remote_resource_name_without_version("r__datasource__v0")
465
+ 'r__datasource'
466
+ >>> get_remote_resource_name_without_version("datasource")
467
+ 'datasource'
468
+ """
469
+ parts = get_name_version(remote_resource_name)
470
+ return parts["name"]
471
+
472
+
473
+ def create_downstream_dependency_graph(dependency_graph: Dict[str, Set[str]], all_resources: Dict[str, Dict[str, Any]]):
474
+ """
475
+ This function reverses the dependency graph obtained from build_graph so you have downstream dependencies for each node in the graph.
476
+
477
+     Additionally, it takes into account the target_datasource of materialized views
478
+ """
479
+ downstream_dependency_graph: Dict[str, Set[str]] = {node: set() for node in dependency_graph}
480
+
481
+ for node, dependencies in dependency_graph.items():
482
+ for dependency in dependencies:
483
+ if dependency not in downstream_dependency_graph:
484
+ # a shared data source, we can skip it
485
+ continue
486
+ downstream_dependency_graph[dependency].add(node)
487
+
488
+ for key in dict(downstream_dependency_graph):
489
+ target_datasource = get_target_materialized_data_source_name(all_resources[key])
490
+ if target_datasource:
491
+ downstream_dependency_graph[key].update({target_datasource})
492
+ try:
493
+ downstream_dependency_graph[target_datasource].remove(key)
494
+ except KeyError:
495
+ pass
496
+
497
+ return downstream_dependency_graph
498
+
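# A minimal, self-contained sketch (hypothetical resource names) of the reversal idea used above,
# leaving the materialized-view target handling aside: upstream "A depends on B" edges become
# downstream "B is read by A" edges.
upstream = {"endpoint_a": {"ds_b"}, "ds_b": set()}
downstream = {node: set() for node in upstream}
for node, deps in upstream.items():
    for dep in deps:
        downstream.setdefault(dep, set()).add(node)
assert downstream == {"endpoint_a": set(), "ds_b": {"endpoint_a"}}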
499
+
500
+ def update_dep_map_recursively(
501
+ dep_map: Dict[str, Set[str]],
502
+ downstream_dep_map: Dict[str, Set[str]],
503
+ all_resources: Dict[str, Dict[str, Any]],
504
+ to_run: Dict[str, Dict[str, Any]],
505
+ dep_map_keys: List[str],
506
+ key: Optional[str] = None,
507
+ visited: Optional[List[str]] = None,
508
+ ):
509
+ """
510
+     Given a downstream_dep_map obtained from create_downstream_dependency_graph, this function recursively updates each node to complete the downstream dependency graph
511
+ """
512
+ if not visited:
513
+ visited = list()
514
+ if not key and len(dep_map_keys) == 0:
515
+ return
516
+ if not key:
517
+ key = dep_map_keys.pop()
518
+ if key not in dep_map:
519
+ dep_map[key] = set()
520
+ else:
521
+ visited.append(key)
522
+ return
523
+
524
+ for dep in downstream_dep_map.get(key, {}):
525
+ if dep not in downstream_dep_map:
526
+ continue
527
+ to_run[dep] = all_resources.get(dep, {})
528
+ update_dep_map_recursively(
529
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=dep, visited=visited
530
+ )
531
+ dep_map[key].update(downstream_dep_map[dep])
532
+ dep_map[key].update({dep})
533
+ try:
534
+ dep_map[key].remove(key)
535
+ except KeyError:
536
+ pass
537
+
538
+ to_run[key] = all_resources.get(key, {})
539
+ update_dep_map_recursively(
540
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=None, visited=visited
541
+ )
542
+
543
+
544
+ def generate_forkdownstream_graph(
545
+ all_dep_map: Dict[str, Set[str]],
546
+ all_resources: Dict[str, Dict[str, Any]],
547
+ to_run: Dict[str, Dict[str, Any]],
548
+ dep_map_keys: List[str],
549
+ ) -> Tuple[Dict[str, Set[str]], Dict[str, Dict[str, Any]]]:
550
+ """
551
+     Given a graph of dependencies from left to right, this function generates a new graph with the dependencies from right to left, taking into account that even if some nodes are not inside to_run, they are still dependencies that need to be deployed.
552
+
553
+ >>> deps, _ = generate_forkdownstream_graph(
554
+ ... {
555
+ ... 'a': {'b'},
556
+ ... 'b': {'c'},
557
+ ... 'c': set(),
558
+ ... },
559
+ ... {
560
+ ... 'a': {'resource_name': 'a'},
561
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
562
+ ... 'c': {'resource_name': 'c'},
563
+ ... },
564
+ ... {
565
+ ... 'a': {'resource_name': 'a'},
566
+ ... },
567
+ ... ['a', 'b', 'c'],
568
+ ... )
569
+ >>> {k: sorted(v) for k, v in deps.items()}
570
+ {'c': [], 'b': ['a', 'c'], 'a': []}
571
+
572
+ >>> deps, _ = generate_forkdownstream_graph(
573
+ ... {
574
+ ... 'a': {'b'},
575
+ ... 'b': {'c'},
576
+ ... 'c': set(),
577
+ ... },
578
+ ... {
579
+ ... 'a': {'resource_name': 'a'},
580
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
581
+ ... 'c': {'resource_name': 'c'},
582
+ ... },
583
+ ... {
584
+ ... 'b': {'resource_name': 'b'},
585
+ ... },
586
+ ... ['a', 'b', 'c'],
587
+ ... )
588
+ >>> {k: sorted(v) for k, v in deps.items()}
589
+ {'c': [], 'b': ['a', 'c'], 'a': []}
590
+
591
+ >>> deps, _ = generate_forkdownstream_graph(
592
+ ... {
593
+ ... 'migrated__a': {'a'},
594
+ ... 'a': {'b'},
595
+ ... 'b': {'c'},
596
+ ... 'c': set(),
597
+ ... },
598
+ ... {
599
+ ... 'migrated__a': {'resource_name': 'migrated__a', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'a'}}]},
600
+ ... 'a': {'resource_name': 'a'},
601
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
602
+ ... 'c': {'resource_name': 'c'},
603
+ ... },
604
+ ... {
605
+ ... 'migrated__a': {'resource_name': 'migrated__a'},
606
+ ... 'a': {'resource_name': 'a'},
607
+ ... },
608
+ ... ['migrated_a', 'a', 'b', 'c'],
609
+ ... )
610
+ >>> {k: sorted(v) for k, v in deps.items()}
611
+ {'c': [], 'b': ['a', 'c'], 'a': [], 'migrated_a': []}
612
+ """
613
+ downstream_dep_map = create_downstream_dependency_graph(all_dep_map, all_resources)
614
+ new_dep_map: Dict[str, Set[str]] = {}
615
+ new_to_run = deepcopy(to_run)
616
+ update_dep_map_recursively(new_dep_map, downstream_dep_map, all_resources, new_to_run, dep_map_keys)
617
+ return new_dep_map, new_to_run
618
+
619
+
620
+ @dataclass
621
+ class GraphDependencies:
622
+ """
623
+ This class is used to store the dependencies graph and the resources that are going to be deployed
624
+ """
625
+
626
+ dep_map: Dict[str, Set[str]]
627
+ to_run: Dict[str, Dict[str, Any]]
628
+
629
+ # The same as above but for the whole project, not just the resources affected by the current deployment
630
+ all_dep_map: Dict[str, Set[str]]
631
+ all_resources: Dict[str, Dict[str, Any]]
632
+
633
+
634
+ async def process(
635
+ filename: str,
636
+ tb_client: TinyB,
637
+ deps: List[str],
638
+ dep_map: Dict[str, Any],
639
+ to_run: Dict[str, Any],
640
+ vendor_paths: Optional[List[Tuple[str, str]]] = None,
641
+ skip_connectors: bool = False,
642
+ current_ws: Optional[Dict[str, Any]] = None,
643
+ changed: Optional[Dict[str, Any]] = None,
644
+ fork_downstream: Optional[bool] = False,
645
+ is_internal: Optional[bool] = False,
646
+ dir_path: Optional[str] = None,
647
+ verbose: bool = False,
648
+ embedded_datasources: Optional[Dict[str, Any]] = None,
649
+ ):
650
+ name, kind = filename.rsplit(".", 1)
651
+ warnings = []
652
+ embedded_datasources = {} if embedded_datasources is None else embedded_datasources
653
+
654
+ try:
655
+ res = await process_file(
656
+ filename,
657
+ tb_client,
658
+ skip_connectors=skip_connectors,
659
+ current_ws=current_ws,
660
+ )
661
+ except click.ClickException as e:
662
+ raise e
663
+ except IncludeFileNotFoundException as e:
664
+ raise click.ClickException(FeedbackManager.error_deleted_include(include_file=str(e), filename=filename))
665
+ except Exception as e:
666
+ raise click.ClickException(str(e))
667
+
668
+ # datasource
669
+ # {
670
+ # "resource": "datasources",
671
+ # "resource_name": name,
672
+ # "version": doc.version,
673
+ # "params": params,
674
+ # "filename": filename,
675
+ # "deps": deps,
676
+ # "tokens": doc.tokens,
677
+ # "shared_with": doc.shared_with,
678
+ # "filtering_tags": doc.filtering_tags,
679
+ # }
680
+ # pipe
681
+ # {
682
+ # "resource": "pipes",
683
+ # "resource_name": name,
684
+ # "version": doc.version,
685
+ # "filename": filename,
686
+ # "name": name + version,
687
+ # "nodes": nodes,
688
+ # "deps": [x for x in set(deps)],
689
+ # "tokens": doc.tokens,
690
+ # "description": description,
691
+ # "warnings": doc.warnings,
692
+ # "filtering_tags": doc.filtering_tags,
693
+ # }
694
+
695
+ # r is essentially a Datasource or a Pipe in dict shape, like in the comment above
696
+ for r in res:
697
+ resource_name = r["resource_name"]
698
+ warnings = r.get("warnings", [])
699
+ if (
700
+ changed
701
+ and resource_name in changed
702
+ and (not changed[resource_name] or changed[resource_name] in ["shared", "remote"])
703
+ ):
704
+ continue
705
+
706
+ if (
707
+ fork_downstream
708
+ and r.get("resource", "") == "pipes"
709
+ and any(["engine" in x.get("params", {}) for x in r.get("nodes", [])])
710
+ ):
711
+ raise click.ClickException(FeedbackManager.error_forkdownstream_pipes_with_engine(pipe=resource_name))
712
+
713
+ to_run[resource_name] = r
714
+ file_deps: List[str] = r.get("deps", [])
715
+ deps += file_deps
716
+ # calculate and look for deps
717
+ dep_list = []
718
+ for x in file_deps:
719
+ if x not in INTERNAL_TABLES or is_internal:
720
+ f, ds = find_file_by_name(dir_path or ".", x, verbose, vendor_paths=vendor_paths, resource=r)
721
+ if f:
722
+ dep_list.append(f.rsplit(".", 1)[0])
723
+ if ds:
724
+ ds_fn = ds["resource_name"]
725
+ prev = to_run.get(ds_fn, {})
726
+ to_run[ds_fn] = deepcopy(r)
727
+ try:
728
+ to_run[ds_fn]["deps"] = list(
729
+ set(to_run[ds_fn].get("deps", []) + prev.get("deps", []) + [resource_name])
730
+ )
731
+ except ValueError:
732
+ pass
733
+ embedded_datasources[x] = to_run[ds_fn]
734
+ else:
735
+ e_ds = embedded_datasources.get(x, None)
736
+ if e_ds:
737
+ dep_list.append(e_ds["resource_name"])
738
+
739
+ dep_map[resource_name] = set(dep_list)
740
+ return os.path.basename(name), warnings
741
+
742
+
743
+ async def get_processed(
744
+ filenames: Iterable[str],
745
+ changed: Optional[Dict[str, Any]] = None,
746
+ verbose: bool = False,
747
+ deps: Optional[List[str]] = None,
748
+ dep_map: Optional[Dict[str, Any]] = None,
749
+ to_run: Optional[Dict[str, Any]] = None,
750
+ vendor_paths: Optional[List[Tuple[str, str]]] = None,
751
+ processed: Optional[Set[str]] = None,
752
+ tb_client: Optional[TinyB] = None,
753
+ skip_connectors: bool = False,
754
+ current_ws: Optional[Dict[str, Any]] = None,
755
+ fork_downstream: Optional[bool] = False,
756
+ is_internal: Optional[bool] = False,
757
+ dir_path: Optional[str] = None,
758
+ embedded_datasources: Optional[Dict[str, Dict[str, Any]]] = None,
759
+ ):
760
+ # Initialize with proper type annotations
761
+ deps_list: List[str] = [] if deps is None else deps
762
+ dep_map_dict: Dict[str, Any] = {} if dep_map is None else dep_map
763
+ to_run_dict: Dict[str, Any] = {} if to_run is None else to_run
764
+ processed_set: Set[str] = set() if processed is None else processed
765
+ embedded_ds: Dict[str, Dict[str, Any]] = {} if embedded_datasources is None else embedded_datasources
766
+
767
+ for filename in filenames:
768
+ # just process changed filenames (tb deploy and --only-changes)
769
+ if changed is not None:
770
+ resource = Path(filename).resolve().stem
771
+ if resource in changed and (not changed[resource] or changed[resource] in ["shared", "remote"]):
772
+ continue
773
+ if os.path.isdir(filename):
774
+ await get_processed(
775
+ filenames=get_project_filenames(filename),
776
+ changed=changed,
777
+ verbose=verbose,
778
+ deps=deps_list,
779
+ dep_map=dep_map_dict,
780
+ to_run=to_run_dict,
781
+ vendor_paths=vendor_paths,
782
+ processed=processed_set,
783
+ tb_client=tb_client,
784
+ skip_connectors=skip_connectors,
785
+ current_ws=current_ws,
786
+ fork_downstream=fork_downstream,
787
+ is_internal=is_internal,
788
+ dir_path=dir_path,
789
+ embedded_datasources=embedded_ds,
790
+ )
791
+ else:
792
+ if verbose:
793
+ click.echo(FeedbackManager.info_processing_file(filename=filename))
794
+
795
+ if ".incl" in filename:
796
+ click.echo(FeedbackManager.warning_skipping_include_file(file=filename))
797
+
798
+ if tb_client is None:
799
+ raise ValueError("tb_client cannot be None")
800
+
801
+ name, warnings = await process(
802
+ filename=filename,
803
+ tb_client=tb_client,
804
+ deps=deps_list,
805
+ dep_map=dep_map_dict,
806
+ to_run=to_run_dict,
807
+ vendor_paths=vendor_paths,
808
+ skip_connectors=skip_connectors,
809
+ current_ws=current_ws,
810
+ changed=changed,
811
+ fork_downstream=fork_downstream,
812
+ is_internal=is_internal,
813
+ dir_path=dir_path,
814
+ verbose=verbose,
815
+ embedded_datasources=embedded_ds,
816
+ )
817
+ processed_set.add(name)
818
+
819
+ if verbose:
820
+ if len(warnings) == 1:
821
+ click.echo(FeedbackManager.warning_pipe_restricted_param(word=warnings[0]))
822
+ elif len(warnings) > 1:
823
+ click.echo(
824
+ FeedbackManager.warning_pipe_restricted_params(
825
+ words=", ".join(["'{}'".format(param) for param in warnings[:-1]]),
826
+ last_word=warnings[-1],
827
+ )
828
+ )
829
+
830
+
831
+ async def build_graph(
832
+ filenames: Iterable[str],
833
+ tb_client: TinyB,
834
+ dir_path: Optional[str] = None,
835
+ process_dependencies: bool = False,
836
+ verbose: bool = False,
837
+ skip_connectors: bool = False,
838
+ vendor_paths: Optional[List[Tuple[str, str]]] = None,
839
+ current_ws: Optional[Dict[str, Any]] = None,
840
+ changed: Optional[Dict[str, Any]] = None,
841
+ only_changes: bool = False,
842
+ fork_downstream: Optional[bool] = False,
843
+ is_internal: Optional[bool] = False,
844
+ build: Optional[bool] = False,
845
+ ) -> GraphDependencies:
846
+ """
847
+ This method will generate a dependency graph for the given files. It will also return a map of all the resources that are going to be deployed.
848
+ By default it will generate the graph from left to right, but if fork-downstream, it will generate the graph from right to left.
849
+ """
850
+ to_run: Dict[str, Any] = {}
851
+ deps: List[str] = []
852
+ dep_map: Dict[str, Any] = {}
853
+ embedded_datasources: Dict[str, Dict[str, Any]] = {}
854
+
855
+     # These dictionaries are used to store all the resources and their dependencies for the whole project
856
+ # This is used for the downstream dependency graph
857
+ all_dep_map: Dict[str, Set[str]] = {}
858
+ all_resources: Dict[str, Dict[str, Any]] = {}
859
+
860
+ if dir_path is None:
861
+ dir_path = os.getcwd()
862
+
863
+     # When using fork-downstream or --only-changes, we need to generate the whole graph of all the resources and their dependencies
864
+ # This way we can add more resources into the to_run dictionary if needed.
865
+ if process_dependencies and only_changes:
866
+ all_dependencies_graph = await build_graph(
867
+ get_project_filenames(dir_path),
868
+ tb_client,
869
+ dir_path=dir_path,
870
+ process_dependencies=True,
871
+ skip_connectors=True,
872
+ vendor_paths=vendor_paths,
873
+ current_ws=current_ws,
874
+ changed=None,
875
+ only_changes=False,
876
+ is_internal=is_internal,
877
+ build=build,
878
+ )
879
+ all_dep_map = all_dependencies_graph.dep_map
880
+ all_resources = all_dependencies_graph.to_run
881
+
882
+ processed: Set[str] = set()
883
+
884
+ await get_processed(
885
+ filenames=filenames,
886
+ changed=changed,
887
+ verbose=verbose,
888
+ deps=deps,
889
+ dep_map=dep_map,
890
+ to_run=to_run,
891
+ vendor_paths=vendor_paths,
892
+ processed=processed,
893
+ tb_client=tb_client,
894
+ skip_connectors=skip_connectors,
895
+ current_ws=current_ws,
896
+ fork_downstream=fork_downstream,
897
+ is_internal=is_internal,
898
+ dir_path=dir_path,
899
+ embedded_datasources=embedded_datasources,
900
+ )
901
+
902
+ if process_dependencies:
903
+ if only_changes:
904
+ for key in dict(to_run):
905
+ # look for deps that are the target data source of a materialized node
906
+ target_datasource = get_target_materialized_data_source_name(to_run[key])
907
+ if target_datasource:
908
+                     # look for all_dep_map items that have the target data source as a dependency and are an endpoint
909
+ for _key, _deps in all_dep_map.items():
910
+ for dep in _deps:
911
+ if (
912
+ dep == target_datasource
913
+ or (dep == key and target_datasource not in all_dep_map.get(key, []))
914
+ ) and is_endpoint_with_no_dependencies(
915
+ all_resources.get(_key, {}), all_dep_map, all_resources
916
+ ):
917
+ dep_map[_key] = _deps
918
+ to_run[_key] = all_resources.get(_key)
919
+ else:
920
+ while len(deps) > 0:
921
+ dep = deps.pop()
922
+ if dep not in processed:
923
+ processed.add(dep)
924
+ f = full_path_by_name(dir_path, dep, vendor_paths)
925
+ if f:
926
+ if verbose:
927
+ try:
928
+ processed_filename = f.relative_to(os.getcwd())
929
+ except ValueError:
930
+ processed_filename = f
931
+ # This is to avoid processing shared data sources
932
+ if "vendor/" in str(processed_filename):
933
+ click.echo(FeedbackManager.info_skipping_resource(resource=processed_filename))
934
+ continue
935
+ click.echo(FeedbackManager.info_processing_file(filename=processed_filename))
936
+ await process(
937
+ filename=str(f),
938
+ tb_client=tb_client,
939
+ deps=deps,
940
+ dep_map=dep_map,
941
+ to_run=to_run,
942
+ vendor_paths=vendor_paths,
943
+ skip_connectors=skip_connectors,
944
+ current_ws=current_ws,
945
+ fork_downstream=fork_downstream,
946
+ is_internal=is_internal,
947
+ dir_path=dir_path,
948
+ verbose=verbose,
949
+ embedded_datasources=embedded_datasources,
950
+ )
951
+
952
+ return GraphDependencies(dep_map, to_run, all_dep_map, all_resources)
953
+
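# A hedged usage sketch (hypothetical pipe filename; assumes the imports at the top of this
# file): build the dependency graph for a single pipe file and print the order in which the
# resources would be processed.
async def _example_build_graph(tb_client: TinyB) -> None:
    graph = await build_graph(
        ["pipes/top_products.pipe"],  # hypothetical datafile path
        tb_client,
        dir_path=".",
        process_dependencies=True,
    )
    for group in toposort(graph.dep_map):
        print(sorted(group))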
954
+
955
+ async def process_file(
956
+ filename: str,
957
+ tb_client: TinyB,
958
+ skip_connectors: bool = False,
959
+ current_ws: Optional[Dict[str, Any]] = None,
960
+ ) -> List[Dict[str, Any]]:
961
+ """Returns a list of resources
962
+
963
+ For both datasources and pipes, a list of just one item is returned"""
964
+
965
+ def get_engine_params(node: Dict[str, Any]) -> Dict[str, Any]:
966
+ params = {}
967
+
968
+ if "engine" in node:
969
+ engine = node["engine"]["type"]
970
+ params["engine"] = engine
971
+ args = node["engine"]["args"]
972
+ for k, v in args:
973
+ params[f"engine_{k}"] = v
974
+ return params
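    # Illustrative example (hypothetical engine settings): a node carrying
    #   {"engine": {"type": "MergeTree", "args": [("partition_key", "toYYYYMM(ts)"), ("sorting_key", "ts")]}}
    # yields {"engine": "MergeTree", "engine_partition_key": "toYYYYMM(ts)", "engine_sorting_key": "ts"}.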
975
+
976
+ async def get_kafka_params(node: Dict[str, Any]):
977
+ params = {key: value for key, value in node.items() if key.startswith("kafka")}
978
+
979
+ if not skip_connectors:
980
+ try:
981
+ connector_params = {
982
+ "kafka_bootstrap_servers": params.get("kafka_bootstrap_servers", None),
983
+ "kafka_key": params.get("kafka_key", None),
984
+ "kafka_secret": params.get("kafka_secret", None),
985
+ "kafka_connection_name": params.get("kafka_connection_name", None),
986
+ "kafka_auto_offset_reset": params.get("kafka_auto_offset_reset", None),
987
+ "kafka_schema_registry_url": params.get("kafka_schema_registry_url", None),
988
+ "kafka_ssl_ca_pem": get_ca_pem_content(params.get("kafka_ssl_ca_pem", None), filename),
989
+ "kafka_sasl_mechanism": params.get("kafka_sasl_mechanism", None),
990
+ }
991
+
992
+ connector = await tb_client.get_connection(**connector_params)
993
+ if not connector:
994
+ click.echo(
995
+ FeedbackManager.info_creating_kafka_connection(connection_name=params["kafka_connection_name"])
996
+ )
997
+ required_params = [
998
+ connector_params["kafka_bootstrap_servers"],
999
+ connector_params["kafka_key"],
1000
+ connector_params["kafka_secret"],
1001
+ ]
1002
+
1003
+ if not all(required_params):
1004
+ raise click.ClickException(FeedbackManager.error_unknown_kafka_connection(datasource=name))
1005
+
1006
+ connector = await tb_client.connection_create_kafka(**connector_params)
1007
+ except Exception as e:
1008
+ raise click.ClickException(
1009
+ FeedbackManager.error_connection_create(
1010
+ connection_name=params["kafka_connection_name"], error=str(e)
1011
+ )
1012
+ )
1013
+
1014
+ click.echo(FeedbackManager.success_connection_using(connection_name=connector["name"]))
1015
+
1016
+ params.update(
1017
+ {
1018
+ "connector": connector["id"],
1019
+ "service": "kafka",
1020
+ }
1021
+ )
1022
+
1023
+ return params
1024
+
1025
+ async def get_import_params(datasource: Dict[str, Any], node: Dict[str, Any]) -> Dict[str, Any]:
1026
+ params: Dict[str, Any] = {key: value for key, value in node.items() if key.startswith("import_")}
1027
+
1028
+ if len(params) == 0 or skip_connectors:
1029
+ return params
1030
+
1031
+ service: Optional[str] = node.get("import_service", None)
1032
+
1033
+ if service and service.lower() == "bigquery":
1034
+ if not await tb_client.check_gcp_read_permissions():
1035
+ raise click.ClickException(FeedbackManager.error_unknown_bq_connection(datasource=datasource["name"]))
1036
+
1037
+ # Bigquery doesn't have a datalink, so we can stop here
1038
+ return params
1039
+
1040
+ # Rest of connectors
1041
+
1042
+ connector_id: Optional[str] = node.get("import_connector", None)
1043
+ connector_name: Optional[str] = node.get("import_connection_name", None)
1044
+ if not connector_name and not connector_id:
1045
+ raise click.ClickException(FeedbackManager.error_missing_connection_name(datasource=datasource["name"]))
1046
+
1047
+ if not connector_id:
1048
+ assert isinstance(connector_name, str)
1049
+
1050
+ connector: Optional[Dict[str, Any]] = await tb_client.get_connector(connector_name, service)
1051
+
1052
+ if not connector:
1053
+ raise Exception(
1054
+ FeedbackManager.error_unknown_connection(datasource=datasource["name"], connection=connector_name)
1055
+ )
1056
+ connector_id = connector["id"]
1057
+ service = connector["service"]
1058
+
1059
+ # The API needs the connector ID to create the datasource.
1060
+ params["import_connector"] = connector_id
1061
+ if service:
1062
+ params["import_service"] = service
1063
+
1064
+ if import_from_timestamp := params.get("import_from_timestamp", None):
1065
+ try:
1066
+ str(datetime.datetime.fromisoformat(import_from_timestamp).isoformat())
1067
+ except ValueError:
1068
+ raise click.ClickException(
1069
+ FeedbackManager.error_invalid_import_from_timestamp(datasource=datasource["name"])
1070
+ )
1071
+
1072
+ if service in PREVIEW_CONNECTOR_SERVICES:
1073
+ if not params.get("import_bucket_uri", None):
1074
+ raise click.ClickException(FeedbackManager.error_missing_bucket_uri(datasource=datasource["name"]))
1075
+ elif service == "dynamodb":
1076
+ if not params.get("import_table_arn", None):
1077
+ raise click.ClickException(FeedbackManager.error_missing_table_arn(datasource=datasource["name"]))
1078
+ if not params.get("import_export_bucket", None):
1079
+ raise click.ClickException(FeedbackManager.error_missing_export_bucket(datasource=datasource["name"]))
1080
+ else:
1081
+ if not params.get("import_external_datasource", None):
1082
+ raise click.ClickException(
1083
+ FeedbackManager.error_missing_external_datasource(datasource=datasource["name"])
1084
+ )
1085
+
1086
+ return params
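    # Summary of the validation above: preview connector services require import_bucket_uri,
    # dynamodb requires import_table_arn and import_export_bucket, and any other service
    # requires import_external_datasource.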
1087
+
1088
+ if DataFileExtensions.DATASOURCE in filename:
1089
+ doc = parse_datasource(filename)
1090
+ node = doc.nodes[0]
1091
+ deps: List[str] = []
1092
+         # replace tables in materialized columns
1093
+ columns = parse_table_structure(node["schema"])
1094
+
1095
+ _format = "csv"
1096
+ for x in columns:
1097
+ if x["default_value"] and x["default_value"].lower().startswith("materialized"):
1098
+                 # turn the expression into a select query so sql_get_used_tables can get the used tables
1099
+ q = "select " + x["default_value"][len("materialized") :]
1100
+ tables = await tb_client.sql_get_used_tables(q)
1101
+                 # materialized column expressions could have joins, so we need to add them as deps
1102
+ deps += tables
1103
+ # generate replacements and replace the query
1104
+ replacements = {t: t for t in tables}
1105
+
1106
+ replaced_results = await tb_client.replace_tables(q, replacements)
1107
+ x["default_value"] = replaced_results.replace("SELECT", "materialized", 1)
1108
+ if x.get("jsonpath", None):
1109
+ _format = "ndjson"
1110
+
1111
+ schema = ",".join(schema_to_sql_columns(columns))
1112
+
1113
+ name = os.path.basename(filename).rsplit(".", 1)[0]
1114
+
1115
+ version = f"__v{doc.version}" if doc.version is not None else ""
1116
+
1117
+ def append_version_to_name(name: str, version: str) -> str:
1118
+ if version != "":
1119
+ name = name.replace(".", "_")
1120
+ return name + version
1121
+ return name
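    # e.g. append_version_to_name("events", "__v2") -> "events__v2";
    #      append_version_to_name("events", "") -> "events" (dots are only replaced when a version is set).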
1122
+
1123
+ description = node.get("description", "")
1124
+ indexes_list = node.get("indexes", [])
1125
+ indexes = None
1126
+ if indexes_list:
1127
+ indexes = "\n".join([index.to_sql() for index in indexes_list])
1128
+ # Here is where we lose the columns
1129
+ # I don't know why we don't return something more similar to the parsed doc
1130
+ params = {
1131
+ "name": append_version_to_name(name, version),
1132
+ "description": description,
1133
+ "schema": schema,
1134
+ "indexes": indexes,
1135
+ "indexes_list": indexes_list,
1136
+ "format": _format,
1137
+ }
1138
+
1139
+ params.update(get_engine_params(node))
1140
+
1141
+ if "import_service" in node or "import_connection_name" in node:
1142
+ VALID_SERVICES: Tuple[str, ...] = ("bigquery", "snowflake", "s3", "s3_iamrole", "gcs", "dynamodb")
1143
+
1144
+ import_params = await get_import_params(params, node)
1145
+
1146
+ service = import_params.get("import_service", None)
1147
+ if service and service not in VALID_SERVICES:
1148
+ raise Exception(f"Unknown import service: {service}")
1149
+
1150
+ if service in PREVIEW_CONNECTOR_SERVICES:
1151
+ ON_DEMAND_CRON = ON_DEMAND
1152
+ AUTO_CRON = "@auto"
1153
+ ON_DEMAND_CRON_EXPECTED_BY_THE_API = "@once"
1154
+ VALID_CRONS: Tuple[str, ...] = (ON_DEMAND_CRON, AUTO_CRON)
1155
+ cron = node.get("import_schedule", ON_DEMAND_CRON)
1156
+
1157
+ if cron not in VALID_CRONS:
1158
+ valid_values = ", ".join(VALID_CRONS)
1159
+ raise Exception(f"Invalid import schedule: '{cron}'. Valid values are: {valid_values}")
1160
+
1161
+ if cron == ON_DEMAND_CRON:
1162
+ if import_params is None:
1163
+ import_params = {}
1164
+ import_params["import_schedule"] = ON_DEMAND_CRON_EXPECTED_BY_THE_API
1165
+
1166
+ if cron == AUTO_CRON:
1167
+ period: int = DEFAULT_CRON_PERIOD
1168
+
1169
+ if current_ws is not None:
1170
+ workspaces = (await tb_client.user_workspaces()).get("workspaces", [])
1171
+ workspace_rate_limits: Dict[str, Dict[str, int]] = next(
1172
+ (w.get("rate_limits", {}) for w in workspaces if w["id"] == current_ws["id"]), {}
1173
+ )
1174
+ if workspace_rate_limits:
1175
+ rate_limit_config = workspace_rate_limits.get("api_datasources_create_append_replace", {})
1176
+ if rate_limit_config:
1177
+ period = rate_limit_config.get("period", DEFAULT_CRON_PERIOD)
1178
+
1179
+ def seconds_to_cron_expression(seconds: int) -> str:
1180
+ minutes = seconds // 60
1181
+ hours = minutes // 60
1182
+ days = hours // 24
1183
+ if days > 0:
1184
+ return f"0 0 */{days} * *"
1185
+ if hours > 0:
1186
+ return f"0 */{hours} * * *"
1187
+ if minutes > 0:
1188
+ return f"*/{minutes} * * * *"
1189
+ return f"*/{seconds} * * * *"
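                    # Illustrative mappings for the helper above: 1800 s -> "*/30 * * * *",
                    # 7200 s -> "0 */2 * * *", 172800 s -> "0 0 */2 * *".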
1190
+
1191
+ if import_params is None:
1192
+ import_params = {}
1193
+ import_params["import_schedule"] = seconds_to_cron_expression(period)
1194
+
1195
+ # Include all import_ parameters in the datasource params
1196
+ if import_params is not None:
1197
+ params.update(import_params)
1198
+
1199
+ # Substitute the import parameters with the ones used by the
1200
+ # import API:
1201
+ # - If an import parameter is not present and there's a default
1202
+ # value, use the default value.
1203
+ # - If the resulting value is None, do not add the parameter.
1204
+ #
1205
+             # Note: any unknown import_ parameter is left as-is.
1206
+ for key in ImportReplacements.get_datafile_parameter_keys():
1207
+ replacement, default_value = ImportReplacements.get_api_param_for_datafile_param(service, key)
1208
+ if not replacement:
1209
+                     continue  # We should never reach this, but just in case...
1210
+
1211
+ value: Any
1212
+ try:
1213
+ value = params[key]
1214
+ del params[key]
1215
+ except KeyError:
1216
+ value = default_value
1217
+
1218
+ if value:
1219
+ params[replacement] = value
1220
+
1221
+ if "kafka_connection_name" in node:
1222
+ kafka_params = await get_kafka_params(node)
1223
+ params.update(kafka_params)
1224
+ del params["format"]
1225
+
1226
+ if "tags" in node:
1227
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1228
+ params.update(tags)
1229
+
1230
+ resources: List[Dict[str, Any]] = []
1231
+
1232
+ resources.append(
1233
+ {
1234
+ "resource": "datasources",
1235
+ "resource_name": name,
1236
+ "version": doc.version,
1237
+ "params": params,
1238
+ "filename": filename,
1239
+ "deps": deps,
1240
+ "tokens": doc.tokens,
1241
+ "shared_with": doc.shared_with,
1242
+ "filtering_tags": doc.filtering_tags,
1243
+ }
1244
+ )
1245
+
1246
+ return resources
1247
+
1248
+ elif DataFileExtensions.PIPE in filename:
1249
+ doc = parse_pipe(filename)
1250
+ version = f"__v{doc.version}" if doc.version is not None else ""
1251
+ name = os.path.basename(filename).split(".")[0]
1252
+ description = doc.description if doc.description is not None else ""
1253
+
1254
+ deps = []
1255
+ nodes: List[Dict[str, Any]] = []
1256
+
1257
+ is_copy = any([node for node in doc.nodes if node.get("type", "standard").lower() == PipeNodeTypes.COPY])
1258
+ for node in doc.nodes:
1259
+ sql = node["sql"]
1260
+ node_type = node.get("type", "standard").lower()
1261
+ params = {
1262
+ "name": node["name"],
1263
+ "type": node_type,
1264
+ "description": node.get("description", ""),
1265
+ "target_datasource": node.get("target_datasource", None),
1266
+ "copy_schedule": node.get(CopyParameters.COPY_SCHEDULE, None),
1267
+ "mode": node.get("mode", CopyModes.APPEND),
1268
+ }
1269
+
1270
+ is_export_node = ExportReplacements.is_export_node(node)
1271
+ export_params = ExportReplacements.get_params_from_datafile(node) if is_export_node else None
1272
+
1273
+ sql = sql.strip()
1274
+ is_template = False
1275
+ if sql[0] == "%":
1276
+ try:
1277
+ sql_rendered, _, _ = render_sql_template(sql[1:], test_mode=True)
1278
+ except Exception as e:
1279
+ raise click.ClickException(
1280
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1281
+ )
1282
+ is_template = True
1283
+ else:
1284
+ sql_rendered = sql
1285
+
1286
+ try:
1287
+ dependencies = await tb_client.sql_get_used_tables(sql_rendered, raising=True, is_copy=is_copy)
1288
+ deps += [t for t in dependencies if t not in [n["name"] for n in doc.nodes]]
1289
+
1290
+ except Exception as e:
1291
+ raise click.ClickException(
1292
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1293
+ )
1294
+
1295
+ if is_template:
1296
+ deps += get_used_tables_in_template(sql[1:])
1297
+
1298
+ is_neither_copy_nor_materialized = "datasource" not in node and "target_datasource" not in node
1299
+ if "engine" in node and is_neither_copy_nor_materialized:
1300
+ raise ValueError("Defining ENGINE options in a node requires a DATASOURCE")
1301
+
1302
+ if "datasource" in node:
1303
+ params["datasource"] = node["datasource"]
1304
+ deps += [node["datasource"]]
1305
+
1306
+ if "target_datasource" in node:
1307
+ params["target_datasource"] = node["target_datasource"]
1308
+ deps += [node["target_datasource"]]
1309
+
1310
+ params.update(get_engine_params(node))
1311
+
1312
+ replacements = {x: x for x in deps if x not in [n["name"] for n in doc.nodes]}
1313
+
1314
+ # FIXME: Ideally we should use await tb_client.replace_tables(sql, replacements)
1315
+ for old, new in replacements.items():
1316
+ sql = re.sub("([\t \\n']+|^)" + old + "([\t \\n'\\)]+|$)", "\\1" + new + "\\2", sql)
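                # e.g. (hypothetical value) old = "events" matches " FROM events " but not
                # "events_source": only occurrences preceded by whitespace, a quote or the start
                # of the query and followed by whitespace, a quote, a closing parenthesis or the
                # end of the query are substituted.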
1317
+
1318
+ if "tags" in node:
1319
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1320
+ params.update(tags)
1321
+
1322
+ nodes.append(
1323
+ {
1324
+ "sql": sql,
1325
+ "params": params,
1326
+ "export_params": export_params,
1327
+ }
1328
+ )
1329
+
1330
+ return [
1331
+ {
1332
+ "resource": "pipes",
1333
+ "resource_name": name,
1334
+ "version": doc.version,
1335
+ "filename": filename,
1336
+ "name": name + version,
1337
+ "nodes": nodes,
1338
+ "deps": [x for x in set(deps)],
1339
+ "tokens": doc.tokens,
1340
+ "description": description,
1341
+ "warnings": doc.warnings,
1342
+ "filtering_tags": doc.filtering_tags,
1343
+ }
1344
+ ]
1345
+ else:
1346
+ raise click.ClickException(FeedbackManager.error_file_extension(filename=filename))
1347
+
1348
+
1349
+ def sizeof_fmt(num: Union[int, float], suffix: str = "b") -> str:
1350
+ """Readable file size
1351
+ :param num: Bytes value
1352
+ :type num: int
1353
+     :param suffix: Unit suffix (optional), default = "b"
1354
+ :type suffix: str
1355
+ :rtype: str
1356
+ """
1357
+ for unit in ["", "k", "M", "G", "T", "P", "E", "Z"]:
1358
+ if abs(num) < 1024.0:
1359
+ return "%3.1f %s%s" % (num, unit, suffix)
1360
+ num /= 1024.0
1361
+ return "%.1f%s%s" % (num, "Yi", suffix)
1362
+
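# A few illustrative values for sizeof_fmt above (default suffix "b"):
#   sizeof_fmt(512)         -> "512.0 b"
#   sizeof_fmt(1536)        -> "1.5 kb"
#   sizeof_fmt(3 * 1024**3) -> "3.0 Gb"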
1363
+
1364
+ def full_path_by_name(folder: str, name: str, vendor_paths: Optional[List[Tuple[str, str]]] = None) -> Optional[Path]:
1365
+ f = Path(folder)
1366
+ ds = name + ".datasource"
1367
+ if os.path.isfile(os.path.join(folder, ds)):
1368
+ return f / ds
1369
+ if os.path.isfile(f / "datasources" / ds):
1370
+ return f / "datasources" / ds
1371
+
1372
+ pipe = name + ".pipe"
1373
+ if os.path.isfile(os.path.join(folder, pipe)):
1374
+ return f / pipe
1375
+
1376
+ if os.path.isfile(f / "endpoints" / pipe):
1377
+ return f / "endpoints" / pipe
1378
+
1379
+ if os.path.isfile(f / "pipes" / pipe):
1380
+ return f / "pipes" / pipe
1381
+
1382
+ if os.path.isfile(f / "sinks" / pipe):
1383
+ return f / "sinks" / pipe
1384
+
1385
+ if os.path.isfile(f / "copies" / pipe):
1386
+ return f / "copies" / pipe
1387
+
1388
+ if os.path.isfile(f / "playgrounds" / pipe):
1389
+ return f / "playgrounds" / pipe
1390
+
1391
+ if os.path.isfile(f / "materializations" / pipe):
1392
+ return f / "materializations" / pipe
1393
+
1394
+ if vendor_paths:
1395
+ for wk_name, wk_path in vendor_paths:
1396
+ if name.startswith(f"{wk_name}."):
1397
+ r = full_path_by_name(wk_path, name.replace(f"{wk_name}.", ""))
1398
+ if r:
1399
+ return r
1400
+ return None
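# Illustrative lookup order (hypothetical project layout) for name "events": ./events.datasource,
# ./datasources/events.datasource, ./events.pipe, then the pipe subfolders (endpoints/, pipes/,
# sinks/, copies/, playgrounds/, materializations/), and finally any vendor workspace paths for
# names prefixed with "<workspace>.".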