tinybird-0.0.1.dev6-py3-none-any.whl → tinybird-0.0.1.dev8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tinybird might be problematic.

Files changed (31)
  1. tinybird/tb/modules/branch.py +0 -21
  2. tinybird/tb/modules/build.py +7 -18
  3. tinybird/tb/modules/cli.py +11 -131
  4. tinybird/tb/modules/common.py +14 -2
  5. tinybird/tb/modules/create.py +10 -14
  6. tinybird/tb/modules/datafile/build.py +2136 -0
  7. tinybird/tb/modules/datafile/build_common.py +118 -0
  8. tinybird/tb/modules/datafile/build_datasource.py +413 -0
  9. tinybird/tb/modules/datafile/build_pipe.py +648 -0
  10. tinybird/tb/modules/datafile/common.py +898 -0
  11. tinybird/tb/modules/datafile/diff.py +197 -0
  12. tinybird/tb/modules/datafile/exceptions.py +23 -0
  13. tinybird/tb/modules/datafile/format_common.py +66 -0
  14. tinybird/tb/modules/datafile/format_datasource.py +160 -0
  15. tinybird/tb/modules/datafile/format_pipe.py +195 -0
  16. tinybird/tb/modules/datafile/parse_datasource.py +41 -0
  17. tinybird/tb/modules/datafile/parse_pipe.py +69 -0
  18. tinybird/tb/modules/datafile/pipe_checker.py +560 -0
  19. tinybird/tb/modules/datafile/pull.py +157 -0
  20. tinybird/tb/modules/datasource.py +1 -1
  21. tinybird/tb/modules/fmt.py +4 -1
  22. tinybird/tb/modules/local.py +3 -0
  23. tinybird/tb/modules/pipe.py +8 -2
  24. tinybird/tb/modules/prompts.py +1 -1
  25. tinybird/tb/modules/workspace.py +1 -1
  26. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/METADATA +1 -1
  27. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/RECORD +30 -17
  28. tinybird/tb/modules/datafile.py +0 -6122
  29. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/WHEEL +0 -0
  30. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/entry_points.txt +0 -0
  31. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev8.dist-info}/top_level.txt +0 -0
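
A summary like the file list above can be reproduced locally from the two wheels. The following is a minimal sketch (not part of the tinybird package), assuming both wheel files have already been downloaded into the working directory under their standard names:

import hashlib
import zipfile

def wheel_digests(path: str) -> dict:
    # Map each archive member to the SHA-256 of its contents.
    with zipfile.ZipFile(path) as wheel:
        return {
            name: hashlib.sha256(wheel.read(name)).hexdigest()
            for name in wheel.namelist()
            if not name.endswith("/")
        }

# Assumed local filenames; adjust to wherever the wheels were downloaded.
old = wheel_digests("tinybird-0.0.1.dev6-py3-none-any.whl")
new = wheel_digests("tinybird-0.0.1.dev8-py3-none-any.whl")

for name in sorted(set(old) | set(new)):
    if name not in new:
        print(f"removed  {name}")
    elif name not in old:
        print(f"added    {name}")
    elif old[name] != new[name]:
        print(f"changed  {name}")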
tinybird/tb/modules/datafile/build.py
@@ -0,0 +1,2136 @@
1
+ import datetime
2
+ import os
3
+ import os.path
4
+ import re
5
+ import sys
6
+ import urllib
7
+ from copy import deepcopy
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
11
+
12
+ import click
13
+ from toposort import toposort
14
+
15
+ from tinybird.client import TinyB
16
+ from tinybird.feedback_manager import FeedbackManager
17
+ from tinybird.sql import parse_table_structure, schema_to_sql_columns
18
+ from tinybird.sql_template import get_used_tables_in_template, render_sql_template
19
+ from tinybird.tb.modules.common import get_ca_pem_content
20
+ from tinybird.tb.modules.config import CLIConfig
21
+ from tinybird.tb.modules.datafile.build_common import update_tags_in_resource
22
+ from tinybird.tb.modules.datafile.build_datasource import is_datasource, new_ds
23
+ from tinybird.tb.modules.datafile.build_pipe import (
24
+ get_target_materialized_data_source_name,
25
+ is_endpoint,
26
+ is_endpoint_with_no_dependencies,
27
+ is_materialized,
28
+ new_pipe,
29
+ )
30
+ from tinybird.tb.modules.datafile.common import (
31
+ DEFAULT_CRON_PERIOD,
32
+ INTERNAL_TABLES,
33
+ ON_DEMAND,
34
+ PREVIEW_CONNECTOR_SERVICES,
35
+ TB_LOCAL_WORKSPACE_NAME,
36
+ CopyModes,
37
+ CopyParameters,
38
+ DataFileExtensions,
39
+ ExportReplacements,
40
+ ImportReplacements,
41
+ PipeNodeTypes,
42
+ find_file_by_name,
43
+ get_name_version,
44
+ get_project_filenames,
45
+ pp,
46
+ )
47
+ from tinybird.tb.modules.datafile.exceptions import AlreadyExistsException, IncludeFileNotFoundException
48
+ from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
49
+ from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
50
+
51
+
52
+ async def folder_build(
53
+ tb_client: TinyB,
54
+ filenames: Optional[List[str]] = None,
55
+ folder: str = ".",
56
+ ignore_sql_errors: bool = False,
57
+ is_internal: bool = False,
58
+ only_pipes: bool = False,
59
+ is_vendor: bool = False,
60
+ current_ws: Optional[Dict[str, Any]] = None,
61
+ workspaces: Optional[List[Dict[str, Any]]] = None,
62
+ ):
63
+ if only_pipes:
64
+ filenames = [f for f in filenames if f.endswith(".pipe")]
65
+
66
+ config = CLIConfig.get_project_config()
67
+ build = True
68
+ dry_run = False
69
+ force = True
70
+ push_deps = True
71
+ only_changes = True
72
+ debug = False
73
+ check = True
74
+ populate = False
75
+ populate_subset = None
76
+ populate_condition = None
77
+ tests_to_run = 0
78
+ tests_failfast = True
79
+ override_datasource = False
80
+ tests_check_requests_from_branch = False
81
+ skip_confirmation = True
82
+ wait = False
83
+ unlink_on_populate_error = False
84
+ upload_fixtures = False
85
+ only_response_times = False
86
+ workspace_map: Dict[str, Any] = {}
87
+ tests_sample_by_params = 1
88
+ tests_ignore_order = False
89
+ tests_validate_processed_bytes = False
90
+ run_tests = False
91
+ verbose = False
92
+ as_standard = False
93
+ raise_on_exists = False
94
+ fork_downstream = True
95
+ fork = False
96
+ release_created = False
97
+ auto_promote = False
98
+ hide_folders = False
99
+ tests_relative_change = 0.01
100
+ tests_sample_by_params = 0
101
+ tests_filter_by = None
102
+ tests_failfast = False
103
+ tests_ignore_order = False
104
+ tests_validate_processed_bytes = False
105
+ tests_check_requests_from_branch = False
106
+ git_release = False
107
+ workspace_lib_paths = []
108
+
109
+ workspace_lib_paths = list(workspace_lib_paths)
110
+ # include vendor libs without overriding user ones
111
+ existing_workspaces = set(x[1] for x in workspace_lib_paths)
112
+ vendor_path = Path("vendor")
113
+ user_token = config.get_user_token()
114
+ user_client = deepcopy(tb_client)
115
+
116
+ if user_token:
117
+ user_client.token = user_token
118
+
119
+ vendor_workspaces = []
120
+ user_workspaces = await user_client.user_workspaces()
121
+ if vendor_path.exists() and not is_vendor:
122
+ for x in vendor_path.iterdir():
123
+ if x.is_dir() and x.name not in existing_workspaces:
124
+ if user_token:
125
+ try:
126
+ ws_to_delete = next((ws for ws in user_workspaces["workspaces"] if ws["name"] == x.name), None)
127
+ if ws_to_delete:
128
+ await user_client.delete_workspace(ws_to_delete["id"], hard_delete_confirmation=x.name)
129
+ except Exception:
130
+ pass
131
+ vendor_ws = await user_client.create_workspace(x.name, template=None)
132
+ vendor_workspaces.append(vendor_ws)
133
+ workspace_lib_paths.append((x.name, x))
134
+
135
+ workspaces: List[Dict[str, Any]] = (await user_client.user_workspaces()).get("workspaces", [])
136
+ local_ws = next((ws for ws in workspaces if ws["name"] == TB_LOCAL_WORKSPACE_NAME), {})
137
+ current_ws: Dict[str, Any] = current_ws or local_ws
138
+ for vendor_ws in [ws for ws in workspaces if ws["name"] in [ws["name"] for ws in vendor_workspaces]]:
139
+ ws_client = deepcopy(tb_client)
140
+ ws_client.token = vendor_ws["token"]
141
+ shared_ws_path = Path(folder) / "vendor" / vendor_ws["name"]
142
+
143
+ if shared_ws_path.exists() and not is_vendor:
144
+ await folder_build(ws_client, folder=shared_ws_path.as_posix(), is_vendor=True, current_ws=vendor_ws)
145
+
146
+ datasources: List[Dict[str, Any]] = await tb_client.datasources()
147
+ pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
148
+
149
+ existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
150
+ # replace workspace mapping names
151
+ for old_ws, new_ws in workspace_map.items():
152
+ existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
153
+
154
+ remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
155
+
156
+ # replace workspace mapping names
157
+ for old_ws, new_ws in workspace_map.items():
158
+ remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
159
+
160
+ if not filenames:
161
+ filenames = get_project_filenames(folder)
162
+
163
+ changed = None
164
+
165
+ # build graph to get new versions for all the files involved in the query
166
+ # dependencies need to be processed always to get the versions
167
+ dependencies_graph = await build_graph(
168
+ filenames,
169
+ tb_client,
170
+ dir_path=folder,
171
+ process_dependencies=True,
172
+ workspace_map=workspace_map,
173
+ skip_connectors=True,
174
+ workspace_lib_paths=workspace_lib_paths,
175
+ current_ws=current_ws,
176
+ changed=changed,
177
+ only_changes=only_changes,
178
+ fork_downstream=fork_downstream,
179
+ is_internal=is_internal,
180
+ build=build,
181
+ )
182
+
183
+ resource_versions = {}
184
+ latest_datasource_versions = {}
185
+
186
+ # If we have datasources using VERSION, let's try to get the latest version
187
+ dependencies_graph = await build_graph(
188
+ filenames,
189
+ tb_client,
190
+ dir_path=folder,
191
+ resource_versions=latest_datasource_versions,
192
+ workspace_map=workspace_map,
193
+ process_dependencies=push_deps,
194
+ verbose=verbose,
195
+ workspace_lib_paths=workspace_lib_paths,
196
+ current_ws=current_ws,
197
+ changed=changed,
198
+ only_changes=only_changes,
199
+ fork_downstream=fork_downstream,
200
+ is_internal=is_internal,
201
+ build=build,
202
+ )
203
+
204
+ if debug:
205
+ pp.pprint(dependencies_graph.to_run)
206
+
207
+ def should_push_file(
208
+ name: str,
209
+ remote_resource_names: List[str],
210
+ latest_datasource_versions: Dict[str, Any],
211
+ force: bool,
212
+ run_tests: bool,
213
+ ) -> bool:
214
+ """
215
+ Function to know if we need to run a file or not
216
+ """
217
+ if name not in remote_resource_names:
218
+ return True
219
+ # We need to try to push a file when it doesn't exist or when its version differs from the existing one
220
+ resource_full_name = (
221
+ f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
222
+ )
223
+ if resource_full_name not in existing_resources:
224
+ return True
225
+ if force or run_tests:
226
+ return True
227
+ return False
228
+
229
+ async def push(
230
+ name: str,
231
+ to_run: Dict[str, Dict[str, Any]],
232
+ resource_versions: Dict[str, Any],
233
+ latest_datasource_versions: Dict[str, Any],
234
+ dry_run: bool,
235
+ fork_downstream: Optional[bool] = False,
236
+ fork: Optional[bool] = False,
237
+ ):
238
+ if name in to_run:
239
+ resource = to_run[name]["resource"]
240
+ if not dry_run:
241
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
242
+ if name not in resource_versions:
243
+ version = ""
244
+ if name in latest_datasource_versions:
245
+ version = f"(v{latest_datasource_versions[name]})"
246
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
247
+ else:
248
+ click.echo(
249
+ FeedbackManager.info_processing_resource(
250
+ name=name,
251
+ version=latest_datasource_versions[name],
252
+ latest_version=resource_versions.get(name),
253
+ )
254
+ )
255
+ try:
256
+ await exec_file(
257
+ to_run[name],
258
+ tb_client,
259
+ force,
260
+ check,
261
+ debug and verbose,
262
+ populate,
263
+ populate_subset,
264
+ populate_condition,
265
+ unlink_on_populate_error,
266
+ wait,
267
+ user_token,
268
+ override_datasource,
269
+ ignore_sql_errors,
270
+ skip_confirmation,
271
+ only_response_times,
272
+ run_tests,
273
+ as_standard,
274
+ tests_to_run,
275
+ tests_relative_change,
276
+ tests_sample_by_params,
277
+ tests_filter_by,
278
+ tests_failfast,
279
+ tests_ignore_order,
280
+ tests_validate_processed_bytes,
281
+ tests_check_requests_from_branch,
282
+ current_ws,
283
+ local_ws,
284
+ fork_downstream,
285
+ fork,
286
+ git_release,
287
+ build,
288
+ is_vendor,
289
+ )
290
+ if not run_tests:
291
+ click.echo(
292
+ FeedbackManager.success_create(
293
+ name=(
294
+ name
295
+ if to_run[name]["version"] is None
296
+ else f'{name}__v{to_run[name]["version"]}'
297
+ )
298
+ )
299
+ )
300
+ except Exception as e:
301
+ filename = (
302
+ os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
303
+ )
304
+ exception = FeedbackManager.error_push_file_exception(
305
+ filename=filename,
306
+ error=e,
307
+ )
308
+ raise click.ClickException(exception)
309
+ else:
310
+ if raise_on_exists:
311
+ raise AlreadyExistsException(
312
+ FeedbackManager.warning_name_already_exists(
313
+ name=name if to_run[name]["version"] is None else f'{name}__v{to_run[name]["version"]}'
314
+ )
315
+ )
316
+ else:
317
+ if await name_matches_existing_resource(resource, name, tb_client):
318
+ if resource == "pipes":
319
+ click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
320
+ else:
321
+ click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
322
+ else:
323
+ click.echo(
324
+ FeedbackManager.warning_name_already_exists(
325
+ name=(
326
+ name
327
+ if to_run[name]["version"] is None
328
+ else f'{name}__v{to_run[name]["version"]}'
329
+ )
330
+ )
331
+ )
332
+ else:
333
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
334
+ if name not in resource_versions:
335
+ version = ""
336
+ if name in latest_datasource_versions:
337
+ version = f"(v{latest_datasource_versions[name]})"
338
+ if build:
339
+ extension = "pipe" if resource == "pipes" else "datasource"
340
+ click.echo(
341
+ FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=version)
342
+ )
343
+ else:
344
+ click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
345
+ else:
346
+ click.echo(
347
+ FeedbackManager.info_dry_processing_resource(
348
+ name=name,
349
+ version=latest_datasource_versions[name],
350
+ latest_version=resource_versions.get(name),
351
+ )
352
+ )
353
+ else:
354
+ if await name_matches_existing_resource(resource, name, tb_client):
355
+ if resource == "pipes":
356
+ click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
357
+ else:
358
+ click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
359
+ else:
360
+ click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
361
+
362
+ async def push_files(
363
+ dependency_graph: GraphDependencies,
364
+ dry_run: bool = False,
365
+ check_backfill_required: bool = False,
366
+ ):
367
+ endpoints_dep_map = dict()
368
+ processed = set()
369
+
370
+ dependencies_graph = dependency_graph.dep_map
371
+ resources_to_run = dependency_graph.to_run
372
+
373
+ if not fork_downstream:
374
+ # First, we will deploy all the resources following the dependency graph, except for the endpoints
375
+ groups = [group for group in toposort(dependencies_graph)]
376
+ for group in groups:
377
+ for name in group:
378
+ if name in processed:
379
+ continue
380
+
381
+ if is_endpoint_with_no_dependencies(
382
+ resources_to_run.get(name, {}),
383
+ dependencies_graph,
384
+ resources_to_run,
385
+ ):
386
+ endpoints_dep_map[name] = dependencies_graph[name]
387
+ continue
388
+
389
+ await push(
390
+ name,
391
+ resources_to_run,
392
+ resource_versions,
393
+ latest_datasource_versions,
394
+ dry_run,
395
+ fork_downstream,
396
+ fork,
397
+ )
398
+ processed.add(name)
399
+
400
+ # Then, we will deploy the endpoints that are on the dependency graph
401
+ groups = [group for group in toposort(endpoints_dep_map)]
402
+ for group in groups:
403
+ for name in group:
404
+ if name not in processed:
405
+ await push(
406
+ name,
407
+ resources_to_run,
408
+ resource_versions,
409
+ latest_datasource_versions,
410
+ dry_run,
411
+ fork_downstream,
412
+ fork,
413
+ )
414
+ processed.add(name)
415
+ else:
416
+ # This will generate the graph from right to left and will fill the gaps of the dependencies
417
+ # If we have a graph like this:
418
+ # A -> B -> C
419
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
420
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
421
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
422
+ dependency_graph.all_dep_map,
423
+ dependency_graph.all_resources,
424
+ resources_to_run,
425
+ list(dependency_graph.dep_map.keys()),
426
+ )
427
+
428
+ # First, we will deploy the datasources that need to be deployed.
429
+ # We need to deploy the datasources from left to right as some datasources might have MVs that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
430
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
431
+
432
+ for group in groups:
433
+ for name in group:
434
+ try:
435
+ await tb_client.datasource_delete(name, force=True)
436
+ except Exception:
437
+ pass
438
+ try:
439
+ await tb_client.pipe_delete(name)
440
+ except Exception:
441
+ pass
442
+
443
+ groups.reverse()
444
+ for group in groups:
445
+ for name in group:
446
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
447
+ continue
448
+
449
+ # If the resource is new, we will use the normal resource information to deploy it
450
+ # This is mostly used for datasources with connections.
451
+ # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
452
+ # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
453
+ if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
454
+ await push(
455
+ name,
456
+ resources_to_run,
457
+ resource_versions,
458
+ latest_datasource_versions,
459
+ dry_run,
460
+ fork_downstream,
461
+ fork,
462
+ )
463
+ else:
464
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
465
+ kafka_connection_name = (
466
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
467
+ )
468
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
469
+ if release_created and (kafka_connection_name or service):
470
+ connector = "Kafka" if kafka_connection_name else service
471
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
472
+ raise click.ClickException(error_msg)
473
+
474
+ # If we are pushing a modified datasource, inform about the backfill
475
+ if check_backfill_required and auto_promote and release_created:
476
+ error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
477
+ raise click.ClickException(error_msg)
478
+
479
+ await push(
480
+ name,
481
+ resources_to_run_fork_downstream,
482
+ resource_versions,
483
+ latest_datasource_versions,
484
+ dry_run,
485
+ fork_downstream,
486
+ fork,
487
+ )
488
+ processed.add(name)
489
+
490
+ # Now, we will create a map of all the endpoints and their dependencies
491
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
492
+ # But does not include the missing gaps
493
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
494
+ # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
495
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
496
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
497
+ for group in groups:
498
+ for name in group:
499
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
500
+ continue
501
+
502
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
503
+
504
+ # Now that we have the dependencies of the endpoints, we need to check that the resources have not been deployed yet and only care about the endpoints that depend on endpoints
505
+ groups = [group for group in toposort(endpoints_dep_map)]
506
+
507
+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
508
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
509
+ # So we need to reverse the groups
510
+ groups.reverse()
511
+ for group in groups:
512
+ for name in group:
513
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
514
+ continue
515
+
516
+ await push(
517
+ name,
518
+ resources_to_run_fork_downstream,
519
+ resource_versions,
520
+ latest_datasource_versions,
521
+ dry_run,
522
+ fork_downstream,
523
+ fork,
524
+ )
525
+ processed.add(name)
526
+
527
+ # Now that the endpoints and datasources are deployed, we can deploy the rest of the pipes (copy & sinks)
528
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
529
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
530
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
531
+ for group in groups:
532
+ for name in group:
533
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
534
+ continue
535
+
536
+ await push(
537
+ name,
538
+ resources_to_run_fork_downstream,
539
+ resource_versions,
540
+ latest_datasource_versions,
541
+ dry_run,
542
+ fork_downstream,
543
+ fork,
544
+ )
545
+ processed.add(name)
546
+
547
+ # Finally, we need to deploy the materialized views from right to left.
548
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
549
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
550
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
551
+ for group in groups:
552
+ for name in group:
553
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
554
+ continue
555
+
556
+ await push(
557
+ name,
558
+ resources_to_run_fork_downstream,
559
+ resource_versions,
560
+ latest_datasource_versions,
561
+ dry_run,
562
+ fork_downstream,
563
+ fork,
564
+ )
565
+ processed.add(name)
566
+
567
+ await push_files(dependencies_graph, dry_run)
568
+
569
+ if not dry_run and not run_tests:
570
+ if upload_fixtures:
571
+ click.echo(FeedbackManager.info_pushing_fixtures())
572
+
573
+ processed = set()
574
+ for group in toposort(dependencies_graph.dep_map):
575
+ for f in group:
576
+ name = os.path.basename(f)
577
+ if name not in processed and name in dependencies_graph.to_run:
578
+ await check_fixtures_data(
579
+ tb_client,
580
+ dependencies_graph.to_run[name],
581
+ debug,
582
+ folder,
583
+ force,
584
+ mode="replace",
585
+ )
586
+ processed.add(name)
587
+ for f in dependencies_graph.to_run:
588
+ if f not in processed:
589
+ await check_fixtures_data(
590
+ tb_client,
591
+ dependencies_graph.to_run[f],
592
+ debug,
593
+ folder,
594
+ force,
595
+ mode="replace",
596
+ )
597
+ else:
598
+ if verbose:
599
+ click.echo(FeedbackManager.info_not_pushing_fixtures())
600
+
601
+ return dependencies_graph.to_run
602
+
603
+
604
+ async def check_fixtures_data(
605
+ client: TinyB, resource: Dict[str, Any], debug: bool, folder: str = "", force: bool = False, mode: str = "replace"
606
+ ):
607
+ if debug:
608
+ click.echo(FeedbackManager.info_checking_file(file=pp.pformat(resource)))
609
+ if resource["resource"] in ["pipes", "tokens"]:
610
+ pass
611
+ elif resource["resource"] == "datasources":
612
+ datasource_name = resource["params"]["name"]
613
+ name = os.path.basename(resource["filename"]).rsplit(".", 1)[0]
614
+ fixture_path = Path(folder) / "fixtures" / f"{name}.csv"
615
+
616
+ if not fixture_path.exists():
617
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.csv"
618
+ if not fixture_path.exists():
619
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.ndjson"
620
+ if not fixture_path.exists():
621
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.parquet"
622
+ if fixture_path.exists():
623
+ # Let's validate only when we are going to replace the actual data
624
+ result = await client.query(sql=f"SELECT count() as c FROM {datasource_name} FORMAT JSON")
625
+ count = result["data"][0]["c"]
626
+
627
+ if count > 0 and not force:
628
+ raise click.ClickException(
629
+ FeedbackManager.error_push_fixture_will_replace_data(datasource=datasource_name)
630
+ )
631
+
632
+ click.echo(
633
+ FeedbackManager.info_checking_file_size(
634
+ filename=resource["filename"], size=sizeof_fmt(os.stat(fixture_path).st_size)
635
+ )
636
+ )
637
+ sys.stdout.flush()
638
+ try:
639
+ await client.datasource_append_data(
640
+ datasource_name=resource["params"]["name"],
641
+ file=fixture_path,
642
+ mode=mode,
643
+ format=fixture_path.suffix[1:],
644
+ )
645
+ click.echo(FeedbackManager.success_processing_data())
646
+ except Exception as e:
647
+ raise click.ClickException(FeedbackManager.error_processing_blocks(error=e))
648
+
649
+ else:
650
+ click.echo(FeedbackManager.warning_fixture_not_found(datasource_name=name))
651
+ else:
652
+ raise click.ClickException(FeedbackManager.error_unknown_resource(resource=resource["resource"]))
653
+
654
+
655
+ def is_new(
656
+ name: str,
657
+ changed: Dict[str, str],
658
+ normal_dependency: Dict[str, Set[str]],
659
+ fork_downstream_dependency: Dict[str, Set[str]],
660
+ ) -> bool:
661
+ def is_git_new(name: str):
662
+ return changed and changed.get(name) == "A"
663
+
664
+ if not is_git_new(name):
665
+ return False
666
+
667
+ # a new resource should not depend on a changed resource
668
+ if back_deps := normal_dependency.get(name):
669
+ for dep in back_deps:
670
+ if dep in fork_downstream_dependency and not is_git_new(dep):
671
+ return False
672
+
673
+ return True
674
+
675
+
676
+ async def name_matches_existing_resource(resource: str, name: str, tb_client: TinyB):
677
+ if resource == "datasources":
678
+ current_pipes: List[Dict[str, Any]] = await tb_client.pipes()
679
+ if name in [x["name"] for x in current_pipes]:
680
+ return True
681
+ else:
682
+ current_datasources: List[Dict[str, Any]] = await tb_client.datasources()
683
+ if name in [x["name"] for x in current_datasources]:
684
+ return True
685
+ return False
686
+
687
+
688
+ async def exec_file(
689
+ r: Dict[str, Any],
690
+ tb_client: TinyB,
691
+ force: bool,
692
+ check: bool,
693
+ debug: bool,
694
+ populate: bool,
695
+ populate_subset,
696
+ populate_condition,
697
+ unlink_on_populate_error,
698
+ wait_populate,
699
+ user_token: Optional[str],
700
+ override_datasource: bool = False,
701
+ ignore_sql_errors: bool = False,
702
+ skip_confirmation: bool = False,
703
+ only_response_times: bool = False,
704
+ run_tests=False,
705
+ as_standard=False,
706
+ tests_to_run: int = 0,
707
+ tests_relative_change: float = 0.01,
708
+ tests_to_sample_by_params: int = 0,
709
+ tests_filter_by: Optional[List[str]] = None,
710
+ tests_failfast: bool = False,
711
+ tests_ignore_order: bool = False,
712
+ tests_validate_processed_bytes: bool = False,
713
+ tests_check_requests_from_branch: bool = False,
714
+ current_ws: Optional[Dict[str, Any]] = None,
715
+ local_ws: Optional[Dict[str, Any]] = None,
716
+ fork_downstream: Optional[bool] = False,
717
+ fork: Optional[bool] = False,
718
+ git_release: Optional[bool] = False,
719
+ build: Optional[bool] = False,
720
+ is_vendor: Optional[bool] = False,
721
+ ):
722
+ if debug:
723
+ click.echo(FeedbackManager.debug_running_file(file=pp.pformat(r)))
724
+ if r["resource"] == "pipes":
725
+ await new_pipe(
726
+ r,
727
+ tb_client,
728
+ force,
729
+ check,
730
+ populate,
731
+ populate_subset,
732
+ populate_condition,
733
+ unlink_on_populate_error,
734
+ wait_populate,
735
+ ignore_sql_errors=ignore_sql_errors,
736
+ only_response_times=only_response_times,
737
+ run_tests=run_tests,
738
+ as_standard=as_standard,
739
+ tests_to_run=tests_to_run,
740
+ tests_relative_change=tests_relative_change,
741
+ tests_to_sample_by_params=tests_to_sample_by_params,
742
+ tests_filter_by=tests_filter_by,
743
+ tests_failfast=tests_failfast,
744
+ tests_ignore_order=tests_ignore_order,
745
+ tests_validate_processed_bytes=tests_validate_processed_bytes,
746
+ override_datasource=override_datasource,
747
+ tests_check_requests_from_branch=tests_check_requests_from_branch,
748
+ fork_downstream=fork_downstream,
749
+ fork=fork,
750
+ )
751
+ await update_tags_in_resource(r, "pipe", tb_client)
752
+ elif r["resource"] == "datasources":
753
+ await new_ds(
754
+ r,
755
+ tb_client,
756
+ user_token,
757
+ force,
758
+ skip_confirmation=skip_confirmation,
759
+ current_ws=current_ws,
760
+ local_ws=local_ws,
761
+ fork_downstream=fork_downstream,
762
+ fork=fork,
763
+ build=build,
764
+ is_vendor=is_vendor,
765
+ )
766
+ await update_tags_in_resource(r, "datasource", tb_client)
767
+ else:
768
+ raise click.ClickException(FeedbackManager.error_unknown_resource(resource=r["resource"]))
769
+
770
+
771
+ def get_remote_resource_name_without_version(remote_resource_name: str) -> str:
772
+ """
773
+ >>> get_remote_resource_name_without_version("r__datasource")
774
+ 'r__datasource'
775
+ >>> get_remote_resource_name_without_version("r__datasource__v0")
776
+ 'r__datasource'
777
+ >>> get_remote_resource_name_without_version("datasource")
778
+ 'datasource'
779
+ """
780
+ parts = get_name_version(remote_resource_name)
781
+ return parts["name"]
782
+
783
+
784
+ def create_downstream_dependency_graph(dependency_graph: Dict[str, Set[str]], all_resources: Dict[str, Dict[str, Any]]):
785
+ """
786
+ This function reverses the dependency graph obtained from build_graph so you have downstream dependencies for each node in the graph.
787
+
788
+ Additionally takes into account target_datasource of materialized views
789
+ """
790
+ downstream_dependency_graph: Dict[str, Set[str]] = {node: set() for node in dependency_graph}
791
+
792
+ for node, dependencies in dependency_graph.items():
793
+ for dependency in dependencies:
794
+ if dependency not in downstream_dependency_graph:
795
+ # a shared data source, we can skip it
796
+ continue
797
+ downstream_dependency_graph[dependency].add(node)
798
+
799
+ for key in dict(downstream_dependency_graph):
800
+ target_datasource = get_target_materialized_data_source_name(all_resources[key])
801
+ if target_datasource:
802
+ downstream_dependency_graph[key].update({target_datasource})
803
+ try:
804
+ downstream_dependency_graph[target_datasource].remove(key)
805
+ except KeyError:
806
+ pass
807
+
808
+ return downstream_dependency_graph
809
+
810
+
811
+ def update_dep_map_recursively(
812
+ dep_map: Dict[str, Set[str]],
813
+ downstream_dep_map: Dict[str, Set[str]],
814
+ all_resources: Dict[str, Dict[str, Any]],
815
+ to_run: Dict[str, Dict[str, Any]],
816
+ dep_map_keys: List[str],
817
+ key: Optional[str] = None,
818
+ visited: Optional[List[str]] = None,
819
+ ):
820
+ """
821
+ Given a downstream_dep_map obtained from create_downstream_dependency_graph, this function updates each node recursively to complete the downstream dependency graph for each node
822
+ """
823
+ if not visited:
824
+ visited = list()
825
+ if not key and len(dep_map_keys) == 0:
826
+ return
827
+ if not key:
828
+ key = dep_map_keys.pop()
829
+ if key not in dep_map:
830
+ dep_map[key] = set()
831
+ else:
832
+ visited.append(key)
833
+ return
834
+
835
+ for dep in downstream_dep_map.get(key, {}):
836
+ if dep not in downstream_dep_map:
837
+ continue
838
+ to_run[dep] = all_resources.get(dep, {})
839
+ update_dep_map_recursively(
840
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=dep, visited=visited
841
+ )
842
+ dep_map[key].update(downstream_dep_map[dep])
843
+ dep_map[key].update({dep})
844
+ try:
845
+ dep_map[key].remove(key)
846
+ except KeyError:
847
+ pass
848
+
849
+ to_run[key] = all_resources.get(key, {})
850
+ update_dep_map_recursively(
851
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=None, visited=visited
852
+ )
853
+
854
+
855
+ def generate_forkdownstream_graph(
856
+ all_dep_map: Dict[str, Set[str]],
857
+ all_resources: Dict[str, Dict[str, Any]],
858
+ to_run: Dict[str, Dict[str, Any]],
859
+ dep_map_keys: List[str],
860
+ ) -> Tuple[Dict[str, Set[str]], Dict[str, Dict[str, Any]]]:
861
+ """
862
+ Given a graph of dependencies from left to right, this function generates a new graph with the dependencies from right to left, taking into account that even if some nodes are not inside to_run, they are still dependencies that need to be deployed.
863
+
864
+ >>> deps, _ = generate_forkdownstream_graph(
865
+ ... {
866
+ ... 'a': {'b'},
867
+ ... 'b': {'c'},
868
+ ... 'c': set(),
869
+ ... },
870
+ ... {
871
+ ... 'a': {'resource_name': 'a'},
872
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
873
+ ... 'c': {'resource_name': 'c'},
874
+ ... },
875
+ ... {
876
+ ... 'a': {'resource_name': 'a'},
877
+ ... },
878
+ ... ['a', 'b', 'c'],
879
+ ... )
880
+ >>> {k: sorted(v) for k, v in deps.items()}
881
+ {'c': [], 'b': ['a', 'c'], 'a': []}
882
+
883
+ >>> deps, _ = generate_forkdownstream_graph(
884
+ ... {
885
+ ... 'a': {'b'},
886
+ ... 'b': {'c'},
887
+ ... 'c': set(),
888
+ ... },
889
+ ... {
890
+ ... 'a': {'resource_name': 'a'},
891
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
892
+ ... 'c': {'resource_name': 'c'},
893
+ ... },
894
+ ... {
895
+ ... 'b': {'resource_name': 'b'},
896
+ ... },
897
+ ... ['a', 'b', 'c'],
898
+ ... )
899
+ >>> {k: sorted(v) for k, v in deps.items()}
900
+ {'c': [], 'b': ['a', 'c'], 'a': []}
901
+
902
+ >>> deps, _ = generate_forkdownstream_graph(
903
+ ... {
904
+ ... 'migrated__a': {'a'},
905
+ ... 'a': {'b'},
906
+ ... 'b': {'c'},
907
+ ... 'c': set(),
908
+ ... },
909
+ ... {
910
+ ... 'migrated__a': {'resource_name': 'migrated__a', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'a'}}]},
911
+ ... 'a': {'resource_name': 'a'},
912
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
913
+ ... 'c': {'resource_name': 'c'},
914
+ ... },
915
+ ... {
916
+ ... 'migrated__a': {'resource_name': 'migrated__a'},
917
+ ... 'a': {'resource_name': 'a'},
918
+ ... },
919
+ ... ['migrated_a', 'a', 'b', 'c'],
920
+ ... )
921
+ >>> {k: sorted(v) for k, v in deps.items()}
922
+ {'c': [], 'b': ['a', 'c'], 'a': [], 'migrated_a': []}
923
+ """
924
+ downstream_dep_map = create_downstream_dependency_graph(all_dep_map, all_resources)
925
+ new_dep_map: Dict[str, Set[str]] = {}
926
+ new_to_run = deepcopy(to_run)
927
+ update_dep_map_recursively(new_dep_map, downstream_dep_map, all_resources, new_to_run, dep_map_keys)
928
+ return new_dep_map, new_to_run
929
+
930
+
931
+ @dataclass
932
+ class GraphDependencies:
933
+ """
934
+ This class is used to store the dependencies graph and the resources that are going to be deployed
935
+ """
936
+
937
+ dep_map: Dict[str, Set[str]]
938
+ to_run: Dict[str, Dict[str, Any]]
939
+
940
+ # The same as above but for the whole project, not just the resources affected by the current deployment
941
+ all_dep_map: Dict[str, Set[str]]
942
+ all_resources: Dict[str, Dict[str, Any]]
943
+
944
+
945
+ async def build_graph(
946
+ filenames: Iterable[str],
947
+ tb_client: TinyB,
948
+ dir_path: Optional[str] = None,
949
+ resource_versions=None,
950
+ workspace_map: Optional[Dict] = None,
951
+ process_dependencies: bool = False,
952
+ verbose: bool = False,
953
+ skip_connectors: bool = False,
954
+ workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
955
+ current_ws: Optional[Dict[str, Any]] = None,
956
+ changed: Optional[Dict[str, Any]] = None,
957
+ only_changes: bool = False,
958
+ fork_downstream: Optional[bool] = False,
959
+ is_internal: Optional[bool] = False,
960
+ build: Optional[bool] = False,
961
+ ) -> GraphDependencies:
962
+ """
963
+ This method will generate a dependency graph for the given files. It will also return a map of all the resources that are going to be deployed.
964
+ By default it will generate the graph from left to right, but if fork-downstream, it will generate the graph from right to left.
965
+ """
966
+ to_run: Dict[str, Any] = {}
967
+ deps: List[str] = []
968
+ dep_map: Dict[str, Any] = {}
969
+ embedded_datasources = {}
970
+ if not workspace_map:
971
+ workspace_map = {}
972
+
973
+ # These dictionaries are used to store all the resources and their dependencies for the whole project
974
+ # This is used for the downstream dependency graph
975
+ all_dep_map: Dict[str, Set[str]] = {}
976
+ all_resources: Dict[str, Dict[str, Any]] = {}
977
+
978
+ if dir_path is None:
979
+ dir_path = os.getcwd()
980
+
981
+ # When using fork-downstream or --only-changes, we need to generate the full graph of all the resources and their dependencies
982
+ # This way we can add more resources into the to_run dictionary if needed.
983
+ if process_dependencies and only_changes:
984
+ all_dependencies_graph = await build_graph(
985
+ get_project_filenames(dir_path),
986
+ tb_client,
987
+ dir_path=dir_path,
988
+ process_dependencies=True,
989
+ resource_versions=resource_versions,
990
+ workspace_map=workspace_map,
991
+ skip_connectors=True,
992
+ workspace_lib_paths=workspace_lib_paths,
993
+ current_ws=current_ws,
994
+ changed=None,
995
+ only_changes=False,
996
+ is_internal=is_internal,
997
+ build=build,
998
+ )
999
+ all_dep_map = all_dependencies_graph.dep_map
1000
+ all_resources = all_dependencies_graph.to_run
1001
+
1002
+ async def process(
1003
+ filename: str,
1004
+ deps: List[str],
1005
+ dep_map: Dict[str, Any],
1006
+ to_run: Dict[str, Any],
1007
+ workspace_lib_paths: Optional[List[Tuple[str, str]]],
1008
+ ):
1009
+ name, kind = filename.rsplit(".", 1)
1010
+ warnings = []
1011
+
1012
+ try:
1013
+ res = await process_file(
1014
+ filename,
1015
+ tb_client,
1016
+ resource_versions=resource_versions,
1017
+ skip_connectors=skip_connectors,
1018
+ workspace_map=workspace_map,
1019
+ workspace_lib_paths=workspace_lib_paths,
1020
+ current_ws=current_ws,
1021
+ )
1022
+ except click.ClickException as e:
1023
+ raise e
1024
+ except IncludeFileNotFoundException as e:
1025
+ raise click.ClickException(FeedbackManager.error_deleted_include(include_file=str(e), filename=filename))
1026
+ except Exception as e:
1027
+ raise click.ClickException(str(e))
1028
+
1029
+ for r in res:
1030
+ fn = r["resource_name"]
1031
+ warnings = r.get("warnings", [])
1032
+ if changed and fn in changed and (not changed[fn] or changed[fn] in ["shared", "remote"]):
1033
+ continue
1034
+
1035
+ if (
1036
+ fork_downstream
1037
+ and r.get("resource", "") == "pipes"
1038
+ and any(["engine" in x.get("params", {}) for x in r.get("nodes", [])])
1039
+ ):
1040
+ raise click.ClickException(FeedbackManager.error_forkdownstream_pipes_with_engine(pipe=fn))
1041
+
1042
+ to_run[fn] = r
1043
+ file_deps = r.get("deps", [])
1044
+ deps += file_deps
1045
+ # calculate and look for deps
1046
+ dep_list = []
1047
+ for x in file_deps:
1048
+ if x not in INTERNAL_TABLES or is_internal:
1049
+ f, ds = find_file_by_name(dir_path, x, verbose, workspace_lib_paths=workspace_lib_paths, resource=r)
1050
+ if f:
1051
+ dep_list.append(f.rsplit(".", 1)[0])
1052
+ if ds:
1053
+ ds_fn = ds["resource_name"]
1054
+ prev = to_run.get(ds_fn, {})
1055
+ to_run[ds_fn] = deepcopy(r)
1056
+ try:
1057
+ to_run[ds_fn]["deps"] = list(
1058
+ set(to_run[ds_fn].get("deps", []) + prev.get("deps", []) + [fn])
1059
+ )
1060
+ except ValueError:
1061
+ pass
1062
+ embedded_datasources[x] = to_run[ds_fn]
1063
+ else:
1064
+ e_ds = embedded_datasources.get(x, None)
1065
+ if e_ds:
1066
+ dep_list.append(e_ds["resource_name"])
1067
+
1068
+ # In case the datasource is to be shared and we have mapping, let's replace the name
1069
+ if "shared_with" in r and workspace_map:
1070
+ mapped_workspaces: List[str] = []
1071
+ for shared_with in r["shared_with"]:
1072
+ mapped_workspaces.append(
1073
+ workspace_map.get(shared_with)
1074
+ if workspace_map.get(shared_with, None) is not None
1075
+ else shared_with # type: ignore
1076
+ )
1077
+ r["shared_with"] = mapped_workspaces
1078
+
1079
+ dep_map[fn] = set(dep_list)
1080
+ return os.path.basename(name), warnings
1081
+
1082
+ processed = set()
1083
+
1084
+ async def get_processed(filenames: Iterable[str]):
1085
+ for filename in filenames:
1086
+ # just process changed filenames (tb deploy and --only-changes)
1087
+ if changed:
1088
+ resource = Path(filename).resolve().stem
1089
+ if resource in changed and (not changed[resource] or changed[resource] in ["shared", "remote"]):
1090
+ continue
1091
+ if os.path.isdir(filename):
1092
+ await get_processed(filenames=get_project_filenames(filename))
1093
+ else:
1094
+ if verbose:
1095
+ click.echo(FeedbackManager.info_processing_file(filename=filename))
1096
+
1097
+ if ".incl" in filename:
1098
+ click.echo(FeedbackManager.warning_skipping_include_file(file=filename))
1099
+
1100
+ name, warnings = await process(filename, deps, dep_map, to_run, workspace_lib_paths)
1101
+ processed.add(name)
1102
+
1103
+ if verbose:
1104
+ if len(warnings) == 1:
1105
+ click.echo(FeedbackManager.warning_pipe_restricted_param(word=warnings[0]))
1106
+ elif len(warnings) > 1:
1107
+ click.echo(
1108
+ FeedbackManager.warning_pipe_restricted_params(
1109
+ words=", ".join(["'{}'".format(param) for param in warnings[:-1]]),
1110
+ last_word=warnings[-1],
1111
+ )
1112
+ )
1113
+
1114
+ await get_processed(filenames=filenames)
1115
+
1116
+ if process_dependencies:
1117
+ if only_changes:
1118
+ for key in dict(to_run):
1119
+ # look for deps that are the target data source of a materialized node
1120
+ target_datasource = get_target_materialized_data_source_name(to_run[key])
1121
+ if target_datasource:
1122
+ # look in all_dep_map items that have as a dependency the target data source and are an endpoint
1123
+ for _key, _deps in all_dep_map.items():
1124
+ for dep in _deps:
1125
+ if (
1126
+ dep == target_datasource
1127
+ or (dep == key and target_datasource not in all_dep_map.get(key, []))
1128
+ ) and is_endpoint_with_no_dependencies(
1129
+ all_resources.get(_key, {}), all_dep_map, all_resources
1130
+ ):
1131
+ dep_map[_key] = _deps
1132
+ to_run[_key] = all_resources.get(_key)
1133
+ else:
1134
+ while len(deps) > 0:
1135
+ dep = deps.pop()
1136
+ if dep not in processed:
1137
+ processed.add(dep)
1138
+ f = full_path_by_name(dir_path, dep, workspace_lib_paths)
1139
+ if f:
1140
+ if verbose:
1141
+ try:
1142
+ processed_filename = f.relative_to(os.getcwd())
1143
+ except ValueError:
1144
+ processed_filename = f
1145
+ # This is to avoid processing shared data sources
1146
+ if "vendor/" in str(processed_filename):
1147
+ click.echo(FeedbackManager.info_skipping_resource(resource=processed_filename))
1148
+ continue
1149
+ click.echo(FeedbackManager.info_processing_file(filename=processed_filename))
1150
+ await process(str(f), deps, dep_map, to_run, workspace_lib_paths)
1151
+
1152
+ return GraphDependencies(dep_map, to_run, all_dep_map, all_resources)
1153
+
1154
+
1155
+ async def process_file(
1156
+ filename: str,
1157
+ tb_client: TinyB,
1158
+ resource_versions: Optional[Dict] = None,
1159
+ skip_connectors: bool = False,
1160
+ workspace_map: Optional[Dict] = None,
1161
+ workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
1162
+ current_ws: Optional[Dict[str, Any]] = None,
1163
+ ):
1164
+ if workspace_map is None:
1165
+ workspace_map = {}
1166
+
1167
+ if resource_versions is None:
1168
+ resource_versions = {}
1169
+ resource_versions_string = {k: f"__v{v}" for k, v in resource_versions.items() if v >= 0}
1170
+
1171
+ def get_engine_params(node: Dict[str, Any]) -> Dict[str, Any]:
1172
+ params = {}
1173
+
1174
+ if "engine" in node:
1175
+ engine = node["engine"]["type"]
1176
+ params["engine"] = engine
1177
+ args = node["engine"]["args"]
1178
+ for k, v in args:
1179
+ params[f"engine_{k}"] = v
1180
+ return params
1181
+
1182
+ async def get_kafka_params(node: Dict[str, Any]):
1183
+ params = {key: value for key, value in node.items() if key.startswith("kafka")}
1184
+
1185
+ if not skip_connectors:
1186
+ try:
1187
+ connector_params = {
1188
+ "kafka_bootstrap_servers": params.get("kafka_bootstrap_servers", None),
1189
+ "kafka_key": params.get("kafka_key", None),
1190
+ "kafka_secret": params.get("kafka_secret", None),
1191
+ "kafka_connection_name": params.get("kafka_connection_name", None),
1192
+ "kafka_auto_offset_reset": params.get("kafka_auto_offset_reset", None),
1193
+ "kafka_schema_registry_url": params.get("kafka_schema_registry_url", None),
1194
+ "kafka_ssl_ca_pem": get_ca_pem_content(params.get("kafka_ssl_ca_pem", None), filename),
1195
+ "kafka_sasl_mechanism": params.get("kafka_sasl_mechanism", None),
1196
+ }
1197
+
1198
+ connector = await tb_client.get_connection(**connector_params)
1199
+ if not connector:
1200
+ click.echo(
1201
+ FeedbackManager.info_creating_kafka_connection(connection_name=params["kafka_connection_name"])
1202
+ )
1203
+ required_params = [
1204
+ connector_params["kafka_bootstrap_servers"],
1205
+ connector_params["kafka_key"],
1206
+ connector_params["kafka_secret"],
1207
+ ]
1208
+
1209
+ if not all(required_params):
1210
+ raise click.ClickException(FeedbackManager.error_unknown_kafka_connection(datasource=name))
1211
+
1212
+ connector = await tb_client.connection_create_kafka(**connector_params)
1213
+ except Exception as e:
1214
+ raise click.ClickException(
1215
+ FeedbackManager.error_connection_create(
1216
+ connection_name=params["kafka_connection_name"], error=str(e)
1217
+ )
1218
+ )
1219
+
1220
+ click.echo(FeedbackManager.success_connection_using(connection_name=connector["name"]))
1221
+
1222
+ params.update(
1223
+ {
1224
+ "connector": connector["id"],
1225
+ "service": "kafka",
1226
+ }
1227
+ )
1228
+
1229
+ return params
1230
+
1231
+ async def get_import_params(datasource: Dict[str, Any], node: Dict[str, Any]) -> Dict[str, Any]:
1232
+ params: Dict[str, Any] = {key: value for key, value in node.items() if key.startswith("import_")}
1233
+
1234
+ if len(params) == 0 or skip_connectors:
1235
+ return params
1236
+
1237
+ service: Optional[str] = node.get("import_service", None)
1238
+
1239
+ if service and service.lower() == "bigquery":
1240
+ if not await tb_client.check_gcp_read_permissions():
1241
+ raise click.ClickException(FeedbackManager.error_unknown_bq_connection(datasource=datasource["name"]))
1242
+
1243
+ # Bigquery doesn't have a datalink, so we can stop here
1244
+ return params
1245
+
1246
+ # Rest of connectors
1247
+
1248
+ connector_id: Optional[str] = node.get("import_connector", None)
1249
+ connector_name: Optional[str] = node.get("import_connection_name", None)
1250
+ if not connector_name and not connector_id:
1251
+ raise click.ClickException(FeedbackManager.error_missing_connection_name(datasource=datasource["name"]))
1252
+
1253
+ if not connector_id:
1254
+ assert isinstance(connector_name, str)
1255
+
1256
+ connector: Optional[Dict[str, Any]] = await tb_client.get_connector(connector_name, service)
1257
+
1258
+ if not connector:
1259
+ raise Exception(
1260
+ FeedbackManager.error_unknown_connection(datasource=datasource["name"], connection=connector_name)
1261
+ )
1262
+ connector_id = connector["id"]
1263
+ service = connector["service"]
1264
+
1265
+ # The API needs the connector ID to create the datasource.
1266
+ params["import_connector"] = connector_id
1267
+ if service:
1268
+ params["import_service"] = service
1269
+
1270
+ if import_from_timestamp := params.get("import_from_timestamp", None):
1271
+ try:
1272
+ str(datetime.datetime.fromisoformat(import_from_timestamp).isoformat())
1273
+ except ValueError:
1274
+ raise click.ClickException(
1275
+ FeedbackManager.error_invalid_import_from_timestamp(datasource=datasource["name"])
1276
+ )
1277
+
1278
+ if service in PREVIEW_CONNECTOR_SERVICES:
1279
+ if not params.get("import_bucket_uri", None):
1280
+ raise click.ClickException(FeedbackManager.error_missing_bucket_uri(datasource=datasource["name"]))
1281
+ elif service == "dynamodb":
1282
+ if not params.get("import_table_arn", None):
1283
+ raise click.ClickException(FeedbackManager.error_missing_table_arn(datasource=datasource["name"]))
1284
+ if not params.get("import_export_bucket", None):
1285
+ raise click.ClickException(FeedbackManager.error_missing_export_bucket(datasource=datasource["name"]))
1286
+ else:
1287
+ if not params.get("import_external_datasource", None):
1288
+ raise click.ClickException(
1289
+ FeedbackManager.error_missing_external_datasource(datasource=datasource["name"])
1290
+ )
1291
+
1292
+ return params
1293
+
1294
+ if DataFileExtensions.DATASOURCE in filename:
1295
+ doc = parse_datasource(filename)
1296
+ node = doc.nodes[0]
1297
+ deps: List[str] = []
1298
+ # replace tables in materialized columns
1299
+ columns = parse_table_structure(node["schema"])
1300
+
1301
+ _format = "csv"
1302
+ for x in columns:
1303
+ if x["default_value"] and x["default_value"].lower().startswith("materialized"):
1304
+ # turn the expression into a select query so sql_get_used_tables can get the used tables
1305
+ q = "select " + x["default_value"][len("materialized") :]
1306
+ tables = await tb_client.sql_get_used_tables(q)
1307
+ # materialized column expressions could have joins, so we need to add them as deps
1308
+ deps += tables
1309
+ # generate replacements and replace the query
1310
+ replacements = {t: t + resource_versions_string.get(t, "") for t in tables}
1311
+
1312
+ replaced_results = await tb_client.replace_tables(q, replacements)
1313
+ x["default_value"] = replaced_results.replace("SELECT", "materialized", 1)
1314
+ if x.get("jsonpath", None):
1315
+ _format = "ndjson"
1316
+
1317
+ schema = ",".join(schema_to_sql_columns(columns))
1318
+
1319
+ name = os.path.basename(filename).rsplit(".", 1)[0]
1320
+
1321
+ if workspace_lib_paths:
1322
+ for wk_name, wk_path in workspace_lib_paths:
1323
+ try:
1324
+ Path(filename).relative_to(wk_path)
1325
+ name = f"{workspace_map.get(wk_name, wk_name)}.{name}"
1326
+ except ValueError:
1327
+ # the path was not relative, not inside workspace
1328
+ pass
1329
+
1330
+ version = f"__v{doc.version}" if doc.version is not None else ""
1331
+
1332
+ def append_version_to_name(name: str, version: str) -> str:
1333
+ if version != "":
1334
+ name = name.replace(".", "_")
1335
+ return name + version
1336
+ return name
1337
+
1338
+ description = node.get("description", "")
1339
+ indexes_list = node.get("indexes", [])
1340
+ indexes = None
1341
+ if indexes_list:
1342
+ indexes = "\n".join([index.to_sql() for index in indexes_list])
1343
+ params = {
1344
+ "name": append_version_to_name(name, version),
1345
+ "description": description,
1346
+ "schema": schema,
1347
+ "indexes": indexes,
1348
+ "indexes_list": indexes_list,
1349
+ "format": _format,
1350
+ }
1351
+
1352
+ params.update(get_engine_params(node))
1353
+
1354
+ if "import_service" in node or "import_connection_name" in node:
1355
+ VALID_SERVICES: Tuple[str, ...] = ("bigquery", "snowflake", "s3", "s3_iamrole", "gcs", "dynamodb")
1356
+
1357
+ import_params = await get_import_params(params, node)
1358
+
1359
+ service = import_params.get("import_service", None)
1360
+ if service and service not in VALID_SERVICES:
1361
+ raise Exception(f"Unknown import service: {service}")
1362
+
1363
+ if service in PREVIEW_CONNECTOR_SERVICES:
1364
+ ON_DEMAND_CRON = ON_DEMAND
1365
+ AUTO_CRON = "@auto"
1366
+ ON_DEMAND_CRON_EXPECTED_BY_THE_API = "@once"
1367
+ VALID_CRONS: Tuple[str, ...] = (ON_DEMAND_CRON, AUTO_CRON)
1368
+ cron = node.get("import_schedule", ON_DEMAND_CRON)
1369
+
1370
+ if cron not in VALID_CRONS:
1371
+ valid_values = ", ".join(VALID_CRONS)
1372
+ raise Exception(f"Invalid import schedule: '{cron}'. Valid values are: {valid_values}")
1373
+
1374
+ if cron == ON_DEMAND_CRON:
1375
+ import_params["import_schedule"] = ON_DEMAND_CRON_EXPECTED_BY_THE_API
1376
+ if cron == AUTO_CRON:
1377
+ period: int = DEFAULT_CRON_PERIOD
1378
+
1379
+ if current_ws:
1380
+ workspaces = (await tb_client.user_workspaces()).get("workspaces", [])
1381
+ workspace_rate_limits: Dict[str, Dict[str, int]] = next(
1382
+ (w.get("rate_limits", {}) for w in workspaces if w["id"] == current_ws["id"]), {}
1383
+ )
1384
+ period = workspace_rate_limits.get("api_datasources_create_append_replace", {}).get(
1385
+ "period", DEFAULT_CRON_PERIOD
1386
+ )
1387
+
1388
+ def seconds_to_cron_expression(seconds: int) -> str:
1389
+ minutes = seconds // 60
1390
+ hours = minutes // 60
1391
+ days = hours // 24
1392
+ if days > 0:
1393
+ return f"0 0 */{days} * *"
1394
+ if hours > 0:
1395
+ return f"0 */{hours} * * *"
1396
+ if minutes > 0:
1397
+ return f"*/{minutes} * * * *"
1398
+ return f"*/{seconds} * * * *"
1399
+
1400
+ import_params["import_schedule"] = seconds_to_cron_expression(period)
1401
+
1402
+ # Include all import_ parameters in the datasource params
1403
+ params.update(import_params)
1404
+
1405
+ # Substitute the import parameters with the ones used by the
1406
+ # import API:
1407
+ # - If an import parameter is not present and there's a default
1408
+ # value, use the default value.
1409
+ # - If the resulting value is None, do not add the parameter.
1410
+ #
1411
+ # Note: any unknown import_ parameter is left as is.
1412
+ for key in ImportReplacements.get_datafile_parameter_keys():
1413
+ replacement, default_value = ImportReplacements.get_api_param_for_datafile_param(service, key)
1414
+ if not replacement:
1415
+ continue # We should never reach this, but just in case...
1416
+
1417
+ value: Any
1418
+ try:
1419
+ value = params[key]
1420
+ del params[key]
1421
+ except KeyError:
1422
+ value = default_value
1423
+
1424
+ if value:
1425
+ params[replacement] = value
1426
+
1427
+ if "kafka_connection_name" in node:
1428
+ kafka_params = await get_kafka_params(node)
1429
+ params.update(kafka_params)
1430
+ del params["format"]
1431
+
1432
+ if "tags" in node:
1433
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1434
+ params.update(tags)
1435
+
1436
+ resources: List[Dict[str, Any]] = []
1437
+
1438
+ resources.append(
1439
+ {
1440
+ "resource": "datasources",
1441
+ "resource_name": name,
1442
+ "version": doc.version,
1443
+ "params": params,
1444
+ "filename": filename,
1445
+ "deps": deps,
1446
+ "tokens": doc.tokens,
1447
+ "shared_with": doc.shared_with,
1448
+ "filtering_tags": doc.filtering_tags,
1449
+ }
1450
+ )
1451
+
1452
+ return resources
1453
+
1454
+ elif DataFileExtensions.PIPE in filename:
1455
+ doc = parse_pipe(filename)
1456
+ version = f"__v{doc.version}" if doc.version is not None else ""
1457
+ name = os.path.basename(filename).split(".")[0]
1458
+ description = doc.description if doc.description is not None else ""
1459
+
1460
+ deps = []
1461
+ nodes: List[Dict[str, Any]] = []
1462
+
1463
+ is_copy = any([node for node in doc.nodes if node.get("type", "standard").lower() == PipeNodeTypes.COPY])
1464
+ for node in doc.nodes:
1465
+ sql = node["sql"]
1466
+ node_type = node.get("type", "standard").lower()
1467
+ params = {
1468
+ "name": node["name"],
1469
+ "type": node_type,
1470
+ "description": node.get("description", ""),
1471
+ "target_datasource": node.get("target_datasource", None),
1472
+ "copy_schedule": node.get(CopyParameters.COPY_SCHEDULE, None),
1473
+ "mode": node.get("mode", CopyModes.APPEND),
1474
+ }
1475
+
1476
+ is_export_node = ExportReplacements.is_export_node(node)
1477
+ export_params = ExportReplacements.get_params_from_datafile(node) if is_export_node else None
1478
+
1479
+ sql = sql.strip()
1480
+ is_template = False
1481
+ if sql[0] == "%":
1482
+ try:
1483
+ sql_rendered, _, _ = render_sql_template(sql[1:], test_mode=True)
1484
+ except Exception as e:
1485
+ raise click.ClickException(
1486
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1487
+ )
1488
+ is_template = True
1489
+ else:
1490
+ sql_rendered = sql
1491
+
1492
+ try:
1493
+ dependencies = await tb_client.sql_get_used_tables(sql_rendered, raising=True, is_copy=is_copy)
1494
+ deps += [t for t in dependencies if t not in [n["name"] for n in doc.nodes]]
1495
+
1496
+ except Exception as e:
1497
+ raise click.ClickException(
1498
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1499
+ )
1500
+
1501
+ if is_template:
1502
+ deps += get_used_tables_in_template(sql[1:])
1503
+
1504
+ is_neither_copy_nor_materialized = "datasource" not in node and "target_datasource" not in node
1505
+ if "engine" in node and is_neither_copy_nor_materialized:
1506
+ raise ValueError("Defining ENGINE options in a node requires a DATASOURCE")
1507
+
1508
+ if "datasource" in node:
1509
+ params["datasource"] = node["datasource"] + resource_versions_string.get(node["datasource"], "")
1510
+ deps += [node["datasource"]]
1511
+
1512
+ if "target_datasource" in node:
1513
+ params["target_datasource"] = node["target_datasource"] + resource_versions_string.get(
1514
+ node["target_datasource"], ""
1515
+ )
1516
+ deps += [node["target_datasource"]]
1517
+
1518
+ params.update(get_engine_params(node))
1519
+
1520
+ def create_replacement_for_resource(name: str) -> str:
1521
+ for old_ws, new_ws in workspace_map.items():
1522
+ name = name.replace(f"{old_ws}.", f"{new_ws}.")
1523
+ return name + resource_versions_string.get(name, "")
1524
+
1525
+ replacements = {
1526
+ x: create_replacement_for_resource(x) for x in deps if x not in [n["name"] for n in doc.nodes]
1527
+ }
1528
+
1529
+ # FIXME: Ideally we should use await tb_client.replace_tables(sql, replacements)
1530
+ for old, new in replacements.items():
1531
+ sql = re.sub("([\t \\n']+|^)" + old + "([\t \\n'\\)]+|$)", "\\1" + new + "\\2", sql)
1532
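The re.sub above rewrites each referenced resource name to its workspace-mapped, versioned counterpart, and only matches the name when it is delimited by whitespace, quotes, a closing parenthesis, or the start/end of the SQL, so longer identifiers containing the name are left alone (the FIXME notes this should ideally go through tb_client.replace_tables). A standalone check of the same pattern, with made-up names:

    import re

    def replace_resource(sql: str, old: str, new: str) -> str:
        # Same delimiter-aware pattern as in the code above.
        return re.sub("([\t \\n']+|^)" + old + "([\t \\n'\\)]+|$)", "\\1" + new + "\\2", sql)

    sql = "SELECT * FROM events WHERE 1"
    print(replace_resource(sql, "events", "events__v1"))  # SELECT * FROM events__v1 WHERE 1
    print(replace_resource(sql, "event", "event__v1"))    # unchanged: 'events' is not a full match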
+
1533
+ if "tags" in node:
1534
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1535
+ params.update(tags)
1536
+
1537
+ nodes.append(
1538
+ {
1539
+ "sql": sql,
1540
+ "params": params,
1541
+ "export_params": export_params,
1542
+ }
1543
+ )
1544
+
1545
+ return [
1546
+ {
1547
+ "resource": "pipes",
1548
+ "resource_name": name,
1549
+ "version": doc.version,
1550
+ "filename": filename,
1551
+ "name": name + version,
1552
+ "nodes": nodes,
1553
+ "deps": [x for x in set(deps)],
1554
+ "tokens": doc.tokens,
1555
+ "description": description,
1556
+ "warnings": doc.warnings,
1557
+ "filtering_tags": doc.filtering_tags,
1558
+ }
1559
+ ]
1560
+ else:
1561
+ raise click.ClickException(FeedbackManager.error_file_extension(filename=filename))
1562
+
1563
+
1564
+ def sizeof_fmt(num: Union[int, float], suffix: str = "b") -> str:
1565
+ """Readable file size
1566
+ :param num: Bytes value
1567
+ :type num: int
1568
+ :param suffix: Unit suffix (optional), default = "b"
1569
+ :type suffix: str
1570
+ :rtype: str
1571
+ """
1572
+ for unit in ["", "k", "M", "G", "T", "P", "E", "Z"]:
1573
+ if abs(num) < 1024.0:
1574
+ return "%3.1f %s%s" % (num, unit, suffix)
1575
+ num /= 1024.0
1576
+ return "%.1f%s%s" % (num, "Yi", suffix)
1577
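sizeof_fmt is the usual human-readable byte formatter: divide by 1024 until the value drops below 1024 and attach the matching unit prefix. For example, assuming the function as defined above:

    print(sizeof_fmt(512))          # 512.0 b
    print(sizeof_fmt(2048))         # 2.0 kb
    print(sizeof_fmt(5 * 1024**3))  # 5.0 Gb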
+
1578
+
1579
+ def full_path_by_name(
1580
+ folder: str, name: str, workspace_lib_paths: Optional[List[Tuple[str, str]]] = None
1581
+ ) -> Optional[Path]:
1582
+ f = Path(folder)
1583
+ ds = name + ".datasource"
1584
+ if os.path.isfile(os.path.join(folder, ds)):
1585
+ return f / ds
1586
+ if os.path.isfile(f / "datasources" / ds):
1587
+ return f / "datasources" / ds
1588
+
1589
+ pipe = name + ".pipe"
1590
+ if os.path.isfile(os.path.join(folder, pipe)):
1591
+ return f / pipe
1592
+
1593
+ if os.path.isfile(f / "endpoints" / pipe):
1594
+ return f / "endpoints" / pipe
1595
+
1596
+ if os.path.isfile(f / "pipes" / pipe):
1597
+ return f / "pipes" / pipe
1598
+
1599
+ if os.path.isfile(f / "sinks" / pipe):
1600
+ return f / "sinks" / pipe
1601
+
1602
+ if os.path.isfile(f / "copies" / pipe):
1603
+ return f / "copies" / pipe
1604
+
1605
+ if os.path.isfile(f / "playgrounds" / pipe):
1606
+ return f / "playgrounds" / pipe
1607
+
1608
+ if os.path.isfile(f / "materializations" / pipe):
1609
+ return f / "materializations" / pipe
1610
+
1611
+ if workspace_lib_paths:
1612
+ for wk_name, wk_path in workspace_lib_paths:
1613
+ if name.startswith(f"{wk_name}."):
1614
+ r = full_path_by_name(wk_path, name.replace(f"{wk_name}.", ""))
1615
+ if r:
1616
+ return r
1617
+ return None
1618
+
1619
+
1620
+ async def folder_push(
1621
+ tb_client: TinyB,
1622
+ filenames: Optional[List[str]] = None,
1623
+ dry_run: bool = False,
1624
+ check: bool = False,
1625
+ push_deps: bool = False,
1626
+ only_changes: bool = False,
1627
+ git_release: bool = False,
1628
+ debug: bool = False,
1629
+ force: bool = False,
1630
+ override_datasource: bool = False,
1631
+ folder: str = ".",
1632
+ populate: bool = False,
1633
+ populate_subset=None,
1634
+ populate_condition: Optional[str] = None,
1635
+ unlink_on_populate_error: bool = False,
1636
+ upload_fixtures: bool = False,
1637
+ wait: bool = False,
1638
+ ignore_sql_errors: bool = False,
1639
+ skip_confirmation: bool = False,
1640
+ only_response_times: bool = False,
1641
+ workspace_map=None,
1642
+ workspace_lib_paths=None,
1643
+ no_versions: bool = False,
1644
+ run_tests: bool = False,
1645
+ as_standard: bool = False,
1646
+ raise_on_exists: bool = False,
1647
+ verbose: bool = True,
1648
+ tests_to_run: int = 0,
1649
+ tests_relative_change: float = 0.01,
1650
+ tests_sample_by_params: int = 0,
1651
+ tests_filter_by: Optional[List[str]] = None,
1652
+ tests_failfast: bool = False,
1653
+ tests_ignore_order: bool = False,
1654
+ tests_validate_processed_bytes: bool = False,
1655
+ tests_check_requests_from_branch: bool = False,
1656
+ config: Optional[CLIConfig] = None,
1657
+ user_token: Optional[str] = None,
1658
+ fork_downstream: Optional[bool] = False,
1659
+ fork: Optional[bool] = False,
1660
+ is_internal: Optional[bool] = False,
1661
+ release_created: Optional[bool] = False,
1662
+ auto_promote: Optional[bool] = False,
1663
+ check_backfill_required: bool = False,
1664
+ use_main: bool = False,
1665
+ check_outdated: bool = True,
1666
+ hide_folders: bool = False,
1667
+ ):
1668
+ workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces_and_branches()).get("workspaces", [])
1669
+ current_ws: Dict[str, Any] = next(
1670
+ (workspace for workspace in workspaces if config and workspace.get("id", ".") == config.get("id", "..")), {}
1671
+ )
1672
+ is_branch = current_ws.get("is_branch", False)
1673
+
1674
+ if not workspace_map:
1675
+ workspace_map = {}
1676
+ if not workspace_lib_paths:
1677
+ workspace_lib_paths = []
1678
+
1679
+ workspace_lib_paths = list(workspace_lib_paths)
1680
+ # include vendor libs without overriding user ones
1681
+ existing_workspaces = set(x[1] for x in workspace_lib_paths)
1682
+ vendor_path = Path("vendor")
1683
+ if vendor_path.exists():
1684
+ for x in vendor_path.iterdir():
1685
+ if x.is_dir() and x.name not in existing_workspaces:
1686
+ workspace_lib_paths.append((x.name, x))
1687
+
1688
+ datasources: List[Dict[str, Any]] = await tb_client.datasources()
1689
+ pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
1690
+
1691
+ existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
1692
+ # replace workspace mapping names
1693
+ for old_ws, new_ws in workspace_map.items():
1694
+ existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
1695
+
1696
+ remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
1697
+
1698
+ # replace workspace mapping names
1699
+ for old_ws, new_ws in workspace_map.items():
1700
+ remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
1701
+
1702
+ if not filenames:
1703
+ filenames = get_project_filenames(folder)
1704
+
1705
+ # build graph to get new versions for all the files involved in the query
1706
+ # dependencies need to be processed always to get the versions
1707
+ dependencies_graph = await build_graph(
1708
+ filenames,
1709
+ tb_client,
1710
+ dir_path=folder,
1711
+ process_dependencies=True,
1712
+ workspace_map=workspace_map,
1713
+ skip_connectors=True,
1714
+ workspace_lib_paths=workspace_lib_paths,
1715
+ current_ws=current_ws,
1716
+ changed=None,
1717
+ only_changes=only_changes,
1718
+ fork_downstream=fork_downstream,
1719
+ is_internal=is_internal,
1720
+ )
1721
+
1722
+ resource_versions = {}
1723
+ latest_datasource_versions = {}
1724
+ changed = None
1725
+ # If we have datasources using VERSION, let's try to get the latest version
1726
+ dependencies_graph = await build_graph(
1727
+ filenames,
1728
+ tb_client,
1729
+ dir_path=folder,
1730
+ resource_versions=latest_datasource_versions,
1731
+ workspace_map=workspace_map,
1732
+ process_dependencies=push_deps,
1733
+ verbose=verbose,
1734
+ workspace_lib_paths=workspace_lib_paths,
1735
+ current_ws=current_ws,
1736
+ changed=None,
1737
+ only_changes=only_changes,
1738
+ fork_downstream=fork_downstream,
1739
+ is_internal=is_internal,
1740
+ )
1741
+
1742
+ if debug:
1743
+ pp.pprint(dependencies_graph.to_run)
1744
+
1745
+ if verbose:
1746
+ click.echo(FeedbackManager.info_building_dependencies())
1747
+
1748
+ def should_push_file(
1749
+ name: str,
1750
+ remote_resource_names: List[str],
1751
+ latest_datasource_versions: Dict[str, Any],
1752
+ force: bool,
1753
+ run_tests: bool,
1754
+ ) -> bool:
1755
+ """
1756
+ Function to know if we need to run a file or not
1757
+ """
1758
+ if name not in remote_resource_names:
1759
+ return True
1760
+ # Also push when the versioned resource doesn't exist yet, i.e. the version differs from the deployed one
1761
+ resource_full_name = (
1762
+ f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
1763
+ )
1764
+ if resource_full_name not in existing_resources:
1765
+ return True
1766
+ if force or run_tests:
1767
+ return True
1768
+ return False
1769
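should_push_file reduces to: push when the resource (or its versioned name) is not deployed yet, otherwise only when --force or a test run overrides the check. A simplified standalone version of the same decision, with invented names (in the real code existing_resources is captured from the enclosing scope):

    def should_push(name, remote_names, versions, existing, force=False, run_tests=False):
        if name not in remote_names:
            return True                    # brand new resource
        full = f"{name}__v{versions[name]}" if name in versions else name
        if full not in existing:
            return True                    # same name, but a new version
        return force or run_tests          # otherwise only push when forced

    existing = ["events__v1", "top_events"]
    print(should_push("events", ["events"], {"events": 2}, existing))  # True: v2 not deployed
    print(should_push("top_events", ["top_events"], {}, existing))     # False: already there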
+
1770
+ async def push(
1771
+ name: str,
1772
+ to_run: Dict[str, Dict[str, Any]],
1773
+ resource_versions: Dict[str, Any],
1774
+ latest_datasource_versions: Dict[str, Any],
1775
+ dry_run: bool,
1776
+ fork_downstream: Optional[bool] = False,
1777
+ fork: Optional[bool] = False,
1778
+ ):
1779
+ if name in to_run:
1780
+ resource = to_run[name]["resource"]
1781
+ if not dry_run:
1782
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
1783
+ if name not in resource_versions:
1784
+ version = ""
1785
+ if name in latest_datasource_versions:
1786
+ version = f"(v{latest_datasource_versions[name]})"
1787
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
1788
+ else:
1789
+ click.echo(
1790
+ FeedbackManager.info_processing_resource(
1791
+ name=name,
1792
+ version=latest_datasource_versions[name],
1793
+ latest_version=resource_versions.get(name),
1794
+ )
1795
+ )
1796
+ try:
1797
+ await exec_file(
1798
+ to_run[name],
1799
+ tb_client,
1800
+ force,
1801
+ check,
1802
+ debug and verbose,
1803
+ populate,
1804
+ populate_subset,
1805
+ populate_condition,
1806
+ unlink_on_populate_error,
1807
+ wait,
1808
+ user_token,
1809
+ override_datasource,
1810
+ ignore_sql_errors,
1811
+ skip_confirmation,
1812
+ only_response_times,
1813
+ run_tests,
1814
+ as_standard,
1815
+ tests_to_run,
1816
+ tests_relative_change,
1817
+ tests_sample_by_params,
1818
+ tests_filter_by,
1819
+ tests_failfast,
1820
+ tests_ignore_order,
1821
+ tests_validate_processed_bytes,
1822
+ tests_check_requests_from_branch,
1823
+ current_ws,
1824
+ fork_downstream,
1825
+ fork,
1826
+ git_release,
1827
+ )
1828
+ if not run_tests:
1829
+ click.echo(
1830
+ FeedbackManager.success_create(
1831
+ name=(
1832
+ name
1833
+ if to_run[name]["version"] is None
1834
+ else f'{name}__v{to_run[name]["version"]}'
1835
+ )
1836
+ )
1837
+ )
1838
+ except Exception as e:
1839
+ filename = (
1840
+ os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
1841
+ )
1842
+ exception = FeedbackManager.error_push_file_exception(
1843
+ filename=filename,
1844
+ error=e,
1845
+ )
1846
+ raise click.ClickException(exception)
1847
+ else:
1848
+ if raise_on_exists:
1849
+ raise AlreadyExistsException(
1850
+ FeedbackManager.warning_name_already_exists(
1851
+ name=name if to_run[name]["version"] is None else f'{name}__v{to_run[name]["version"]}'
1852
+ )
1853
+ )
1854
+ else:
1855
+ if await name_matches_existing_resource(resource, name, tb_client):
1856
+ if resource == "pipes":
1857
+ click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
1858
+ else:
1859
+ click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
1860
+ else:
1861
+ click.echo(
1862
+ FeedbackManager.warning_name_already_exists(
1863
+ name=(
1864
+ name
1865
+ if to_run[name]["version"] is None
1866
+ else f'{name}__v{to_run[name]["version"]}'
1867
+ )
1868
+ )
1869
+ )
1870
+ else:
1871
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
1872
+ if name not in resource_versions:
1873
+ version = ""
1874
+ if name in latest_datasource_versions:
1875
+ version = f"(v{latest_datasource_versions[name]})"
1876
+ click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
1877
+ else:
1878
+ click.echo(
1879
+ FeedbackManager.info_dry_processing_resource(
1880
+ name=name,
1881
+ version=latest_datasource_versions[name],
1882
+ latest_version=resource_versions.get(name),
1883
+ )
1884
+ )
1885
+ else:
1886
+ if await name_matches_existing_resource(resource, name, tb_client):
1887
+ if resource == "pipes":
1888
+ click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
1889
+ else:
1890
+ click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
1891
+ else:
1892
+ click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
1893
+
1894
+ async def push_files(
1895
+ dependency_graph: GraphDependencies,
1896
+ dry_run: bool = False,
1897
+ check_backfill_required: bool = False,
1898
+ ):
1899
+ endpoints_dep_map = dict()
1900
+ processed = set()
1901
+
1902
+ dependencies_graph = dependency_graph.dep_map
1903
+ resources_to_run = dependency_graph.to_run
1904
+
1905
+ if not fork_downstream:
1906
+ # First, we will deploy all the resources following the dependency graph except for the endpoints
1907
+ groups = [group for group in toposort(dependencies_graph)]
1908
+ for group in groups:
1909
+ for name in group:
1910
+ if name in processed:
1911
+ continue
1912
+
1913
+ if is_endpoint_with_no_dependencies(
1914
+ resources_to_run.get(name, {}),
1915
+ dependencies_graph,
1916
+ resources_to_run,
1917
+ ):
1918
+ endpoints_dep_map[name] = dependencies_graph[name]
1919
+ continue
1920
+
1921
+ await push(
1922
+ name,
1923
+ resources_to_run,
1924
+ resource_versions,
1925
+ latest_datasource_versions,
1926
+ dry_run,
1927
+ fork_downstream,
1928
+ fork,
1929
+ )
1930
+ processed.add(name)
1931
+
1932
+ # Then, we will deploy the endpoints that are on the dependency graph
1933
+ groups = [group for group in toposort(endpoints_dep_map)]
1934
+ for group in groups:
1935
+ for name in group:
1936
+ if name not in processed:
1937
+ await push(
1938
+ name,
1939
+ resources_to_run,
1940
+ resource_versions,
1941
+ latest_datasource_versions,
1942
+ dry_run,
1943
+ fork_downstream,
1944
+ fork,
1945
+ )
1946
+ processed.add(name)
1947
+ else:
1948
+ # This will generate the graph from right to left and will fill the gaps of the dependencies
1949
+ # If we have a graph like this:
1950
+ # A -> B -> C
1951
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
1952
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
1953
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
1954
+ dependency_graph.all_dep_map,
1955
+ dependency_graph.all_resources,
1956
+ resources_to_run,
1957
+ list(dependency_graph.dep_map.keys()),
1958
+ )
1959
+
1960
+ # First, we will deploy the datasources that need to be deployed.
1961
+ # We need to deploy the datasources from left to right as some datasources might have MVs that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
1962
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
1963
+ groups.reverse()
1964
+ for group in groups:
1965
+ for name in group:
1966
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
1967
+ continue
1968
+
1969
+ # If the resource is new, we will use the normal resource information to deploy it
1970
+ # This is mostly used for datasources with connections.
1971
+ # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
1972
+ # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
1973
+ if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
1974
+ await push(
1975
+ name,
1976
+ resources_to_run,
1977
+ resource_versions,
1978
+ latest_datasource_versions,
1979
+ dry_run,
1980
+ fork_downstream,
1981
+ fork,
1982
+ )
1983
+ else:
1984
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
1985
+ kafka_connection_name = (
1986
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
1987
+ )
1988
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
1989
+ if release_created and (kafka_connection_name or service):
1990
+ connector = "Kafka" if kafka_connection_name else service
1991
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
1992
+ raise click.ClickException(error_msg)
1993
+
1994
+ # If we are pushing a modified datasource, inform about the backfill
1995
+ if check_backfill_required and auto_promote and release_created:
1996
+ error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
1997
+ raise click.ClickException(error_msg)
1998
+
1999
+ await push(
2000
+ name,
2001
+ resources_to_run_fork_downstream,
2002
+ resource_versions,
2003
+ latest_datasource_versions,
2004
+ dry_run,
2005
+ fork_downstream,
2006
+ fork,
2007
+ )
2008
+ processed.add(name)
2009
+
2010
+ # Now, we will create a map of all the endpoints and their dependencies
2011
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
2012
+ # but does not include the intermediate resources (the gaps)
2013
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
2014
+ # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
2015
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
2016
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
2017
+ for group in groups:
2018
+ for name in group:
2019
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
2020
+ continue
2021
+
2022
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
2023
+
2024
+ # Now that we have the dependencies of the endpoints, we need to check that the resources have not been deployed yet and only care about the endpoints that depend on endpoints
2025
+ groups = [group for group in toposort(endpoints_dep_map)]
2026
+
2027
+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
2028
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
2029
+ # So we need to reverse the groups
2030
+ groups.reverse()
2031
+ for group in groups:
2032
+ for name in group:
2033
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
2034
+ continue
2035
+
2036
+ await push(
2037
+ name,
2038
+ resources_to_run_fork_downstream,
2039
+ resource_versions,
2040
+ latest_datasource_versions,
2041
+ dry_run,
2042
+ fork_downstream,
2043
+ fork,
2044
+ )
2045
+ processed.add(name)
2046
+
2047
+ # Now that the endpoints and datasources are deployed, we can deploy the rest of the pipes (copy & sinks)
2048
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
2049
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
2050
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
2051
+ for group in groups:
2052
+ for name in group:
2053
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
2054
+ continue
2055
+
2056
+ await push(
2057
+ name,
2058
+ resources_to_run_fork_downstream,
2059
+ resource_versions,
2060
+ latest_datasource_versions,
2061
+ dry_run,
2062
+ fork_downstream,
2063
+ fork,
2064
+ )
2065
+ processed.add(name)
2066
+
2067
+ # Finally, we need to deploy the materialized views from right to left.
2068
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
2069
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
2070
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
2071
+ for group in groups:
2072
+ for name in group:
2073
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
2074
+ continue
2075
+
2076
+ await push(
2077
+ name,
2078
+ resources_to_run_fork_downstream,
2079
+ resource_versions,
2080
+ latest_datasource_versions,
2081
+ dry_run,
2082
+ fork_downstream,
2083
+ fork,
2084
+ )
2085
+ processed.add(name)
2086
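push_files drives the deployment order with toposort, which takes a mapping of resource -> set of dependencies and yields groups whose dependencies are already satisfied; the code consumes those groups forwards (datasources before the pipes that read them) or reversed, depending on the phase of the fork-downstream flow. A small illustration with the same toposort package, using invented resource names:

    from toposort import toposort

    # resource -> set of resources it depends on
    dep_map = {
        "top_events_pipe": {"events_ds"},
        "events_mv_pipe": {"events_ds"},
        "daily_ds": {"events_mv_pipe"},
    }

    print([sorted(group) for group in toposort(dep_map)])
    # [['events_ds'], ['events_mv_pipe', 'top_events_pipe'], ['daily_ds']]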
+
2087
+ await push_files(dependencies_graph, dry_run)
2088
+
2089
+ if not dry_run and not run_tests:
2090
+ if upload_fixtures:
2091
+ click.echo(FeedbackManager.info_pushing_fixtures())
2092
+
2093
+ # We need to upload the fixtures even if there is no change
2094
+ if is_branch:
2095
+ filenames = get_project_filenames(folder, with_vendor=True)
2096
+ dependencies_graph = await build_graph(
2097
+ filenames,
2098
+ tb_client,
2099
+ dir_path=folder,
2100
+ resource_versions=latest_datasource_versions,
2101
+ workspace_map=workspace_map,
2102
+ process_dependencies=push_deps,
2103
+ verbose=verbose,
2104
+ workspace_lib_paths=workspace_lib_paths,
2105
+ current_ws=current_ws,
2106
+ )
2107
+
2108
+ processed = set()
2109
+ for group in toposort(dependencies_graph.dep_map):
2110
+ for f in group:
2111
+ name = os.path.basename(f)
2112
+ if name not in processed and name in dependencies_graph.to_run:
2113
+ await check_fixtures_data(
2114
+ tb_client,
2115
+ dependencies_graph.to_run[name],
2116
+ debug,
2117
+ folder,
2118
+ force,
2119
+ mode="append" if is_branch else "replace",
2120
+ )
2121
+ processed.add(name)
2122
+ for f in dependencies_graph.to_run:
2123
+ if f not in processed:
2124
+ await check_fixtures_data(
2125
+ tb_client,
2126
+ dependencies_graph.to_run[f],
2127
+ debug,
2128
+ folder,
2129
+ force,
2130
+ mode="append" if is_branch else "replace",
2131
+ )
2132
+ else:
2133
+ if verbose:
2134
+ click.echo(FeedbackManager.info_not_pushing_fixtures())
2135
+
2136
+ return dependencies_graph.to_run
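For orientation, folder_push is the entry point behind the CLI push/deploy flow: it builds the dependency graph (twice, first to resolve versions and then honouring push_deps), deploys every resource in dependency order through push_files, and finally uploads fixtures when requested. A hedged sketch of calling it directly; the TinyB constructor arguments and the token shown are assumptions, and in the real CLI the client comes from the stored config:

    import asyncio

    from tinybird.client import TinyB
    from tinybird.tb.modules.datafile.build import folder_push

    async def main() -> None:
        # Placeholder client: adjust the token/host to your workspace (assumed constructor form).
        client = TinyB("<WORKSPACE_ADMIN_TOKEN>", host="https://api.tinybird.co")
        # Dry-run a deployment of the current folder, resolving dependencies.
        await folder_push(client, dry_run=True, push_deps=True, folder=".")

    asyncio.run(main())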