tinybird 0.0.1.dev5__py3-none-any.whl → 0.0.1.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tinybird might be problematic.

Files changed (55)
  1. tinybird/__cli__.py +7 -8
  2. tinybird/tb/cli.py +28 -0
  3. tinybird/{tb_cli_modules → tb/modules}/auth.py +5 -5
  4. tinybird/{tb_cli_modules → tb/modules}/branch.py +5 -25
  5. tinybird/{tb_cli_modules → tb/modules}/build.py +10 -21
  6. tinybird/tb/modules/cicd.py +271 -0
  7. tinybird/{tb_cli_modules → tb/modules}/cli.py +20 -140
  8. tinybird/tb/modules/common.py +2110 -0
  9. tinybird/tb/modules/config.py +352 -0
  10. tinybird/{tb_cli_modules → tb/modules}/connection.py +4 -4
  11. tinybird/{tb_cli_modules → tb/modules}/create.py +20 -20
  12. tinybird/tb/modules/datafile/build.py +2103 -0
  13. tinybird/tb/modules/datafile/build_common.py +118 -0
  14. tinybird/tb/modules/datafile/build_datasource.py +403 -0
  15. tinybird/tb/modules/datafile/build_pipe.py +648 -0
  16. tinybird/tb/modules/datafile/common.py +897 -0
  17. tinybird/tb/modules/datafile/diff.py +197 -0
  18. tinybird/tb/modules/datafile/exceptions.py +23 -0
  19. tinybird/tb/modules/datafile/format_common.py +66 -0
  20. tinybird/tb/modules/datafile/format_datasource.py +160 -0
  21. tinybird/tb/modules/datafile/format_pipe.py +195 -0
  22. tinybird/tb/modules/datafile/parse_datasource.py +41 -0
  23. tinybird/tb/modules/datafile/parse_pipe.py +69 -0
  24. tinybird/tb/modules/datafile/pipe_checker.py +560 -0
  25. tinybird/tb/modules/datafile/pull.py +157 -0
  26. tinybird/{tb_cli_modules → tb/modules}/datasource.py +7 -6
  27. tinybird/tb/modules/exceptions.py +91 -0
  28. tinybird/{tb_cli_modules → tb/modules}/fmt.py +6 -3
  29. tinybird/{tb_cli_modules → tb/modules}/job.py +3 -3
  30. tinybird/{tb_cli_modules → tb/modules}/llm.py +1 -1
  31. tinybird/{tb_cli_modules → tb/modules}/local.py +9 -5
  32. tinybird/{tb_cli_modules → tb/modules}/mock.py +5 -5
  33. tinybird/{tb_cli_modules → tb/modules}/pipe.py +11 -5
  34. tinybird/{tb_cli_modules → tb/modules}/prompts.py +1 -1
  35. tinybird/tb/modules/regions.py +9 -0
  36. tinybird/{tb_cli_modules → tb/modules}/tag.py +2 -2
  37. tinybird/tb/modules/telemetry.py +310 -0
  38. tinybird/{tb_cli_modules → tb/modules}/test.py +5 -5
  39. tinybird/{tb_cli_modules → tb/modules}/tinyunit/tinyunit.py +1 -1
  40. tinybird/{tb_cli_modules → tb/modules}/token.py +3 -3
  41. tinybird/{tb_cli_modules → tb/modules}/workspace.py +5 -5
  42. tinybird/{tb_cli_modules → tb/modules}/workspace_members.py +4 -4
  43. tinybird/tb_cli_modules/common.py +9 -25
  44. tinybird/tb_cli_modules/config.py +0 -8
  45. {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/METADATA +1 -1
  46. tinybird-0.0.1.dev7.dist-info/RECORD +71 -0
  47. tinybird-0.0.1.dev7.dist-info/entry_points.txt +2 -0
  48. tinybird/datafile.py +0 -6123
  49. tinybird/tb_cli.py +0 -28
  50. tinybird-0.0.1.dev5.dist-info/RECORD +0 -52
  51. tinybird-0.0.1.dev5.dist-info/entry_points.txt +0 -2
  52. /tinybird/{tb_cli_modules → tb/modules}/table.py +0 -0
  53. /tinybird/{tb_cli_modules → tb/modules}/tinyunit/tinyunit_lib.py +0 -0
  54. {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/WHEEL +0 -0
  55. {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/top_level.txt +0 -0
tinybird/tb/modules/datafile/build.py
@@ -0,0 +1,2103 @@
1
+ import datetime
2
+ import os
3
+ import os.path
4
+ import re
5
+ import sys
6
+ import urllib.parse
7
+ from copy import deepcopy
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union
11
+
12
+ import click
13
+ from toposort import toposort
14
+
15
+ from tinybird.client import TinyB
16
+ from tinybird.feedback_manager import FeedbackManager
17
+ from tinybird.sql import parse_table_structure, schema_to_sql_columns
18
+ from tinybird.sql_template import get_used_tables_in_template, render_sql_template
19
+ from tinybird.tb.modules.common import get_ca_pem_content
20
+ from tinybird.tb.modules.config import CLIConfig
21
+ from tinybird.tb.modules.datafile.build_common import update_tags_in_resource
22
+ from tinybird.tb.modules.datafile.build_datasource import is_datasource, new_ds
23
+ from tinybird.tb.modules.datafile.build_pipe import (
24
+ get_target_materialized_data_source_name,
25
+ is_endpoint,
26
+ is_endpoint_with_no_dependencies,
27
+ is_materialized,
28
+ new_pipe,
29
+ )
30
+ from tinybird.tb.modules.datafile.common import (
31
+ DEFAULT_CRON_PERIOD,
32
+ INTERNAL_TABLES,
33
+ ON_DEMAND,
34
+ PREVIEW_CONNECTOR_SERVICES,
35
+ CopyModes,
36
+ CopyParameters,
37
+ DataFileExtensions,
38
+ ExportReplacements,
39
+ ImportReplacements,
40
+ PipeNodeTypes,
41
+ find_file_by_name,
42
+ get_name_version,
43
+ get_project_filenames,
44
+ pp,
45
+ )
46
+ from tinybird.tb.modules.datafile.exceptions import AlreadyExistsException, IncludeFileNotFoundException
47
+ from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
48
+ from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
49
+
50
+
51
+ async def folder_build(
52
+ tb_client: TinyB,
53
+ filenames: Optional[List[str]] = None,
54
+ folder: str = ".",
55
+ ignore_sql_errors: bool = False,
56
+ is_internal: bool = False,
57
+ only_pipes: bool = False,
58
+ ):
59
+ if only_pipes:
60
+ filenames = [f for f in filenames if f.endswith(".pipe")]
61
+
62
+ config = CLIConfig.get_project_config()
63
+ build = True
64
+ dry_run = False
65
+ force = True
66
+ push_deps = True
67
+ only_changes = True
68
+ debug = False
69
+ check = True
70
+ populate = False
71
+ populate_subset = None
72
+ populate_condition = None
73
+ tests_to_run = 0
74
+ user_token = None
75
+ tests_failfast = True
76
+ override_datasource = True
77
+ tests_check_requests_from_branch = False
78
+ skip_confirmation = True
79
+ wait = False
80
+ unlink_on_populate_error = False
81
+ upload_fixtures = False
82
+ only_response_times = False
83
+ workspace_map: Dict[str, Any] = {}
84
+ tests_sample_by_params = 1
85
+ tests_ignore_order = False
86
+ tests_validate_processed_bytes = False
87
+ run_tests = False
88
+ verbose = False
89
+ as_standard = False
90
+ raise_on_exists = False
91
+ fork_downstream = True
92
+ fork = False
93
+ release_created = False
94
+ auto_promote = False
95
+ hide_folders = False
96
+ tests_relative_change = 0.01
97
+ tests_sample_by_params = 0
98
+ tests_filter_by = None
99
+ tests_failfast = False
100
+ tests_ignore_order = False
101
+ tests_validate_processed_bytes = False
102
+ tests_check_requests_from_branch = False
103
+ git_release = False
104
+ workspace_lib_paths = []
105
+ workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces()).get("workspaces", [])
106
+ current_ws: Dict[str, Any] = next(
107
+ (workspace for workspace in workspaces if config and workspace.get("id", ".") == config.get("id", "..")), {}
108
+ )
109
+
110
+ workspace_lib_paths = list(workspace_lib_paths)
111
+ # include vendor libs without overriding user ones
112
+ existing_workspaces = set(x[1] for x in workspace_lib_paths)
113
+ vendor_path = Path("vendor")
114
+ if vendor_path.exists():
115
+ for x in vendor_path.iterdir():
116
+ if x.is_dir() and x.name not in existing_workspaces:
117
+ workspace_lib_paths.append((x.name, x))
118
+
119
+ datasources: List[Dict[str, Any]] = await tb_client.datasources()
120
+ pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
121
+
122
+ existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
123
+ # replace workspace mapping names
124
+ for old_ws, new_ws in workspace_map.items():
125
+ existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
126
+
127
+ remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
128
+
129
+ # replace workspace mapping names
130
+ for old_ws, new_ws in workspace_map.items():
131
+ remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
132
+
133
+ if not filenames:
134
+ filenames = get_project_filenames(folder)
135
+
136
+ changed = None
137
+
138
+ # build graph to get new versions for all the files involved in the query
139
+ # dependencies always need to be processed to get the versions
140
+ dependencies_graph = await build_graph(
141
+ filenames,
142
+ tb_client,
143
+ dir_path=folder,
144
+ process_dependencies=True,
145
+ workspace_map=workspace_map,
146
+ skip_connectors=True,
147
+ workspace_lib_paths=workspace_lib_paths,
148
+ current_ws=current_ws,
149
+ changed=changed,
150
+ only_changes=only_changes,
151
+ fork_downstream=fork_downstream,
152
+ is_internal=is_internal,
153
+ build=build,
154
+ )
155
+
156
+ resource_versions = {}
157
+ latest_datasource_versions = {}
158
+
159
+ # If we have datasources using VERSION, let's try to get the latest version
160
+ dependencies_graph = await build_graph(
161
+ filenames,
162
+ tb_client,
163
+ dir_path=folder,
164
+ resource_versions=latest_datasource_versions,
165
+ workspace_map=workspace_map,
166
+ process_dependencies=push_deps,
167
+ verbose=verbose,
168
+ workspace_lib_paths=workspace_lib_paths,
169
+ current_ws=current_ws,
170
+ changed=changed,
171
+ only_changes=only_changes,
172
+ fork_downstream=fork_downstream,
173
+ is_internal=is_internal,
174
+ build=build,
175
+ )
176
+
177
+ if debug:
178
+ pp.pprint(dependencies_graph.to_run)
179
+
180
+ def should_push_file(
181
+ name: str,
182
+ remote_resource_names: List[str],
183
+ latest_datasource_versions: Dict[str, Any],
184
+ force: bool,
185
+ run_tests: bool,
186
+ ) -> bool:
187
+ """
188
+ Function to know if we need to run a file or not
189
+ """
190
+ if name not in remote_resource_names:
191
+ return True
192
+ # We also need to push the file when its versioned name doesn't exist yet, i.e. when the version differs from the existing one
193
+ resource_full_name = (
194
+ f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
195
+ )
196
+ if resource_full_name not in existing_resources:
197
+ return True
198
+ if force or run_tests:
199
+ return True
200
+ return False
201
+
202
+ async def push(
203
+ name: str,
204
+ to_run: Dict[str, Dict[str, Any]],
205
+ resource_versions: Dict[str, Any],
206
+ latest_datasource_versions: Dict[str, Any],
207
+ dry_run: bool,
208
+ fork_downstream: Optional[bool] = False,
209
+ fork: Optional[bool] = False,
210
+ ):
211
+ if name in to_run:
212
+ resource = to_run[name]["resource"]
213
+ if not dry_run:
214
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
215
+ if name not in resource_versions:
216
+ version = ""
217
+ if name in latest_datasource_versions:
218
+ version = f"(v{latest_datasource_versions[name]})"
219
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
220
+ else:
221
+ click.echo(
222
+ FeedbackManager.info_processing_resource(
223
+ name=name,
224
+ version=latest_datasource_versions[name],
225
+ latest_version=resource_versions.get(name),
226
+ )
227
+ )
228
+ try:
229
+ await exec_file(
230
+ to_run[name],
231
+ tb_client,
232
+ force,
233
+ check,
234
+ debug and verbose,
235
+ populate,
236
+ populate_subset,
237
+ populate_condition,
238
+ unlink_on_populate_error,
239
+ wait,
240
+ user_token,
241
+ override_datasource,
242
+ ignore_sql_errors,
243
+ skip_confirmation,
244
+ only_response_times,
245
+ run_tests,
246
+ as_standard,
247
+ tests_to_run,
248
+ tests_relative_change,
249
+ tests_sample_by_params,
250
+ tests_filter_by,
251
+ tests_failfast,
252
+ tests_ignore_order,
253
+ tests_validate_processed_bytes,
254
+ tests_check_requests_from_branch,
255
+ current_ws,
256
+ fork_downstream,
257
+ fork,
258
+ git_release,
259
+ build,
260
+ )
261
+ if not run_tests:
262
+ click.echo(
263
+ FeedbackManager.success_create(
264
+ name=(
265
+ name
266
+ if to_run[name]["version"] is None
267
+ else f'{name}__v{to_run[name]["version"]}'
268
+ )
269
+ )
270
+ )
271
+ except Exception as e:
272
+ filename = (
273
+ os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
274
+ )
275
+ exception = FeedbackManager.error_push_file_exception(
276
+ filename=filename,
277
+ error=e,
278
+ )
279
+ raise click.ClickException(exception)
280
+ else:
281
+ if raise_on_exists:
282
+ raise AlreadyExistsException(
283
+ FeedbackManager.warning_name_already_exists(
284
+ name=name if to_run[name]["version"] is None else f'{name}__v{to_run[name]["version"]}'
285
+ )
286
+ )
287
+ else:
288
+ if await name_matches_existing_resource(resource, name, tb_client):
289
+ if resource == "pipes":
290
+ click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
291
+ else:
292
+ click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
293
+ else:
294
+ click.echo(
295
+ FeedbackManager.warning_name_already_exists(
296
+ name=(
297
+ name
298
+ if to_run[name]["version"] is None
299
+ else f'{name}__v{to_run[name]["version"]}'
300
+ )
301
+ )
302
+ )
303
+ else:
304
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
305
+ if name not in resource_versions:
306
+ version = ""
307
+ if name in latest_datasource_versions:
308
+ version = f"(v{latest_datasource_versions[name]})"
309
+ if build:
310
+ extension = "pipe" if resource == "pipes" else "datasource"
311
+ click.echo(
312
+ FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=version)
313
+ )
314
+ else:
315
+ click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
316
+ else:
317
+ click.echo(
318
+ FeedbackManager.info_dry_processing_resource(
319
+ name=name,
320
+ version=latest_datasource_versions[name],
321
+ latest_version=resource_versions.get(name),
322
+ )
323
+ )
324
+ else:
325
+ if await name_matches_existing_resource(resource, name, tb_client):
326
+ if resource == "pipes":
327
+ click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
328
+ else:
329
+ click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
330
+ else:
331
+ click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
332
+
333
+ async def push_files(
334
+ dependency_graph: GraphDependencies,
335
+ dry_run: bool = False,
336
+ check_backfill_required: bool = False,
337
+ ):
338
+ endpoints_dep_map = dict()
339
+ processed = set()
340
+
341
+ dependencies_graph = dependency_graph.dep_map
342
+ resources_to_run = dependency_graph.to_run
343
+
344
+ if not fork_downstream:
345
+ # First, we will deploy all the resources following the dependency graph, except for the endpoints
346
+ groups = [group for group in toposort(dependencies_graph)]
347
+ for group in groups:
348
+ for name in group:
349
+ if name in processed:
350
+ continue
351
+
352
+ if is_endpoint_with_no_dependencies(
353
+ resources_to_run.get(name, {}),
354
+ dependencies_graph,
355
+ resources_to_run,
356
+ ):
357
+ endpoints_dep_map[name] = dependencies_graph[name]
358
+ continue
359
+
360
+ await push(
361
+ name,
362
+ resources_to_run,
363
+ resource_versions,
364
+ latest_datasource_versions,
365
+ dry_run,
366
+ fork_downstream,
367
+ fork,
368
+ )
369
+ processed.add(name)
370
+
371
+ # Then, we will deploy the endpoints that are on the dependency graph
372
+ groups = [group for group in toposort(endpoints_dep_map)]
373
+ for group in groups:
374
+ for name in group:
375
+ if name not in processed:
376
+ await push(
377
+ name,
378
+ resources_to_run,
379
+ resource_versions,
380
+ latest_datasource_versions,
381
+ dry_run,
382
+ fork_downstream,
383
+ fork,
384
+ )
385
+ processed.add(name)
386
+ else:
387
+ # This will generate the graph from right to left and will fill the gaps of the dependencies
388
+ # If we have a graph like this:
389
+ # A -> B -> C
390
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
391
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
392
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
393
+ dependency_graph.all_dep_map,
394
+ dependency_graph.all_resources,
395
+ resources_to_run,
396
+ list(dependency_graph.dep_map.keys()),
397
+ )
398
+
399
+ # First, we will deploy the datasources that need to be deployed.
400
+ # We need to deploy the datasources from left to right as some datasources might have MV that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
401
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
402
+
403
+ for group in groups:
404
+ for name in group:
405
+ try:
406
+ await tb_client.datasource_delete(name, force=True)
407
+ except Exception:
408
+ pass
409
+ try:
410
+ await tb_client.pipe_delete(name)
411
+ except Exception:
412
+ pass
413
+
414
+ groups.reverse()
415
+ for group in groups:
416
+ for name in group:
417
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
418
+ continue
419
+
420
+ # If the resource is new, we will use the normal resource information to deploy it
421
+ # This is mostly used for datasources with connections.
422
+ # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
423
+ # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
424
+ if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
425
+ await push(
426
+ name,
427
+ resources_to_run,
428
+ resource_versions,
429
+ latest_datasource_versions,
430
+ dry_run,
431
+ fork_downstream,
432
+ fork,
433
+ )
434
+ else:
435
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
436
+ kafka_connection_name = (
437
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
438
+ )
439
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
440
+ if release_created and (kafka_connection_name or service):
441
+ connector = "Kafka" if kafka_connection_name else service
442
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
443
+ raise click.ClickException(error_msg)
444
+
445
+ # If we are pushing a modified datasource, inform about the required backfill
446
+ if check_backfill_required and auto_promote and release_created:
447
+ error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
448
+ raise click.ClickException(error_msg)
449
+
450
+ await push(
451
+ name,
452
+ resources_to_run_fork_downstream,
453
+ resource_versions,
454
+ latest_datasource_versions,
455
+ dry_run,
456
+ fork_downstream,
457
+ fork,
458
+ )
459
+ processed.add(name)
460
+
461
+ # Now, we will create a map of all the endpoints and their dependencies
462
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
463
+ # But it does not include the missing gaps
464
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
465
+ # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
466
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
467
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
468
+ for group in groups:
469
+ for name in group:
470
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
471
+ continue
472
+
473
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
474
+
475
+ # Now that we have the dependencies of the endpoints, we need to check that the resources have not been deployed yet and only care about the endpoints that depend on endpoints
476
+ groups = [group for group in toposort(endpoints_dep_map)]
477
+
478
+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
479
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
480
+ # So we need to reverse the groups
481
+ groups.reverse()
482
+ for group in groups:
483
+ for name in group:
484
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
485
+ continue
486
+
487
+ await push(
488
+ name,
489
+ resources_to_run_fork_downstream,
490
+ resource_versions,
491
+ latest_datasource_versions,
492
+ dry_run,
493
+ fork_downstream,
494
+ fork,
495
+ )
496
+ processed.add(name)
497
+
498
+ # Now that the endpoints and datasources are deployed, we can deploy the rest of the pipes (copy & sinks)
499
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
500
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
501
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
502
+ for group in groups:
503
+ for name in group:
504
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
505
+ continue
506
+
507
+ await push(
508
+ name,
509
+ resources_to_run_fork_downstream,
510
+ resource_versions,
511
+ latest_datasource_versions,
512
+ dry_run,
513
+ fork_downstream,
514
+ fork,
515
+ )
516
+ processed.add(name)
517
+
518
+ # Finally, we need to deploy the materialized views from right to left.
519
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
520
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
521
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
522
+ for group in groups:
523
+ for name in group:
524
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
525
+ continue
526
+
527
+ await push(
528
+ name,
529
+ resources_to_run_fork_downstream,
530
+ resource_versions,
531
+ latest_datasource_versions,
532
+ dry_run,
533
+ fork_downstream,
534
+ fork,
535
+ )
536
+ processed.add(name)
537
+
538
+ await push_files(dependencies_graph, dry_run)
539
+
540
+ if not dry_run and not run_tests:
541
+ if upload_fixtures:
542
+ click.echo(FeedbackManager.info_pushing_fixtures())
543
+
544
+ processed = set()
545
+ for group in toposort(dependencies_graph.dep_map):
546
+ for f in group:
547
+ name = os.path.basename(f)
548
+ if name not in processed and name in dependencies_graph.to_run:
549
+ await check_fixtures_data(
550
+ tb_client,
551
+ dependencies_graph.to_run[name],
552
+ debug,
553
+ folder,
554
+ force,
555
+ mode="replace",
556
+ )
557
+ processed.add(name)
558
+ for f in dependencies_graph.to_run:
559
+ if f not in processed:
560
+ await check_fixtures_data(
561
+ tb_client,
562
+ dependencies_graph.to_run[f],
563
+ debug,
564
+ folder,
565
+ force,
566
+ mode="replace",
567
+ )
568
+ else:
569
+ if verbose:
570
+ click.echo(FeedbackManager.info_not_pushing_fixtures())
571
+
572
+ return dependencies_graph.to_run
573
+
574
+
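
A minimal sketch of how folder_build above can be driven, assuming tb_client is an already-authenticated tinybird.client.TinyB instance and my_project is a placeholder folder name:

from tinybird.tb.modules.datafile.build import folder_build

async def build_project(tb_client) -> None:
    # Build every .datasource and .pipe found under ./my_project, using the defaults
    # hard-coded above (force push, process dependencies, only changed resources).
    await folder_build(tb_client, folder="my_project")

# Run with asyncio.run(build_project(tb_client)) once a client is available.
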
575
+ async def check_fixtures_data(
576
+ client: TinyB, resource: Dict[str, Any], debug: bool, folder: str = "", force: bool = False, mode: str = "replace"
577
+ ):
578
+ if debug:
579
+ click.echo(FeedbackManager.info_checking_file(file=pp.pformat(resource)))
580
+ if resource["resource"] in ["pipes", "tokens"]:
581
+ pass
582
+ elif resource["resource"] == "datasources":
583
+ datasource_name = resource["params"]["name"]
584
+ name = os.path.basename(resource["filename"]).rsplit(".", 1)[0]
585
+ fixture_path = Path(folder) / "fixtures" / f"{name}.csv"
586
+
587
+ if not fixture_path.exists():
588
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.csv"
589
+ if not fixture_path.exists():
590
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.ndjson"
591
+ if not fixture_path.exists():
592
+ fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.parquet"
593
+ if fixture_path.exists():
594
+ # Let's validate only when we are going to replace the actual data
595
+ result = await client.query(sql=f"SELECT count() as c FROM {datasource_name} FORMAT JSON")
596
+ count = result["data"][0]["c"]
597
+
598
+ if count > 0 and not force:
599
+ raise click.ClickException(
600
+ FeedbackManager.error_push_fixture_will_replace_data(datasource=datasource_name)
601
+ )
602
+
603
+ click.echo(
604
+ FeedbackManager.info_checking_file_size(
605
+ filename=resource["filename"], size=sizeof_fmt(os.stat(fixture_path).st_size)
606
+ )
607
+ )
608
+ sys.stdout.flush()
609
+ try:
610
+ await client.datasource_append_data(
611
+ datasource_name=resource["params"]["name"],
612
+ file=fixture_path,
613
+ mode=mode,
614
+ format=fixture_path.suffix[1:],
615
+ )
616
+ click.echo(FeedbackManager.success_processing_data())
617
+ except Exception as e:
618
+ raise click.ClickException(FeedbackManager.error_processing_blocks(error=e))
619
+
620
+ else:
621
+ click.echo(FeedbackManager.warning_fixture_not_found(datasource_name=name))
622
+ else:
623
+ raise click.ClickException(FeedbackManager.error_unknown_resource(resource=resource["resource"]))
624
+
625
+
626
+ def is_new(
627
+ name: str,
628
+ changed: Dict[str, str],
629
+ normal_dependency: Dict[str, Set[str]],
630
+ fork_downstream_dependency: Dict[str, Set[str]],
631
+ ) -> bool:
632
+ def is_git_new(name: str):
633
+ return changed and changed.get(name) == "A"
634
+
635
+ if not is_git_new(name):
636
+ return False
637
+
638
+ # a new resource should not depend on a resource that is not new itself
639
+ if back_deps := normal_dependency.get(name):
640
+ for dep in back_deps:
641
+ if dep in fork_downstream_dependency and not is_git_new(dep):
642
+ return False
643
+
644
+ return True
645
+
646
+
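
A quick illustration of the rule is_new encodes, with hypothetical resource names and git statuses ("A" marks a resource added in git), assuming is_new is imported from this module:

changed = {"new_ds": "A", "old_pipe": "M"}
deps = {"new_ds": set(), "old_pipe": {"new_ds"}}
assert is_new("new_ds", changed, deps, deps)        # added in git, no pre-existing upstream deps
assert not is_new("old_pipe", changed, deps, deps)  # modified ("M"), not added, so not new

changed2 = {"new_pipe": "A"}
deps2 = {"new_pipe": {"existing_ds"}, "existing_ds": set()}
assert not is_new("new_pipe", changed2, deps2, deps2)  # depends on a resource that is not new
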
647
+ async def name_matches_existing_resource(resource: str, name: str, tb_client: TinyB):
648
+ if resource == "datasources":
649
+ current_pipes: List[Dict[str, Any]] = await tb_client.pipes()
650
+ if name in [x["name"] for x in current_pipes]:
651
+ return True
652
+ else:
653
+ current_datasources: List[Dict[str, Any]] = await tb_client.datasources()
654
+ if name in [x["name"] for x in current_datasources]:
655
+ return True
656
+ return False
657
+
658
+
659
+ async def exec_file(
660
+ r: Dict[str, Any],
661
+ tb_client: TinyB,
662
+ force: bool,
663
+ check: bool,
664
+ debug: bool,
665
+ populate: bool,
666
+ populate_subset,
667
+ populate_condition,
668
+ unlink_on_populate_error,
669
+ wait_populate,
670
+ user_token: Optional[str],
671
+ override_datasource: bool = False,
672
+ ignore_sql_errors: bool = False,
673
+ skip_confirmation: bool = False,
674
+ only_response_times: bool = False,
675
+ run_tests=False,
676
+ as_standard=False,
677
+ tests_to_run: int = 0,
678
+ tests_relative_change: float = 0.01,
679
+ tests_to_sample_by_params: int = 0,
680
+ tests_filter_by: Optional[List[str]] = None,
681
+ tests_failfast: bool = False,
682
+ tests_ignore_order: bool = False,
683
+ tests_validate_processed_bytes: bool = False,
684
+ tests_check_requests_from_branch: bool = False,
685
+ current_ws: Optional[Dict[str, Any]] = None,
686
+ fork_downstream: Optional[bool] = False,
687
+ fork: Optional[bool] = False,
688
+ git_release: Optional[bool] = False,
689
+ build: Optional[bool] = False,
690
+ ):
691
+ if debug:
692
+ click.echo(FeedbackManager.debug_running_file(file=pp.pformat(r)))
693
+ if r["resource"] == "pipes":
694
+ await new_pipe(
695
+ r,
696
+ tb_client,
697
+ force,
698
+ check,
699
+ populate,
700
+ populate_subset,
701
+ populate_condition,
702
+ unlink_on_populate_error,
703
+ wait_populate,
704
+ ignore_sql_errors=ignore_sql_errors,
705
+ only_response_times=only_response_times,
706
+ run_tests=run_tests,
707
+ as_standard=as_standard,
708
+ tests_to_run=tests_to_run,
709
+ tests_relative_change=tests_relative_change,
710
+ tests_to_sample_by_params=tests_to_sample_by_params,
711
+ tests_filter_by=tests_filter_by,
712
+ tests_failfast=tests_failfast,
713
+ tests_ignore_order=tests_ignore_order,
714
+ tests_validate_processed_bytes=tests_validate_processed_bytes,
715
+ override_datasource=override_datasource,
716
+ tests_check_requests_from_branch=tests_check_requests_from_branch,
717
+ fork_downstream=fork_downstream,
718
+ fork=fork,
719
+ )
720
+ await update_tags_in_resource(r, "pipe", tb_client)
721
+ elif r["resource"] == "datasources":
722
+ await new_ds(
723
+ r,
724
+ tb_client,
725
+ user_token,
726
+ force,
727
+ skip_confirmation=skip_confirmation,
728
+ current_ws=current_ws,
729
+ fork_downstream=fork_downstream,
730
+ fork=fork,
731
+ build=build,
732
+ )
733
+ await update_tags_in_resource(r, "datasource", tb_client)
734
+ else:
735
+ raise click.ClickException(FeedbackManager.error_unknown_resource(resource=r["resource"]))
736
+
737
+
738
+ def get_remote_resource_name_without_version(remote_resource_name: str) -> str:
739
+ """
740
+ >>> get_remote_resource_name_without_version("r__datasource")
741
+ 'r__datasource'
742
+ >>> get_remote_resource_name_without_version("r__datasource__v0")
743
+ 'r__datasource'
744
+ >>> get_remote_resource_name_without_version("datasource")
745
+ 'datasource'
746
+ """
747
+ parts = get_name_version(remote_resource_name)
748
+ return parts["name"]
749
+
750
+
751
+ def create_downstream_dependency_graph(dependency_graph: Dict[str, Set[str]], all_resources: Dict[str, Dict[str, Any]]):
752
+ """
753
+ This function reverses the dependency graph obtained from build_graph so you have downstream dependencies for each node in the graph.
754
+
755
+ It additionally takes into account the target_datasource of materialized views.
756
+ """
757
+ downstream_dependency_graph: Dict[str, Set[str]] = {node: set() for node in dependency_graph}
758
+
759
+ for node, dependencies in dependency_graph.items():
760
+ for dependency in dependencies:
761
+ if dependency not in downstream_dependency_graph:
762
+ # a shared data source, we can skip it
763
+ continue
764
+ downstream_dependency_graph[dependency].add(node)
765
+
766
+ for key in dict(downstream_dependency_graph):
767
+ target_datasource = get_target_materialized_data_source_name(all_resources[key])
768
+ if target_datasource:
769
+ downstream_dependency_graph[key].update({target_datasource})
770
+ try:
771
+ downstream_dependency_graph[target_datasource].remove(key)
772
+ except KeyError:
773
+ pass
774
+
775
+ return downstream_dependency_graph
776
+
777
+
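
Stripped of the materialized-view adjustment, the reversal create_downstream_dependency_graph performs boils down to flipping every edge; a minimal self-contained sketch with hypothetical resource names:

upstream = {
    "landing_ds": set(),
    "mv_pipe": {"landing_ds"},
    "endpoint_pipe": {"mv_pipe"},
}
downstream = {node: set() for node in upstream}
for node, dependencies in upstream.items():
    for dependency in dependencies:
        if dependency in downstream:  # dependencies outside the graph (shared data sources) are skipped
            downstream[dependency].add(node)
assert downstream == {"landing_ds": {"mv_pipe"}, "mv_pipe": {"endpoint_pipe"}, "endpoint_pipe": set()}
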
778
+ def update_dep_map_recursively(
779
+ dep_map: Dict[str, Set[str]],
780
+ downstream_dep_map: Dict[str, Set[str]],
781
+ all_resources: Dict[str, Dict[str, Any]],
782
+ to_run: Dict[str, Dict[str, Any]],
783
+ dep_map_keys: List[str],
784
+ key: Optional[str] = None,
785
+ visited: Optional[List[str]] = None,
786
+ ):
787
+ """
788
+ Given a downstream_dep_map obtained from create_downstream_dependency_graph, this function updates each node recursively to complete the downstream dependency graph for each node.
789
+ """
790
+ if not visited:
791
+ visited = list()
792
+ if not key and len(dep_map_keys) == 0:
793
+ return
794
+ if not key:
795
+ key = dep_map_keys.pop()
796
+ if key not in dep_map:
797
+ dep_map[key] = set()
798
+ else:
799
+ visited.append(key)
800
+ return
801
+
802
+ for dep in downstream_dep_map.get(key, {}):
803
+ if dep not in downstream_dep_map:
804
+ continue
805
+ to_run[dep] = all_resources.get(dep, {})
806
+ update_dep_map_recursively(
807
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=dep, visited=visited
808
+ )
809
+ dep_map[key].update(downstream_dep_map[dep])
810
+ dep_map[key].update({dep})
811
+ try:
812
+ dep_map[key].remove(key)
813
+ except KeyError:
814
+ pass
815
+
816
+ to_run[key] = all_resources.get(key, {})
817
+ update_dep_map_recursively(
818
+ dep_map, downstream_dep_map, all_resources, to_run, dep_map_keys, key=None, visited=visited
819
+ )
820
+
821
+
822
+ def generate_forkdownstream_graph(
823
+ all_dep_map: Dict[str, Set[str]],
824
+ all_resources: Dict[str, Dict[str, Any]],
825
+ to_run: Dict[str, Dict[str, Any]],
826
+ dep_map_keys: List[str],
827
+ ) -> Tuple[Dict[str, Set[str]], Dict[str, Dict[str, Any]]]:
828
+ """
829
+ Given a graph of dependencies from left to right, this function generates a new graph with the dependencies from right to left, taking into account that even if some nodes are not inside to_run, they are still dependencies that need to be deployed.
830
+
831
+ >>> deps, _ = generate_forkdownstream_graph(
832
+ ... {
833
+ ... 'a': {'b'},
834
+ ... 'b': {'c'},
835
+ ... 'c': set(),
836
+ ... },
837
+ ... {
838
+ ... 'a': {'resource_name': 'a'},
839
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
840
+ ... 'c': {'resource_name': 'c'},
841
+ ... },
842
+ ... {
843
+ ... 'a': {'resource_name': 'a'},
844
+ ... },
845
+ ... ['a', 'b', 'c'],
846
+ ... )
847
+ >>> {k: sorted(v) for k, v in deps.items()}
848
+ {'c': [], 'b': ['a', 'c'], 'a': []}
849
+
850
+ >>> deps, _ = generate_forkdownstream_graph(
851
+ ... {
852
+ ... 'a': {'b'},
853
+ ... 'b': {'c'},
854
+ ... 'c': set(),
855
+ ... },
856
+ ... {
857
+ ... 'a': {'resource_name': 'a'},
858
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
859
+ ... 'c': {'resource_name': 'c'},
860
+ ... },
861
+ ... {
862
+ ... 'b': {'resource_name': 'b'},
863
+ ... },
864
+ ... ['a', 'b', 'c'],
865
+ ... )
866
+ >>> {k: sorted(v) for k, v in deps.items()}
867
+ {'c': [], 'b': ['a', 'c'], 'a': []}
868
+
869
+ >>> deps, _ = generate_forkdownstream_graph(
870
+ ... {
871
+ ... 'migrated__a': {'a'},
872
+ ... 'a': {'b'},
873
+ ... 'b': {'c'},
874
+ ... 'c': set(),
875
+ ... },
876
+ ... {
877
+ ... 'migrated__a': {'resource_name': 'migrated__a', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'a'}}]},
878
+ ... 'a': {'resource_name': 'a'},
879
+ ... 'b': {'resource_name': 'b', 'nodes': [{'params': {'type': 'materialized', 'datasource': 'c'}}] },
880
+ ... 'c': {'resource_name': 'c'},
881
+ ... },
882
+ ... {
883
+ ... 'migrated__a': {'resource_name': 'migrated__a'},
884
+ ... 'a': {'resource_name': 'a'},
885
+ ... },
886
+ ... ['migrated_a', 'a', 'b', 'c'],
887
+ ... )
888
+ >>> {k: sorted(v) for k, v in deps.items()}
889
+ {'c': [], 'b': ['a', 'c'], 'a': [], 'migrated_a': []}
890
+ """
891
+ downstream_dep_map = create_downstream_dependency_graph(all_dep_map, all_resources)
892
+ new_dep_map: Dict[str, Set[str]] = {}
893
+ new_to_run = deepcopy(to_run)
894
+ update_dep_map_recursively(new_dep_map, downstream_dep_map, all_resources, new_to_run, dep_map_keys)
895
+ return new_dep_map, new_to_run
896
+
897
+
898
+ @dataclass
899
+ class GraphDependencies:
900
+ """
901
+ This class is used to store the dependencies graph and the resources that are going to be deployed
902
+ """
903
+
904
+ dep_map: Dict[str, Set[str]]
905
+ to_run: Dict[str, Dict[str, Any]]
906
+
907
+ # The same as above but for the whole project, not just the resources affected by the current deployment
908
+ all_dep_map: Dict[str, Set[str]]
909
+ all_resources: Dict[str, Dict[str, Any]]
910
+
911
+
912
+ async def build_graph(
913
+ filenames: Iterable[str],
914
+ tb_client: TinyB,
915
+ dir_path: Optional[str] = None,
916
+ resource_versions=None,
917
+ workspace_map: Optional[Dict] = None,
918
+ process_dependencies: bool = False,
919
+ verbose: bool = False,
920
+ skip_connectors: bool = False,
921
+ workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
922
+ current_ws: Optional[Dict[str, Any]] = None,
923
+ changed: Optional[Dict[str, Any]] = None,
924
+ only_changes: bool = False,
925
+ fork_downstream: Optional[bool] = False,
926
+ is_internal: Optional[bool] = False,
927
+ build: Optional[bool] = False,
928
+ ) -> GraphDependencies:
929
+ """
930
+ This method will generate a dependency graph for the given files. It will also return a map of all the resources that are going to be deployed.
931
+ By default it will generate the graph from left to right, but if fork-downstream, it will generate the graph from right to left.
932
+ """
933
+ to_run: Dict[str, Any] = {}
934
+ deps: List[str] = []
935
+ dep_map: Dict[str, Any] = {}
936
+ embedded_datasources = {}
937
+ if not workspace_map:
938
+ workspace_map = {}
939
+
940
+ # These dictionaries are used to store all the resources and their dependencies for the whole project
941
+ # This is used for the downstream dependency graph
942
+ all_dep_map: Dict[str, Set[str]] = {}
943
+ all_resources: Dict[str, Dict[str, Any]] = {}
944
+
945
+ if dir_path is None:
946
+ dir_path = os.getcwd()
947
+
948
+ # When using fork-downstream or --only-changes, we need to generate the full graph of all the resources and their dependencies
949
+ # This way we can add more resources into the to_run dictionary if needed.
950
+ if process_dependencies and only_changes:
951
+ all_dependencies_graph = await build_graph(
952
+ get_project_filenames(dir_path),
953
+ tb_client,
954
+ dir_path=dir_path,
955
+ process_dependencies=True,
956
+ resource_versions=resource_versions,
957
+ workspace_map=workspace_map,
958
+ skip_connectors=True,
959
+ workspace_lib_paths=workspace_lib_paths,
960
+ current_ws=current_ws,
961
+ changed=None,
962
+ only_changes=False,
963
+ is_internal=is_internal,
964
+ build=build,
965
+ )
966
+ all_dep_map = all_dependencies_graph.dep_map
967
+ all_resources = all_dependencies_graph.to_run
968
+
969
+ async def process(
970
+ filename: str,
971
+ deps: List[str],
972
+ dep_map: Dict[str, Any],
973
+ to_run: Dict[str, Any],
974
+ workspace_lib_paths: Optional[List[Tuple[str, str]]],
975
+ ):
976
+ name, kind = filename.rsplit(".", 1)
977
+ warnings = []
978
+
979
+ try:
980
+ res = await process_file(
981
+ filename,
982
+ tb_client,
983
+ resource_versions=resource_versions,
984
+ skip_connectors=skip_connectors,
985
+ workspace_map=workspace_map,
986
+ workspace_lib_paths=workspace_lib_paths,
987
+ current_ws=current_ws,
988
+ )
989
+ except click.ClickException as e:
990
+ raise e
991
+ except IncludeFileNotFoundException as e:
992
+ raise click.ClickException(FeedbackManager.error_deleted_include(include_file=str(e), filename=filename))
993
+ except Exception as e:
994
+ raise click.ClickException(str(e))
995
+
996
+ for r in res:
997
+ fn = r["resource_name"]
998
+ warnings = r.get("warnings", [])
999
+ if changed and fn in changed and (not changed[fn] or changed[fn] in ["shared", "remote"]):
1000
+ continue
1001
+
1002
+ if (
1003
+ fork_downstream
1004
+ and r.get("resource", "") == "pipes"
1005
+ and any(["engine" in x.get("params", {}) for x in r.get("nodes", [])])
1006
+ ):
1007
+ raise click.ClickException(FeedbackManager.error_forkdownstream_pipes_with_engine(pipe=fn))
1008
+
1009
+ to_run[fn] = r
1010
+ file_deps = r.get("deps", [])
1011
+ deps += file_deps
1012
+ # calculate and look for deps
1013
+ dep_list = []
1014
+ for x in file_deps:
1015
+ if x not in INTERNAL_TABLES or is_internal:
1016
+ f, ds = find_file_by_name(dir_path, x, verbose, workspace_lib_paths=workspace_lib_paths, resource=r)
1017
+ if f:
1018
+ dep_list.append(f.rsplit(".", 1)[0])
1019
+ if ds:
1020
+ ds_fn = ds["resource_name"]
1021
+ prev = to_run.get(ds_fn, {})
1022
+ to_run[ds_fn] = deepcopy(r)
1023
+ try:
1024
+ to_run[ds_fn]["deps"] = list(
1025
+ set(to_run[ds_fn].get("deps", []) + prev.get("deps", []) + [fn])
1026
+ )
1027
+ except ValueError:
1028
+ pass
1029
+ embedded_datasources[x] = to_run[ds_fn]
1030
+ else:
1031
+ e_ds = embedded_datasources.get(x, None)
1032
+ if e_ds:
1033
+ dep_list.append(e_ds["resource_name"])
1034
+
1035
+ # In case the datasource is to be shared and we have mapping, let's replace the name
1036
+ if "shared_with" in r and workspace_map:
1037
+ mapped_workspaces: List[str] = []
1038
+ for shared_with in r["shared_with"]:
1039
+ mapped_workspaces.append(
1040
+ workspace_map.get(shared_with)
1041
+ if workspace_map.get(shared_with, None) is not None
1042
+ else shared_with # type: ignore
1043
+ )
1044
+ r["shared_with"] = mapped_workspaces
1045
+
1046
+ dep_map[fn] = set(dep_list)
1047
+ return os.path.basename(name), warnings
1048
+
1049
+ processed = set()
1050
+
1051
+ async def get_processed(filenames: Iterable[str]):
1052
+ for filename in filenames:
1053
+ # just process changed filenames (tb deploy and --only-changes)
1054
+ if changed:
1055
+ resource = Path(filename).resolve().stem
1056
+ if resource in changed and (not changed[resource] or changed[resource] in ["shared", "remote"]):
1057
+ continue
1058
+ if os.path.isdir(filename):
1059
+ await get_processed(filenames=get_project_filenames(filename))
1060
+ else:
1061
+ if verbose:
1062
+ click.echo(FeedbackManager.info_processing_file(filename=filename))
1063
+
1064
+ if ".incl" in filename:
1065
+ click.echo(FeedbackManager.warning_skipping_include_file(file=filename))
1066
+
1067
+ name, warnings = await process(filename, deps, dep_map, to_run, workspace_lib_paths)
1068
+ processed.add(name)
1069
+
1070
+ if verbose:
1071
+ if len(warnings) == 1:
1072
+ click.echo(FeedbackManager.warning_pipe_restricted_param(word=warnings[0]))
1073
+ elif len(warnings) > 1:
1074
+ click.echo(
1075
+ FeedbackManager.warning_pipe_restricted_params(
1076
+ words=", ".join(["'{}'".format(param) for param in warnings[:-1]]),
1077
+ last_word=warnings[-1],
1078
+ )
1079
+ )
1080
+
1081
+ await get_processed(filenames=filenames)
1082
+
1083
+ if process_dependencies:
1084
+ if only_changes:
1085
+ for key in dict(to_run):
1086
+ # look for deps that are the target data source of a materialized node
1087
+ target_datasource = get_target_materialized_data_source_name(to_run[key])
1088
+ if target_datasource:
1089
+ # look for all_dep_map items that have the target data source as a dependency and are an endpoint
1090
+ for _key, _deps in all_dep_map.items():
1091
+ for dep in _deps:
1092
+ if (
1093
+ dep == target_datasource
1094
+ or (dep == key and target_datasource not in all_dep_map.get(key, []))
1095
+ ) and is_endpoint_with_no_dependencies(
1096
+ all_resources.get(_key, {}), all_dep_map, all_resources
1097
+ ):
1098
+ dep_map[_key] = _deps
1099
+ to_run[_key] = all_resources.get(_key)
1100
+ else:
1101
+ while len(deps) > 0:
1102
+ dep = deps.pop()
1103
+ if dep not in processed:
1104
+ processed.add(dep)
1105
+ f = full_path_by_name(dir_path, dep, workspace_lib_paths)
1106
+ if f:
1107
+ if verbose:
1108
+ try:
1109
+ processed_filename = f.relative_to(os.getcwd())
1110
+ except ValueError:
1111
+ processed_filename = f
1112
+ # This is to avoid processing shared data sources
1113
+ if "vendor/" in str(processed_filename):
1114
+ click.echo(FeedbackManager.info_skipping_resource(resource=processed_filename))
1115
+ continue
1116
+ click.echo(FeedbackManager.info_processing_file(filename=processed_filename))
1117
+ await process(str(f), deps, dep_map, to_run, workspace_lib_paths)
1118
+
1119
+ return GraphDependencies(dep_map, to_run, all_dep_map, all_resources)
1120
+
1121
+
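
The dep_map returned here is what push_files later feeds into toposort, which yields groups of resources whose dependencies are already satisfied; a small sketch of that ordering with hypothetical names:

from toposort import toposort

dep_map = {"endpoint_pipe": {"events_ds"}, "mv_pipe": {"events_ds"}, "events_ds": set()}
groups = list(toposort(dep_map))
assert groups == [{"events_ds"}, {"endpoint_pipe", "mv_pipe"}]  # data sources first, then the pipes
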
1122
+ async def process_file(
1123
+ filename: str,
1124
+ tb_client: TinyB,
1125
+ resource_versions: Optional[Dict] = None,
1126
+ skip_connectors: bool = False,
1127
+ workspace_map: Optional[Dict] = None,
1128
+ workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
1129
+ current_ws: Optional[Dict[str, Any]] = None,
1130
+ ):
1131
+ if workspace_map is None:
1132
+ workspace_map = {}
1133
+
1134
+ if resource_versions is None:
1135
+ resource_versions = {}
1136
+ resource_versions_string = {k: f"__v{v}" for k, v in resource_versions.items() if v >= 0}
1137
+
1138
+ def get_engine_params(node: Dict[str, Any]) -> Dict[str, Any]:
1139
+ params = {}
1140
+
1141
+ if "engine" in node:
1142
+ engine = node["engine"]["type"]
1143
+ params["engine"] = engine
1144
+ args = node["engine"]["args"]
1145
+ for k, v in args:
1146
+ params[f"engine_{k}"] = v
1147
+ return params
1148
+
1149
+ async def get_kafka_params(node: Dict[str, Any]):
1150
+ params = {key: value for key, value in node.items() if key.startswith("kafka")}
1151
+
1152
+ if not skip_connectors:
1153
+ try:
1154
+ connector_params = {
1155
+ "kafka_bootstrap_servers": params.get("kafka_bootstrap_servers", None),
1156
+ "kafka_key": params.get("kafka_key", None),
1157
+ "kafka_secret": params.get("kafka_secret", None),
1158
+ "kafka_connection_name": params.get("kafka_connection_name", None),
1159
+ "kafka_auto_offset_reset": params.get("kafka_auto_offset_reset", None),
1160
+ "kafka_schema_registry_url": params.get("kafka_schema_registry_url", None),
1161
+ "kafka_ssl_ca_pem": get_ca_pem_content(params.get("kafka_ssl_ca_pem", None), filename),
1162
+ "kafka_sasl_mechanism": params.get("kafka_sasl_mechanism", None),
1163
+ }
1164
+
1165
+ connector = await tb_client.get_connection(**connector_params)
1166
+ if not connector:
1167
+ click.echo(
1168
+ FeedbackManager.info_creating_kafka_connection(connection_name=params["kafka_connection_name"])
1169
+ )
1170
+ required_params = [
1171
+ connector_params["kafka_bootstrap_servers"],
1172
+ connector_params["kafka_key"],
1173
+ connector_params["kafka_secret"],
1174
+ ]
1175
+
1176
+ if not all(required_params):
1177
+ raise click.ClickException(FeedbackManager.error_unknown_kafka_connection(datasource=name))
1178
+
1179
+ connector = await tb_client.connection_create_kafka(**connector_params)
1180
+ except Exception as e:
1181
+ raise click.ClickException(
1182
+ FeedbackManager.error_connection_create(
1183
+ connection_name=params["kafka_connection_name"], error=str(e)
1184
+ )
1185
+ )
1186
+
1187
+ click.echo(FeedbackManager.success_connection_using(connection_name=connector["name"]))
1188
+
1189
+ params.update(
1190
+ {
1191
+ "connector": connector["id"],
1192
+ "service": "kafka",
1193
+ }
1194
+ )
1195
+
1196
+ return params
1197
+
1198
+ async def get_import_params(datasource: Dict[str, Any], node: Dict[str, Any]) -> Dict[str, Any]:
1199
+ params: Dict[str, Any] = {key: value for key, value in node.items() if key.startswith("import_")}
1200
+
1201
+ if len(params) == 0 or skip_connectors:
1202
+ return params
1203
+
1204
+ service: Optional[str] = node.get("import_service", None)
1205
+
1206
+ if service and service.lower() == "bigquery":
1207
+ if not await tb_client.check_gcp_read_permissions():
1208
+ raise click.ClickException(FeedbackManager.error_unknown_bq_connection(datasource=datasource["name"]))
1209
+
1210
+ # Bigquery doesn't have a datalink, so we can stop here
1211
+ return params
1212
+
1213
+ # Rest of connectors
1214
+
1215
+ connector_id: Optional[str] = node.get("import_connector", None)
1216
+ connector_name: Optional[str] = node.get("import_connection_name", None)
1217
+ if not connector_name and not connector_id:
1218
+ raise click.ClickException(FeedbackManager.error_missing_connection_name(datasource=datasource["name"]))
1219
+
1220
+ if not connector_id:
1221
+ assert isinstance(connector_name, str)
1222
+
1223
+ connector: Optional[Dict[str, Any]] = await tb_client.get_connector(connector_name, service)
1224
+
1225
+ if not connector:
1226
+ raise Exception(
1227
+ FeedbackManager.error_unknown_connection(datasource=datasource["name"], connection=connector_name)
1228
+ )
1229
+ connector_id = connector["id"]
1230
+ service = connector["service"]
1231
+
1232
+ # The API needs the connector ID to create the datasource.
1233
+ params["import_connector"] = connector_id
1234
+ if service:
1235
+ params["import_service"] = service
1236
+
1237
+ if import_from_timestamp := params.get("import_from_timestamp", None):
1238
+ try:
1239
+ str(datetime.datetime.fromisoformat(import_from_timestamp).isoformat())
1240
+ except ValueError:
1241
+ raise click.ClickException(
1242
+ FeedbackManager.error_invalid_import_from_timestamp(datasource=datasource["name"])
1243
+ )
1244
+
1245
+ if service in PREVIEW_CONNECTOR_SERVICES:
1246
+ if not params.get("import_bucket_uri", None):
1247
+ raise click.ClickException(FeedbackManager.error_missing_bucket_uri(datasource=datasource["name"]))
1248
+ elif service == "dynamodb":
1249
+ if not params.get("import_table_arn", None):
1250
+ raise click.ClickException(FeedbackManager.error_missing_table_arn(datasource=datasource["name"]))
1251
+ if not params.get("import_export_bucket", None):
1252
+ raise click.ClickException(FeedbackManager.error_missing_export_bucket(datasource=datasource["name"]))
1253
+ else:
1254
+ if not params.get("import_external_datasource", None):
1255
+ raise click.ClickException(
1256
+ FeedbackManager.error_missing_external_datasource(datasource=datasource["name"])
1257
+ )
1258
+
1259
+ return params
1260
+
1261
+ if DataFileExtensions.DATASOURCE in filename:
1262
+ doc = parse_datasource(filename)
1263
+ node = doc.nodes[0]
1264
+ deps: List[str] = []
1265
+ # replace tables on materialized columns
1266
+ columns = parse_table_structure(node["schema"])
1267
+
1268
+ _format = "csv"
1269
+ for x in columns:
1270
+ if x["default_value"] and x["default_value"].lower().startswith("materialized"):
1271
+ # turn the expression into a select query so sql_get_used_tables can get the used tables
1272
+ q = "select " + x["default_value"][len("materialized") :]
1273
+ tables = await tb_client.sql_get_used_tables(q)
1274
+ # materialized column expressions could have joins, so we need to add them as deps
1275
+ deps += tables
1276
+ # generate replacements and replace the query
1277
+ replacements = {t: t + resource_versions_string.get(t, "") for t in tables}
1278
+
1279
+ replaced_results = await tb_client.replace_tables(q, replacements)
1280
+ x["default_value"] = replaced_results.replace("SELECT", "materialized", 1)
1281
+ if x.get("jsonpath", None):
1282
+ _format = "ndjson"
1283
+
1284
+ schema = ",".join(schema_to_sql_columns(columns))
1285
+
1286
+ name = os.path.basename(filename).rsplit(".", 1)[0]
1287
+
1288
+ if workspace_lib_paths:
1289
+ for wk_name, wk_path in workspace_lib_paths:
1290
+ try:
1291
+ Path(filename).relative_to(wk_path)
1292
+ name = f"{workspace_map.get(wk_name, wk_name)}.{name}"
1293
+ except ValueError:
1294
+ # the path was not relative, not inside workspace
1295
+ pass
1296
+
1297
+ version = f"__v{doc.version}" if doc.version is not None else ""
1298
+
1299
+ def append_version_to_name(name: str, version: str) -> str:
1300
+ if version != "":
1301
+ name = name.replace(".", "_")
1302
+ return name + version
1303
+ return name
1304
+
1305
+ description = node.get("description", "")
1306
+ indexes_list = node.get("indexes", [])
1307
+ indexes = None
1308
+ if indexes_list:
1309
+ indexes = "\n".join([index.to_sql() for index in indexes_list])
1310
+ params = {
1311
+ "name": append_version_to_name(name, version),
1312
+ "description": description,
1313
+ "schema": schema,
1314
+ "indexes": indexes,
1315
+ "indexes_list": indexes_list,
1316
+ "format": _format,
1317
+ }
1318
+
1319
+ params.update(get_engine_params(node))
1320
+
1321
+ if "import_service" in node or "import_connection_name" in node:
1322
+ VALID_SERVICES: Tuple[str, ...] = ("bigquery", "snowflake", "s3", "s3_iamrole", "gcs", "dynamodb")
1323
+
1324
+ import_params = await get_import_params(params, node)
1325
+
1326
+ service = import_params.get("import_service", None)
1327
+ if service and service not in VALID_SERVICES:
1328
+ raise Exception(f"Unknown import service: {service}")
1329
+
1330
+ if service in PREVIEW_CONNECTOR_SERVICES:
1331
+ ON_DEMAND_CRON = ON_DEMAND
1332
+ AUTO_CRON = "@auto"
1333
+ ON_DEMAND_CRON_EXPECTED_BY_THE_API = "@once"
1334
+ VALID_CRONS: Tuple[str, ...] = (ON_DEMAND_CRON, AUTO_CRON)
1335
+ cron = node.get("import_schedule", ON_DEMAND_CRON)
1336
+
1337
+ if cron not in VALID_CRONS:
1338
+ valid_values = ", ".join(VALID_CRONS)
1339
+ raise Exception(f"Invalid import schedule: '{cron}'. Valid values are: {valid_values}")
1340
+
1341
+ if cron == ON_DEMAND_CRON:
1342
+ import_params["import_schedule"] = ON_DEMAND_CRON_EXPECTED_BY_THE_API
1343
+ if cron == AUTO_CRON:
1344
+ period: int = DEFAULT_CRON_PERIOD
1345
+
1346
+ if current_ws:
1347
+ workspaces = (await tb_client.user_workspaces()).get("workspaces", [])
1348
+ workspace_rate_limits: Dict[str, Dict[str, int]] = next(
1349
+ (w.get("rate_limits", {}) for w in workspaces if w["id"] == current_ws["id"]), {}
1350
+ )
1351
+ period = workspace_rate_limits.get("api_datasources_create_append_replace", {}).get(
1352
+ "period", DEFAULT_CRON_PERIOD
1353
+ )
1354
+
1355
+ def seconds_to_cron_expression(seconds: int) -> str:
1356
+ minutes = seconds // 60
1357
+ hours = minutes // 60
1358
+ days = hours // 24
1359
+ if days > 0:
1360
+ return f"0 0 */{days} * *"
1361
+ if hours > 0:
1362
+ return f"0 */{hours} * * *"
1363
+ if minutes > 0:
1364
+ return f"*/{minutes} * * * *"
1365
+ return f"*/{seconds} * * * *"
1366
+
1367
+ import_params["import_schedule"] = seconds_to_cron_expression(period)
1368
+
1369
+ # Include all import_ parameters in the datasource params
1370
+ params.update(import_params)
1371
+
1372
+ # Substitute the import parameters with the ones used by the
1373
+ # import API:
1374
+ # - If an import parameter is not present and there's a default
1375
+ # value, use the default value.
1376
+ # - If the resulting value is None, do not add the parameter.
1377
+ #
1378
+ # Note: any unknown import_ parameter is left as is.
1379
+ for key in ImportReplacements.get_datafile_parameter_keys():
1380
+ replacement, default_value = ImportReplacements.get_api_param_for_datafile_param(service, key)
1381
+ if not replacement:
1382
+ continue # We should never reach this, but just in case...
1383
+
1384
+ value: Any
1385
+ try:
1386
+ value = params[key]
1387
+ del params[key]
1388
+ except KeyError:
1389
+ value = default_value
1390
+
1391
+ if value:
1392
+ params[replacement] = value
1393
+
1394
+ if "kafka_connection_name" in node:
1395
+ kafka_params = await get_kafka_params(node)
1396
+ params.update(kafka_params)
1397
+ del params["format"]
1398
+
1399
+ if "tags" in node:
1400
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1401
+ params.update(tags)
1402
+
1403
+ resources: List[Dict[str, Any]] = []
1404
+
1405
+ resources.append(
1406
+ {
1407
+ "resource": "datasources",
1408
+ "resource_name": name,
1409
+ "version": doc.version,
1410
+ "params": params,
1411
+ "filename": filename,
1412
+ "deps": deps,
1413
+ "tokens": doc.tokens,
1414
+ "shared_with": doc.shared_with,
1415
+ "filtering_tags": doc.filtering_tags,
1416
+ }
1417
+ )
1418
+
1419
+ return resources
1420
+
1421
+ elif DataFileExtensions.PIPE in filename:
1422
+ doc = parse_pipe(filename)
1423
+ version = f"__v{doc.version}" if doc.version is not None else ""
1424
+ name = os.path.basename(filename).split(".")[0]
1425
+ description = doc.description if doc.description is not None else ""
1426
+
1427
+ deps = []
1428
+ nodes: List[Dict[str, Any]] = []
1429
+
1430
+ is_copy = any([node for node in doc.nodes if node.get("type", "standard").lower() == PipeNodeTypes.COPY])
1431
+ for node in doc.nodes:
1432
+ sql = node["sql"]
1433
+ node_type = node.get("type", "standard").lower()
1434
+ params = {
1435
+ "name": node["name"],
1436
+ "type": node_type,
1437
+ "description": node.get("description", ""),
1438
+ "target_datasource": node.get("target_datasource", None),
1439
+ "copy_schedule": node.get(CopyParameters.COPY_SCHEDULE, None),
1440
+ "mode": node.get("mode", CopyModes.APPEND),
1441
+ }
1442
+
1443
+ is_export_node = ExportReplacements.is_export_node(node)
1444
+ export_params = ExportReplacements.get_params_from_datafile(node) if is_export_node else None
1445
+
1446
+ sql = sql.strip()
1447
+ is_template = False
1448
+ if sql[0] == "%":
1449
+ try:
1450
+ sql_rendered, _, _ = render_sql_template(sql[1:], test_mode=True)
1451
+ except Exception as e:
1452
+ raise click.ClickException(
1453
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1454
+ )
1455
+ is_template = True
1456
+ else:
1457
+ sql_rendered = sql
1458
+
1459
+ try:
1460
+ dependencies = await tb_client.sql_get_used_tables(sql_rendered, raising=True, is_copy=is_copy)
1461
+ deps += [t for t in dependencies if t not in [n["name"] for n in doc.nodes]]
1462
+
1463
+ except Exception as e:
1464
+ raise click.ClickException(
1465
+ FeedbackManager.error_parsing_node(node=node["name"], pipe=name, error=str(e))
1466
+ )
1467
+
1468
+ if is_template:
1469
+ deps += get_used_tables_in_template(sql[1:])
1470
+
1471
+ is_neither_copy_nor_materialized = "datasource" not in node and "target_datasource" not in node
1472
+ if "engine" in node and is_neither_copy_nor_materialized:
1473
+ raise ValueError("Defining ENGINE options in a node requires a DATASOURCE")
1474
+
1475
+ if "datasource" in node:
1476
+ params["datasource"] = node["datasource"] + resource_versions_string.get(node["datasource"], "")
1477
+ deps += [node["datasource"]]
1478
+
1479
+ if "target_datasource" in node:
1480
+ params["target_datasource"] = node["target_datasource"] + resource_versions_string.get(
1481
+ node["target_datasource"], ""
1482
+ )
1483
+ deps += [node["target_datasource"]]
1484
+
1485
+ params.update(get_engine_params(node))
1486
+
1487
+ def create_replacement_for_resource(name: str) -> str:
1488
+ for old_ws, new_ws in workspace_map.items():
1489
+ name = name.replace(f"{old_ws}.", f"{new_ws}.")
1490
+ return name + resource_versions_string.get(name, "")
1491
+
1492
+ replacements = {
1493
+ x: create_replacement_for_resource(x) for x in deps if x not in [n["name"] for n in doc.nodes]
1494
+ }
1495
+
1496
+ # FIXME: Ideally we should use await tb_client.replace_tables(sql, replacements)
1497
+ for old, new in replacements.items():
1498
+ sql = re.sub("([\t \\n']+|^)" + old + "([\t \\n'\\)]+|$)", "\\1" + new + "\\2", sql)
1499
+
1500
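The re.sub above swaps a dependency name for its versioned/mapped replacement only when it appears as a whole token in the SQL. A rough standalone equivalent (simplified pattern, illustrative only):

    import re

    def replace_table(sql: str, old: str, new: str) -> str:
        # Match `old` only when bounded by whitespace, quotes, a closing
        # parenthesis, or the start/end of the SQL text.
        pattern = r"([\t \n']+|^)" + re.escape(old) + r"([\t \n')]+|$)"
        return re.sub(pattern, lambda m: m.group(1) + new + m.group(2), sql)

    print(replace_table("SELECT * FROM events", "events", "events__v1"))
    # SELECT * FROM events__v1
    print(replace_table("SELECT * FROM events_daily", "events", "events__v1"))
    # SELECT * FROM events_daily  (not a whole token, left untouched)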
+ if "tags" in node:
1501
+ tags = {k: v[0] for k, v in urllib.parse.parse_qs(node["tags"]).items()}
1502
+ params.update(tags)
1503
+
1504
+ nodes.append(
1505
+ {
1506
+ "sql": sql,
1507
+ "params": params,
1508
+ "export_params": export_params,
1509
+ }
1510
+ )
1511
+
1512
+ return [
1513
+ {
1514
+ "resource": "pipes",
1515
+ "resource_name": name,
1516
+ "version": doc.version,
1517
+ "filename": filename,
1518
+ "name": name + version,
1519
+ "nodes": nodes,
1520
+ "deps": [x for x in set(deps)],
1521
+ "tokens": doc.tokens,
1522
+ "description": description,
1523
+ "warnings": doc.warnings,
1524
+ "filtering_tags": doc.filtering_tags,
1525
+ }
1526
+ ]
1527
+ else:
1528
+ raise click.ClickException(FeedbackManager.error_file_extension(filename=filename))
1529
+
1530
+
1531
+ def sizeof_fmt(num: Union[int, float], suffix: str = "b") -> str:
1532
+ """Readable file size
1533
+ :param num: Bytes value
1534
+ :type num: int
1535
+ :param suffix: Unit suffix (optionnal) default = o
1536
+ :type suffix: str
1537
+ :rtype: str
1538
+ """
1539
+ for unit in ["", "k", "M", "G", "T", "P", "E", "Z"]:
1540
+ if abs(num) < 1024.0:
1541
+ return "%3.1f %s%s" % (num, unit, suffix)
1542
+ num /= 1024.0
1543
+ return "%.1f%s%s" % (num, "Yi", suffix)
1544
+
1545
+
1546
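For reference, a few sample calls to the helper above (outputs follow the "%3.1f %s%s" formatting):

    print(sizeof_fmt(512))            # 512.0 b
    print(sizeof_fmt(2048))           # 2.0 kb
    print(sizeof_fmt(5_000_000_000))  # 4.7 Gb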
+ def full_path_by_name(
+ folder: str, name: str, workspace_lib_paths: Optional[List[Tuple[str, str]]] = None
+ ) -> Optional[Path]:
+ f = Path(folder)
+ ds = name + ".datasource"
+ if os.path.isfile(os.path.join(folder, ds)):
+ return f / ds
+ if os.path.isfile(f / "datasources" / ds):
+ return f / "datasources" / ds
+
+ pipe = name + ".pipe"
+ if os.path.isfile(os.path.join(folder, pipe)):
+ return f / pipe
+
+ if os.path.isfile(f / "endpoints" / pipe):
+ return f / "endpoints" / pipe
+
+ if os.path.isfile(f / "pipes" / pipe):
+ return f / "pipes" / pipe
+
+ if os.path.isfile(f / "sinks" / pipe):
+ return f / "sinks" / pipe
+
+ if os.path.isfile(f / "copies" / pipe):
+ return f / "copies" / pipe
+
+ if os.path.isfile(f / "playgrounds" / pipe):
+ return f / "playgrounds" / pipe
+
+ if os.path.isfile(f / "materializations" / pipe):
+ return f / "materializations" / pipe
+
+ if workspace_lib_paths:
+ for wk_name, wk_path in workspace_lib_paths:
+ if name.startswith(f"{wk_name}."):
+ r = full_path_by_name(wk_path, name.replace(f"{wk_name}.", ""))
+ if r:
+ return r
+ return None
+
+
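A usage sketch for the lookup above, with a hypothetical project layout; the helper checks the project root first, then the per-type subfolders, then any vendored workspaces:

    # Hypothetical layout:
    #   my_project/datasources/events.datasource
    #   my_project/pipes/top_events.pipe
    print(full_path_by_name("my_project", "events"))      # my_project/datasources/events.datasource
    print(full_path_by_name("my_project", "top_events"))  # my_project/pipes/top_events.pipe
    print(full_path_by_name("my_project", "missing"))     # None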
+ async def folder_push(
+ tb_client: TinyB,
+ filenames: Optional[List[str]] = None,
+ dry_run: bool = False,
+ check: bool = False,
+ push_deps: bool = False,
+ only_changes: bool = False,
+ git_release: bool = False,
+ debug: bool = False,
+ force: bool = False,
+ override_datasource: bool = False,
+ folder: str = ".",
+ populate: bool = False,
+ populate_subset=None,
+ populate_condition: Optional[str] = None,
+ unlink_on_populate_error: bool = False,
+ upload_fixtures: bool = False,
+ wait: bool = False,
+ ignore_sql_errors: bool = False,
+ skip_confirmation: bool = False,
+ only_response_times: bool = False,
+ workspace_map=None,
+ workspace_lib_paths=None,
+ no_versions: bool = False,
+ run_tests: bool = False,
+ as_standard: bool = False,
+ raise_on_exists: bool = False,
+ verbose: bool = True,
+ tests_to_run: int = 0,
+ tests_relative_change: float = 0.01,
+ tests_sample_by_params: int = 0,
+ tests_filter_by: Optional[List[str]] = None,
+ tests_failfast: bool = False,
+ tests_ignore_order: bool = False,
+ tests_validate_processed_bytes: bool = False,
+ tests_check_requests_from_branch: bool = False,
+ config: Optional[CLIConfig] = None,
+ user_token: Optional[str] = None,
+ fork_downstream: Optional[bool] = False,
+ fork: Optional[bool] = False,
+ is_internal: Optional[bool] = False,
+ release_created: Optional[bool] = False,
+ auto_promote: Optional[bool] = False,
+ check_backfill_required: bool = False,
+ use_main: bool = False,
+ check_outdated: bool = True,
+ hide_folders: bool = False,
+ ):
+ workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces_and_branches()).get("workspaces", [])
+ current_ws: Dict[str, Any] = next(
+ (workspace for workspace in workspaces if config and workspace.get("id", ".") == config.get("id", "..")), {}
+ )
+ is_branch = current_ws.get("is_branch", False)
+
+ if not workspace_map:
+ workspace_map = {}
+ if not workspace_lib_paths:
+ workspace_lib_paths = []
+
+ workspace_lib_paths = list(workspace_lib_paths)
+ # include vendor libs without overriding user ones
+ existing_workspaces = set(x[1] for x in workspace_lib_paths)
+ vendor_path = Path("vendor")
+ if vendor_path.exists():
+ for x in vendor_path.iterdir():
+ if x.is_dir() and x.name not in existing_workspaces:
+ workspace_lib_paths.append((x.name, x))
+
+ datasources: List[Dict[str, Any]] = await tb_client.datasources()
+ pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
+
+ existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
+ # replace workspace mapping names
+ for old_ws, new_ws in workspace_map.items():
+ existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
+
+ remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
+
+ # replace workspace mapping names
+ for old_ws, new_ws in workspace_map.items():
+ remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
+
+ if not filenames:
+ filenames = get_project_filenames(folder)
+
+ # build graph to get new versions for all the files involved in the query
+ # dependencies need to be processed always to get the versions
+ dependencies_graph = await build_graph(
+ filenames,
+ tb_client,
+ dir_path=folder,
+ process_dependencies=True,
+ workspace_map=workspace_map,
+ skip_connectors=True,
+ workspace_lib_paths=workspace_lib_paths,
+ current_ws=current_ws,
+ changed=None,
+ only_changes=only_changes,
+ fork_downstream=fork_downstream,
+ is_internal=is_internal,
+ )
+
+ resource_versions = {}
+ latest_datasource_versions = {}
+ changed = None
+ # If we have datasources using VERSION, let's try to get the latest version
+ dependencies_graph = await build_graph(
+ filenames,
+ tb_client,
+ dir_path=folder,
+ resource_versions=latest_datasource_versions,
+ workspace_map=workspace_map,
+ process_dependencies=push_deps,
+ verbose=verbose,
+ workspace_lib_paths=workspace_lib_paths,
+ current_ws=current_ws,
+ changed=None,
+ only_changes=only_changes,
+ fork_downstream=fork_downstream,
+ is_internal=is_internal,
+ )
+
+ if debug:
+ pp.pprint(dependencies_graph.to_run)
+
+ if verbose:
+ click.echo(FeedbackManager.info_building_dependencies())
+
+ def should_push_file(
+ name: str,
+ remote_resource_names: List[str],
+ latest_datasource_versions: Dict[str, Any],
+ force: bool,
+ run_tests: bool,
+ ) -> bool:
+ """
+ Function to know if we need to run a file or not
+ """
+ if name not in remote_resource_names:
+ return True
+ # We also need to try to push a file when it doesn't exist or its version is different from the existing one
+ resource_full_name = (
+ f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
+ )
+ if resource_full_name not in existing_resources:
+ return True
+ if force or run_tests:
+ return True
+ return False
+
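A standalone restatement of the rule above, with hypothetical data (the real function also reads existing_resources from the enclosing closure):

    def should_push(name, remote_names, latest_versions, existing, force=False, run_tests=False):
        if name not in remote_names:
            return True  # not deployed remotely yet
        full = f"{name}__v{latest_versions[name]}" if name in latest_versions else name
        if full not in existing:
            return True  # deployed, but this version is new
        return force or run_tests  # otherwise only --force or test runs re-push it

    # "events" exists remotely as v1, but the local datafile now declares v2:
    print(should_push("events", ["events"], {"events": 2}, ["events__v1"]))  # True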
+ async def push(
+ name: str,
+ to_run: Dict[str, Dict[str, Any]],
+ resource_versions: Dict[str, Any],
+ latest_datasource_versions: Dict[str, Any],
+ dry_run: bool,
+ fork_downstream: Optional[bool] = False,
+ fork: Optional[bool] = False,
+ ):
+ if name in to_run:
+ resource = to_run[name]["resource"]
+ if not dry_run:
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
+ if name not in resource_versions:
+ version = ""
+ if name in latest_datasource_versions:
+ version = f"(v{latest_datasource_versions[name]})"
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
+ else:
+ click.echo(
+ FeedbackManager.info_processing_resource(
+ name=name,
+ version=latest_datasource_versions[name],
+ latest_version=resource_versions.get(name),
+ )
+ )
+ try:
+ await exec_file(
+ to_run[name],
+ tb_client,
+ force,
+ check,
+ debug and verbose,
+ populate,
+ populate_subset,
+ populate_condition,
+ unlink_on_populate_error,
+ wait,
+ user_token,
+ override_datasource,
+ ignore_sql_errors,
+ skip_confirmation,
+ only_response_times,
+ run_tests,
+ as_standard,
+ tests_to_run,
+ tests_relative_change,
+ tests_sample_by_params,
+ tests_filter_by,
+ tests_failfast,
+ tests_ignore_order,
+ tests_validate_processed_bytes,
+ tests_check_requests_from_branch,
+ current_ws,
+ fork_downstream,
+ fork,
+ git_release,
+ )
+ if not run_tests:
+ click.echo(
+ FeedbackManager.success_create(
+ name=(
+ name
+ if to_run[name]["version"] is None
+ else f'{name}__v{to_run[name]["version"]}'
+ )
+ )
+ )
+ except Exception as e:
+ filename = (
+ os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
+ )
+ exception = FeedbackManager.error_push_file_exception(
+ filename=filename,
+ error=e,
+ )
+ raise click.ClickException(exception)
+ else:
+ if raise_on_exists:
+ raise AlreadyExistsException(
+ FeedbackManager.warning_name_already_exists(
+ name=name if to_run[name]["version"] is None else f'{name}__v{to_run[name]["version"]}'
+ )
+ )
+ else:
+ if await name_matches_existing_resource(resource, name, tb_client):
+ if resource == "pipes":
+ click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
+ else:
+ click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
+ else:
+ click.echo(
+ FeedbackManager.warning_name_already_exists(
+ name=(
+ name
+ if to_run[name]["version"] is None
+ else f'{name}__v{to_run[name]["version"]}'
+ )
+ )
+ )
+ else:
+ if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
+ if name not in resource_versions:
+ version = ""
+ if name in latest_datasource_versions:
+ version = f"(v{latest_datasource_versions[name]})"
+ click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
+ else:
+ click.echo(
+ FeedbackManager.info_dry_processing_resource(
+ name=name,
+ version=latest_datasource_versions[name],
+ latest_version=resource_versions.get(name),
+ )
+ )
+ else:
+ if await name_matches_existing_resource(resource, name, tb_client):
+ if resource == "pipes":
+ click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
+ else:
+ click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
+ else:
+ click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
+
+ async def push_files(
+ dependency_graph: GraphDependencies,
+ dry_run: bool = False,
+ check_backfill_required: bool = False,
+ ):
+ endpoints_dep_map = dict()
+ processed = set()
+
+ dependencies_graph = dependency_graph.dep_map
+ resources_to_run = dependency_graph.to_run
+
+ if not fork_downstream:
+ # First, we will deploy all the resources following the dependency graph except for the endpoints
+ groups = [group for group in toposort(dependencies_graph)]
+ for group in groups:
+ for name in group:
+ if name in processed:
+ continue
+
+ if is_endpoint_with_no_dependencies(
+ resources_to_run.get(name, {}),
+ dependencies_graph,
+ resources_to_run,
+ ):
+ endpoints_dep_map[name] = dependencies_graph[name]
+ continue
+
+ await push(
+ name,
+ resources_to_run,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+
+ # Then, we will deploy the endpoints that are on the dependency graph
+ groups = [group for group in toposort(endpoints_dep_map)]
+ for group in groups:
+ for name in group:
+ if name not in processed:
+ await push(
+ name,
+ resources_to_run,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+ else:
+ # This will generate the graph from right to left and will fill in the gaps in the dependencies
+ # If we have a graph like this:
+ # A -> B -> C
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
+ dependency_graph.all_dep_map,
+ dependency_graph.all_resources,
+ resources_to_run,
+ list(dependency_graph.dep_map.keys()),
+ )
+
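The group/reverse pattern used below is easier to see on a toy graph. Assuming the toposort helper behaves like the toposort package on PyPI (which yields dependency-ordered groups of names), reversing the group list flips the traversal direction:

    from toposort import toposort

    dep_map = {"B": {"A"}, "C": {"B"}}  # B depends on A, C depends on B
    groups = list(toposort(dep_map))
    print(groups)                       # [{'A'}, {'B'}, {'C'}]
    groups.reverse()
    print(groups)                       # [{'C'}, {'B'}, {'A'}]  walk from the other end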
+ # First, we will deploy the datasources that need to be deployed.
+ # We need to deploy the datasources from left to right as some datasources might have MVs that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ groups.reverse()
+ for group in groups:
+ for name in group:
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
+ continue
+
+ # If the resource is new, we will use the normal resource information to deploy it
+ # This is mostly used for datasources with connections.
+ # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
+ # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
+ if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
+ await push(
+ name,
+ resources_to_run,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ else:
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
+ kafka_connection_name = (
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
+ )
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
+ if release_created and (kafka_connection_name or service):
+ connector = "Kafka" if kafka_connection_name else service
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
+ raise click.ClickException(error_msg)
+
+ # If we are pushing a modified datasource, inform about the backfill
+ if check_backfill_required and auto_promote and release_created:
+ error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
+ raise click.ClickException(error_msg)
+
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+
+ # Now, we will create a map of all the endpoints and their dependencies
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
+ # But does not include the missing gaps
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
+ # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
+ continue
+
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
+
+ # Now that we have the dependencies of the endpoints, we need to check that the resources have not been deployed yet and only care about the endpoints that depend on endpoints
+ groups = [group for group in toposort(endpoints_dep_map)]
+
+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
+ # So we need to reverse the groups
+ groups.reverse()
+ for group in groups:
+ for name in group:
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
+ continue
+
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+
+ # Now we should have the endpoints and datasources deployed, we can deploy the rest of the pipes (copy & sinks)
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
+ continue
+
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+
+ # Finally, we need to deploy the materialized views from right to left.
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
+ continue
+
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ resource_versions,
+ latest_datasource_versions,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)
+
+ await push_files(dependencies_graph, dry_run)
+
+ if not dry_run and not run_tests:
+ if upload_fixtures:
+ click.echo(FeedbackManager.info_pushing_fixtures())
+
+ # We need to upload the fixtures even if there is no change
+ if is_branch:
+ filenames = get_project_filenames(folder, with_vendor=True)
+ dependencies_graph = await build_graph(
+ filenames,
+ tb_client,
+ dir_path=folder,
+ resource_versions=latest_datasource_versions,
+ workspace_map=workspace_map,
+ process_dependencies=push_deps,
+ verbose=verbose,
+ workspace_lib_paths=workspace_lib_paths,
+ current_ws=current_ws,
+ )
+
+ processed = set()
+ for group in toposort(dependencies_graph.dep_map):
+ for f in group:
+ name = os.path.basename(f)
+ if name not in processed and name in dependencies_graph.to_run:
+ await check_fixtures_data(
+ tb_client,
+ dependencies_graph.to_run[name],
+ debug,
+ folder,
+ force,
+ mode="append" if is_branch else "replace",
+ )
+ processed.add(name)
+ for f in dependencies_graph.to_run:
+ if f not in processed:
+ await check_fixtures_data(
+ tb_client,
+ dependencies_graph.to_run[f],
+ debug,
+ folder,
+ force,
+ mode="append" if is_branch else "replace",
+ )
+ else:
+ if verbose:
+ click.echo(FeedbackManager.info_not_pushing_fixtures())
+
+ return dependencies_graph.to_run
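Taken together, a minimal invocation sketch for folder_push (illustrative only; the client construction and credentials below are placeholders, not taken from this diff):

    import asyncio

    async def main():
        client = TinyB("<admin token>", host="https://api.tinybird.co")  # placeholder client setup
        to_run = await folder_push(
            client,
            folder=".",
            push_deps=True,   # also resolve and deploy dependencies
            dry_run=True,     # report what would be deployed without changing the workspace
            verbose=True,
        )
        print(list(to_run))   # names of the resources that were (or would be) processed

    asyncio.run(main())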