tinybird 0.0.1.dev18__py3-none-any.whl → 0.0.1.dev20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (49)
  1. tinybird/client.py +24 -4
  2. tinybird/config.py +1 -1
  3. tinybird/feedback_manager.py +8 -30
  4. tinybird/tb/__cli__.py +8 -0
  5. tinybird/tb/modules/auth.py +1 -1
  6. tinybird/tb/modules/build.py +26 -80
  7. tinybird/tb/modules/cicd.py +1 -1
  8. tinybird/tb/modules/cli.py +11 -837
  9. tinybird/tb/modules/common.py +1 -55
  10. tinybird/tb/modules/connection.py +1 -1
  11. tinybird/tb/modules/create.py +18 -7
  12. tinybird/tb/modules/datafile/build.py +142 -971
  13. tinybird/tb/modules/datafile/build_common.py +1 -1
  14. tinybird/tb/modules/datafile/build_datasource.py +1 -1
  15. tinybird/tb/modules/datafile/build_pipe.py +1 -1
  16. tinybird/tb/modules/datafile/common.py +12 -11
  17. tinybird/tb/modules/datafile/diff.py +1 -1
  18. tinybird/tb/modules/datafile/fixture.py +1 -1
  19. tinybird/tb/modules/datafile/format_common.py +0 -7
  20. tinybird/tb/modules/datafile/format_datasource.py +0 -2
  21. tinybird/tb/modules/datafile/format_pipe.py +0 -2
  22. tinybird/tb/modules/datafile/parse_datasource.py +1 -1
  23. tinybird/tb/modules/datafile/parse_pipe.py +1 -1
  24. tinybird/tb/modules/datafile/pull.py +1 -1
  25. tinybird/tb/modules/datasource.py +4 -75
  26. tinybird/tb/modules/feedback_manager.py +1048 -0
  27. tinybird/tb/modules/fmt.py +1 -1
  28. tinybird/tb/modules/job.py +1 -1
  29. tinybird/tb/modules/llm.py +6 -4
  30. tinybird/tb/modules/local.py +26 -21
  31. tinybird/tb/modules/local_common.py +2 -1
  32. tinybird/tb/modules/login.py +18 -11
  33. tinybird/tb/modules/mock.py +18 -7
  34. tinybird/tb/modules/pipe.py +4 -126
  35. tinybird/tb/modules/{build_shell.py → shell.py} +66 -36
  36. tinybird/tb/modules/table.py +88 -5
  37. tinybird/tb/modules/tag.py +2 -2
  38. tinybird/tb/modules/test.py +45 -29
  39. tinybird/tb/modules/tinyunit/tinyunit.py +1 -1
  40. tinybird/tb/modules/token.py +2 -2
  41. tinybird/tb/modules/watch.py +72 -0
  42. tinybird/tb/modules/workspace.py +1 -1
  43. tinybird/tb/modules/workspace_members.py +1 -1
  44. {tinybird-0.0.1.dev18.dist-info → tinybird-0.0.1.dev20.dist-info}/METADATA +1 -1
  45. tinybird-0.0.1.dev20.dist-info/RECORD +76 -0
  46. tinybird-0.0.1.dev18.dist-info/RECORD +0 -73
  47. {tinybird-0.0.1.dev18.dist-info → tinybird-0.0.1.dev20.dist-info}/WHEEL +0 -0
  48. {tinybird-0.0.1.dev18.dist-info → tinybird-0.0.1.dev20.dist-info}/entry_points.txt +0 -0
  49. {tinybird-0.0.1.dev18.dist-info → tinybird-0.0.1.dev20.dist-info}/top_level.txt +0 -0
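
The bulk of the change is in tinybird/tb/modules/datafile/build.py: folder_build drops the workspace_map / workspace_lib_paths machinery and instead collects vendor workspaces as plain (name, path) tuples. A minimal sketch of that collection step, assuming a local vendor/ directory laid out as vendor/<workspace_name>/... (the helper name is hypothetical, not part of the package):

    from pathlib import Path
    from typing import List, Tuple

    def collect_vendor_paths(project_root: str = ".") -> List[Tuple[str, str]]:
        # Hypothetical helper mirroring the new vendor_paths collection in folder_build:
        # each subdirectory of vendor/ is treated as a shared workspace named after the folder.
        vendor_paths: List[Tuple[str, str]] = []
        vendor_dir = Path(project_root) / "vendor"
        if vendor_dir.exists():
            for entry in vendor_dir.iterdir():
                if entry.is_dir() and entry.name:
                    vendor_paths.append((entry.name, str(entry)))
        return vendor_paths
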
@@ -2,7 +2,6 @@ import datetime
  import os
  import os.path
  import re
- import sys
  import urllib
  from copy import deepcopy
  from dataclasses import dataclass
@@ -13,7 +12,6 @@ import click
  from toposort import toposort

  from tinybird.client import TinyB
- from tinybird.feedback_manager import FeedbackManager
  from tinybird.sql import parse_table_structure, schema_to_sql_columns
  from tinybird.sql_template import get_used_tables_in_template, render_sql_template
  from tinybird.tb.modules.common import get_ca_pem_content
@@ -46,6 +44,7 @@ from tinybird.tb.modules.datafile.common import (
  from tinybird.tb.modules.datafile.exceptions import AlreadyExistsException, IncludeFileNotFoundException
  from tinybird.tb.modules.datafile.parse_datasource import parse_datasource
  from tinybird.tb.modules.datafile.parse_pipe import parse_pipe
+ from tinybird.tb.modules.feedback_manager import FeedbackManager


  async def folder_build(
@@ -57,14 +56,12 @@ async def folder_build(
  is_vendor: bool = False,
  current_ws: Optional[Dict[str, Any]] = None,
  local_ws: Optional[Dict[str, Any]] = None,
- workspaces: Optional[List[Dict[str, Any]]] = None,
  watch: bool = False,
  ):
  config = CLIConfig.get_project_config()
  build = True
  dry_run = False
  force = True
- push_deps = True
  only_changes = True
  debug = False
  check = True
@@ -72,18 +69,11 @@ async def folder_build(
  populate_subset = None
  populate_condition = None
  tests_to_run = 0
- tests_failfast = True
  override_datasource = False
- tests_check_requests_from_branch = False
  skip_confirmation = True
  wait = False
  unlink_on_populate_error = False
- upload_fixtures = False
  only_response_times = False
- workspace_map: Dict[str, Any] = {}
- tests_sample_by_params = 1
- tests_ignore_order = False
- tests_validate_processed_bytes = False
  run_tests = False
  verbose = False
  as_standard = False
@@ -91,8 +81,6 @@ async def folder_build(
  fork_downstream = True
  fork = False
  release_created = False
- auto_promote = False
- hide_folders = False
  tests_relative_change = 0.01
  tests_sample_by_params = 0
  tests_filter_by = None
@@ -100,12 +88,8 @@ async def folder_build(
  tests_ignore_order = False
  tests_validate_processed_bytes = False
  tests_check_requests_from_branch = False
- git_release = False
- workspace_lib_paths = []
+ vendor_paths = []

- workspace_lib_paths = list(workspace_lib_paths)
- # include vendor libs without overriding user ones
- existing_workspaces = set(x[1] for x in workspace_lib_paths)
  vendor_path = Path("vendor")
  user_token = config.get_user_token()
  user_client = deepcopy(tb_client)
@@ -118,7 +102,7 @@ async def folder_build(
  if vendor_path.exists() and not is_vendor and not watch:
  user_workspaces = await user_client.user_workspaces()
  for x in vendor_path.iterdir():
- if x.is_dir() and x.name not in existing_workspaces:
+ if x.is_dir() and x.name:
  if user_token:
  try:
  ws_to_delete = next((ws for ws in user_workspaces["workspaces"] if ws["name"] == x.name), None)
@@ -128,7 +112,7 @@ async def folder_build(
  pass
  vendor_ws = await user_client.create_workspace(x.name, template=None)
  vendor_workspaces.append(vendor_ws)
- workspace_lib_paths.append((x.name, x))
+ vendor_paths.append((x.name, str(x)))

  workspaces: List[Dict[str, Any]] = (await user_client.user_workspaces()).get("workspaces", [])

@@ -137,7 +121,7 @@ async def folder_build(
  local_ws_id = local_workspace.get("id")
  local_ws = next((ws for ws in workspaces if ws["id"] == local_ws_id), {})

- current_ws: Dict[str, Any] = current_ws or local_ws
+ current_ws = current_ws or local_ws

  for vendor_ws in [ws for ws in workspaces if ws["name"] in [ws["name"] for ws in vendor_workspaces]]:
  ws_client = deepcopy(tb_client)
@@ -153,21 +137,11 @@ async def folder_build(
  pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)

  existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
- # replace workspace mapping names
- for old_ws, new_ws in workspace_map.items():
- existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
-
  remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]

- # replace workspace mapping names
- for old_ws, new_ws in workspace_map.items():
- remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
-
  if not filenames:
  filenames = get_project_filenames(folder)

- changed = None
-
  # build graph to get new versions for all the files involved in the query
  # dependencies need to be processed always to get the versions
  dependencies_graph = await build_graph(
@@ -175,32 +149,9 @@ async def folder_build(
  tb_client,
  dir_path=folder,
  process_dependencies=True,
- workspace_map=workspace_map,
  skip_connectors=True,
- workspace_lib_paths=workspace_lib_paths,
+ vendor_paths=vendor_paths,
  current_ws=current_ws,
- changed=changed,
- only_changes=only_changes,
- fork_downstream=fork_downstream,
- is_internal=is_internal,
- build=build,
- )
-
- resource_versions = {}
- latest_datasource_versions = {}
-
- # If we have datasources using VERSION, let's try to get the latest version
- dependencies_graph = await build_graph(
- filenames,
- tb_client,
- dir_path=folder,
- resource_versions=latest_datasource_versions,
- workspace_map=workspace_map,
- process_dependencies=push_deps,
- verbose=verbose,
- workspace_lib_paths=workspace_lib_paths,
- current_ws=current_ws,
- changed=changed,
  only_changes=only_changes,
  fork_downstream=fork_downstream,
  is_internal=is_internal,
@@ -213,7 +164,6 @@ async def folder_build(
  def should_push_file(
  name: str,
  remote_resource_names: List[str],
- latest_datasource_versions: Dict[str, Any],
  force: bool,
  run_tests: bool,
  ) -> bool:
@@ -223,9 +173,7 @@ async def folder_build(
  if name not in remote_resource_names:
  return True
  # When we need to try to push a file when it doesn't exist and the version is different that the existing one
- resource_full_name = (
- f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
- )
+ resource_full_name = name
  if resource_full_name not in existing_resources:
  return True
  if force or run_tests:
@@ -235,8 +183,6 @@ async def folder_build(
  async def push(
  name: str,
  to_run: Dict[str, Dict[str, Any]],
- resource_versions: Dict[str, Any],
- latest_datasource_versions: Dict[str, Any],
  dry_run: bool,
  fork_downstream: Optional[bool] = False,
  fork: Optional[bool] = False,
@@ -244,20 +190,8 @@ async def folder_build(
  if name in to_run:
  resource = to_run[name]["resource"]
  if not dry_run:
- if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
- if name not in resource_versions:
- version = ""
- if name in latest_datasource_versions:
- version = f"(v{latest_datasource_versions[name]})"
- click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
- else:
- click.echo(
- FeedbackManager.info_processing_resource(
- name=name,
- version=latest_datasource_versions[name],
- latest_version=resource_versions.get(name),
- )
- )
+ if should_push_file(name, remote_resource_names, force, run_tests):
+ click.echo(FeedbackManager.info_processing_new_resource(name=name, version=""))
  try:
  await exec_file(
  to_run[name],
@@ -289,7 +223,6 @@ async def folder_build(
  local_ws,
  fork_downstream,
  fork,
- git_release,
  build,
  is_vendor,
  )
@@ -304,9 +237,7 @@ async def folder_build(
  )
  )
  except Exception as e:
- filename = (
- os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
- )
+ filename = to_run[name]["filename"]
  exception = FeedbackManager.error_push_file_exception(
  filename=filename,
  error=e,
@@ -336,26 +267,9 @@ async def folder_build(
  )
  )
  else:
- if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
- if name not in resource_versions:
- version = ""
- if name in latest_datasource_versions:
- version = f"(v{latest_datasource_versions[name]})"
- if build:
- extension = "pipe" if resource == "pipes" else "datasource"
- click.echo(
- FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=version)
- )
- else:
- click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
- else:
- click.echo(
- FeedbackManager.info_dry_processing_resource(
- name=name,
- version=latest_datasource_versions[name],
- latest_version=resource_versions.get(name),
- )
- )
+ if should_push_file(name, remote_resource_names, force, run_tests):
+ extension = "pipe" if resource == "pipes" else "datasource"
+ click.echo(FeedbackManager.info_building_resource(name=f"{name}.{extension}", version=""))
  else:
  if await name_matches_existing_resource(resource, name, tb_client):
  if resource == "pipes":
@@ -368,317 +282,143 @@ async def folder_build(
  async def push_files(
  dependency_graph: GraphDependencies,
  dry_run: bool = False,
- check_backfill_required: bool = False,
  ):
  endpoints_dep_map = dict()
  processed = set()

- dependencies_graph = dependency_graph.dep_map
  resources_to_run = dependency_graph.to_run

- if not fork_downstream:
- # First, we will deploy the all the resources following the dependency graph except for the endpoints
- groups = [group for group in toposort(dependencies_graph)]
- for group in groups:
- for name in group:
- if name in processed:
- continue
-
- if is_endpoint_with_no_dependencies(
- resources_to_run.get(name, {}),
- dependencies_graph,
- resources_to_run,
- ):
- endpoints_dep_map[name] = dependencies_graph[name]
- continue
-
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Then, we will deploy the endpoints that are on the dependency graph
- groups = [group for group in toposort(endpoints_dep_map)]
- for group in groups:
- for name in group:
- if name not in processed:
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
- else:
- # This will generate the graph from right to left and will fill the gaps of the dependencies
- # If we have a graph like this:
- # A -> B -> C
- # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
- # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
- dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
- dependency_graph.all_dep_map,
- dependency_graph.all_resources,
- resources_to_run,
- list(dependency_graph.dep_map.keys()),
- )
+ # This will generate the graph from right to left and will fill the gaps of the dependencies
+ # If we have a graph like this:
+ # A -> B -> C
+ # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
+ # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
+ dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
+ dependency_graph.all_dep_map,
+ dependency_graph.all_resources,
+ resources_to_run,
+ list(dependency_graph.dep_map.keys()),
+ )

- # First, we will deploy the datasources that need to be deployed.
- # We need to deploy the datasources from left to right as some datasources might have MV that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ # First, we will deploy the datasources that need to be deployed.
+ # We need to deploy the datasources from left to right as some datasources might have MV that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]

- for group in groups:
- for name in group:
- is_vendor = resources_to_run_fork_downstream.get(name, {}).get("filename", "").startswith("vendor/")
- if not is_vendor:
- try:
- await tb_client.datasource_delete(name, force=True)
- except Exception:
- pass
- try:
- await tb_client.pipe_delete(name)
- except Exception:
- pass
+ for group in groups:
+ for name in group:
+ is_vendor = resources_to_run_fork_downstream.get(name, {}).get("filename", "").startswith("vendor/")
+ if not is_vendor:
+ try:
+ await tb_client.datasource_delete(name, force=True)
+ except Exception:
+ pass
+ try:
+ await tb_client.pipe_delete(name)
+ except Exception:
+ pass

- groups.reverse()
- for group in groups:
- for name in group:
- if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
- continue
-
- # If the resource is new, we will use the normal resource information to deploy it
- # This is mostly used for datasources with connections.
- # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
- # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
- if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- else:
- # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
- kafka_connection_name = (
- resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
- )
- service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
- if release_created and (kafka_connection_name or service):
- connector = "Kafka" if kafka_connection_name else service
- error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
- raise click.ClickException(error_msg)
-
- # If we are pushing a modified datasource, inform about the backfill``
- if check_backfill_required and auto_promote and release_created:
- error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
- raise click.ClickException(error_msg)
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Now, we will create a map of all the endpoints and there dependencies
- # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
- # But does not include the missing gaps
- # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
- # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
- # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
- continue
-
- endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
-
- # Now that we have the dependencies of the endpoints, we need to check that the resources has not been deployed yet and only care about the endpoints that depend on endpoints
- groups = [group for group in toposort(endpoints_dep_map)]
-
- # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
- # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
- # So we need to reverse the groups
- groups.reverse()
- for group in groups:
- for name in group:
- if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Now we should have the endpoints and datasources deployed, we can deploy the rest of the pipes (copy & sinks)
- # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
- # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Finally, we need to deploy the materialized views from right to left.
- # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
- # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
+ groups.reverse()
+ for group in groups:
+ for name in group:
+ if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
+ continue

- await push_files(dependencies_graph, dry_run)
+ # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
+ kafka_connection_name = (
+ resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
+ )
+ service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
+ if release_created and (kafka_connection_name or service):
+ connector = "Kafka" if kafka_connection_name else service
+ error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
+ raise click.ClickException(error_msg)
+
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)

- if not dry_run and not run_tests:
- if upload_fixtures:
- click.echo(FeedbackManager.info_pushing_fixtures())
+ # Now, we will create a map of all the endpoints and there dependencies
+ # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
+ # But does not include the missing gaps
+ # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
+ # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
+ # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
+ continue

- processed = set()
- for group in toposort(dependencies_graph.dep_map):
- for f in group:
- name = os.path.basename(f)
- if name not in processed and name in dependencies_graph.to_run:
- await check_fixtures_data(
- tb_client,
- dependencies_graph.to_run[name],
- debug,
- folder,
- force,
- mode="replace",
- )
- processed.add(name)
- for f in dependencies_graph.to_run:
- if f not in processed:
- await check_fixtures_data(
- tb_client,
- dependencies_graph.to_run[f],
- debug,
- folder,
- force,
- mode="replace",
- )
- else:
- if verbose:
- click.echo(FeedbackManager.info_not_pushing_fixtures())
+ endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]

- return dependencies_graph.to_run
+ # Now that we have the dependencies of the endpoints, we need to check that the resources has not been deployed yet and only care about the endpoints that depend on endpoints
+ groups = [group for group in toposort(endpoints_dep_map)]

+ # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
+ # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
+ # So we need to reverse the groups
+ groups.reverse()
+ for group in groups:
+ for name in group:
+ if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
+ continue

- async def check_fixtures_data(
- client: TinyB, resource: Dict[str, Any], debug: bool, folder: str = "", force: bool = False, mode: str = "replace"
- ):
- if debug:
- click.echo(FeedbackManager.info_checking_file(file=pp.pformat(resource)))
- if resource["resource"] in ["pipes", "tokens"]:
- pass
- elif resource["resource"] == "datasources":
- datasource_name = resource["params"]["name"]
- name = os.path.basename(resource["filename"]).rsplit(".", 1)[0]
- fixture_path = Path(folder) / "fixtures" / f"{name}.csv"
-
- if not fixture_path.exists():
- fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.csv"
- if not fixture_path.exists():
- fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.ndjson"
- if not fixture_path.exists():
- fixture_path = Path(folder) / "datasources" / "fixtures" / f"{name}.parquet"
- if fixture_path.exists():
- # Let's validate only when when we are going to replace the actual data
- result = await client.query(sql=f"SELECT count() as c FROM {datasource_name} FORMAT JSON")
- count = result["data"][0]["c"]
-
- if count > 0 and not force:
- raise click.ClickException(
- FeedbackManager.error_push_fixture_will_replace_data(datasource=datasource_name)
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ dry_run,
+ fork_downstream,
+ fork,
  )
+ processed.add(name)

- click.echo(
- FeedbackManager.info_checking_file_size(
- filename=resource["filename"], size=sizeof_fmt(os.stat(fixture_path).st_size)
- )
- )
- sys.stdout.flush()
- try:
- await client.datasource_append_data(
- datasource_name=resource["params"]["name"],
- file=fixture_path,
- mode=mode,
- format=fixture_path.suffix[1:],
- )
- click.echo(FeedbackManager.success_processing_data())
- except Exception as e:
- raise click.ClickException(FeedbackManager.error_processing_blocks(error=e))
+ # Now we should have the endpoints and datasources deployed, we can deploy the rest of the pipes (copy & sinks)
+ # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
+ continue

- else:
- click.echo(FeedbackManager.warning_fixture_not_found(datasource_name=name))
- else:
- raise click.ClickException(FeedbackManager.error_unknown_resource(resource=resource["resource"]))
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)

+ # Finally, we need to deploy the materialized views from right to left.
+ # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
+ # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
+ groups = [group for group in toposort(dependencies_graph_fork_downstream)]
+ for group in groups:
+ for name in group:
+ if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
+ continue

- def is_new(
- name: str,
- changed: Dict[str, str],
- normal_dependency: Dict[str, Set[str]],
- fork_downstream_dependency: Dict[str, Set[str]],
- ) -> bool:
- def is_git_new(name: str):
- return changed and changed.get(name) == "A"
+ await push(
+ name,
+ resources_to_run_fork_downstream,
+ dry_run,
+ fork_downstream,
+ fork,
+ )
+ processed.add(name)

- if not is_git_new(name):
- return False
+ await push_files(dependencies_graph, dry_run)

- # if should not depend on a changed resource
- if back_deps := normal_dependency.get(name):
- for dep in back_deps:
- if dep in fork_downstream_dependency and not is_git_new(dep):
- return False
+ if not dry_run and not run_tests and verbose:
+ click.echo(FeedbackManager.info_not_pushing_fixtures())

- return True
+ return dependencies_graph.to_run


  async def name_matches_existing_resource(resource: str, name: str, tb_client: TinyB):
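
The rewritten push_files keeps only the fork-downstream strategy: it fills in the dependency graph with generate_forkdownstream_graph and then drives every deployment pass off toposort() groups, reversing the group order when a pass needs to start from the other end of the graph. A small standalone sketch of that grouping/reversal mechanic, using the same toposort library (resource names here are illustrative, not taken from the package):

    from toposort import toposort

    # dep_map maps each resource to the set of resources it depends on,
    # in the same shape build_graph produces.
    dep_map = {
        "endpoint_c": {"datasource_b"},
        "datasource_b": {"mv_pipe_b"},
        "mv_pipe_b": {"endpoint_a"},
        "endpoint_a": set(),
    }

    groups = list(toposort(dep_map))
    # groups is dependency-first: [{'endpoint_a'}, {'mv_pipe_b'}, {'datasource_b'}, {'endpoint_c'}]
    dependency_first = [name for group in groups for name in group]

    groups.reverse()
    # reversed order visits dependents before their dependencies,
    # which is how the datasource pass in push_files walks the graph
    dependents_first = [name for group in groups for name in group]
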
@@ -723,7 +463,6 @@ async def exec_file(
  local_ws: Optional[Dict[str, Any]] = None,
  fork_downstream: Optional[bool] = False,
  fork: Optional[bool] = False,
- git_release: Optional[bool] = False,
  build: Optional[bool] = False,
  is_vendor: Optional[bool] = False,
  ):
@@ -954,12 +693,10 @@ async def build_graph(
  filenames: Iterable[str],
  tb_client: TinyB,
  dir_path: Optional[str] = None,
- resource_versions=None,
- workspace_map: Optional[Dict] = None,
  process_dependencies: bool = False,
  verbose: bool = False,
  skip_connectors: bool = False,
- workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
+ vendor_paths: Optional[List[Tuple[str, str]]] = None,
  current_ws: Optional[Dict[str, Any]] = None,
  changed: Optional[Dict[str, Any]] = None,
  only_changes: bool = False,
@@ -975,8 +712,6 @@ async def build_graph(
  deps: List[str] = []
  dep_map: Dict[str, Any] = {}
  embedded_datasources = {}
- if not workspace_map:
- workspace_map = {}

  # These dictionaries are used to store all the resources and there dependencies for the whole project
  # This is used for the downstream dependency graph
@@ -994,10 +729,8 @@ async def build_graph(
  tb_client,
  dir_path=dir_path,
  process_dependencies=True,
- resource_versions=resource_versions,
- workspace_map=workspace_map,
  skip_connectors=True,
- workspace_lib_paths=workspace_lib_paths,
+ vendor_paths=vendor_paths,
  current_ws=current_ws,
  changed=None,
  only_changes=False,
@@ -1012,7 +745,7 @@ async def build_graph(
  deps: List[str],
  dep_map: Dict[str, Any],
  to_run: Dict[str, Any],
- workspace_lib_paths: Optional[List[Tuple[str, str]]],
+ vendor_paths: Optional[List[Tuple[str, str]]],
  ):
  name, kind = filename.rsplit(".", 1)
  warnings = []
@@ -1021,10 +754,7 @@ async def build_graph(
  res = await process_file(
  filename,
  tb_client,
- resource_versions=resource_versions,
  skip_connectors=skip_connectors,
- workspace_map=workspace_map,
- workspace_lib_paths=workspace_lib_paths,
  current_ws=current_ws,
  )
  except click.ClickException as e:
@@ -1058,7 +788,7 @@ async def build_graph(
  dep_list = []
  for x in file_deps:
  if x not in INTERNAL_TABLES or is_internal:
- f, ds = find_file_by_name(dir_path, x, verbose, workspace_lib_paths=workspace_lib_paths, resource=r)
+ f, ds = find_file_by_name(dir_path, x, verbose, vendor_paths=vendor_paths, resource=r)
  if f:
  dep_list.append(f.rsplit(".", 1)[0])
  if ds:
@@ -1077,17 +807,6 @@ async def build_graph(
  if e_ds:
  dep_list.append(e_ds["resource_name"])

- # In case the datasource is to be shared and we have mapping, let's replace the name
- if "shared_with" in r and workspace_map:
- mapped_workspaces: List[str] = []
- for shared_with in r["shared_with"]:
- mapped_workspaces.append(
- workspace_map.get(shared_with)
- if workspace_map.get(shared_with, None) is not None
- else shared_with # type: ignore
- )
- r["shared_with"] = mapped_workspaces
-
  dep_map[resource_name] = set(dep_list)
  return os.path.basename(name), warnings

@@ -1109,7 +828,7 @@ async def build_graph(
  if ".incl" in filename:
  click.echo(FeedbackManager.warning_skipping_include_file(file=filename))

- name, warnings = await process(filename, deps, dep_map, to_run, workspace_lib_paths)
+ name, warnings = await process(filename, deps, dep_map, to_run, vendor_paths)
  processed.add(name)

  if verbose:
@@ -1147,7 +866,7 @@ async def build_graph(
  dep = deps.pop()
  if dep not in processed:
  processed.add(dep)
- f = full_path_by_name(dir_path, dep, workspace_lib_paths)
+ f = full_path_by_name(dir_path, dep, vendor_paths)
  if f:
  if verbose:
  try:
@@ -1159,7 +878,7 @@ async def build_graph(
  click.echo(FeedbackManager.info_skipping_resource(resource=processed_filename))
  continue
  click.echo(FeedbackManager.info_processing_file(filename=processed_filename))
- await process(str(f), deps, dep_map, to_run, workspace_lib_paths)
+ await process(str(f), deps, dep_map, to_run, vendor_paths)

  return GraphDependencies(dep_map, to_run, all_dep_map, all_resources)

@@ -1167,21 +886,12 @@ async def build_graph(
  async def process_file(
  filename: str,
  tb_client: TinyB,
- resource_versions: Optional[Dict] = None,
  skip_connectors: bool = False,
- workspace_map: Optional[Dict] = None,
- workspace_lib_paths: Optional[List[Tuple[str, str]]] = None,
  current_ws: Optional[Dict[str, Any]] = None,
  ) -> List[Dict[str, Any]]:
  """Returns a list of resources

  For both datasources and pipes, a list of just one item is returned"""
- if workspace_map is None:
- workspace_map = {}
-
- if resource_versions is None:
- resource_versions = {}
- resource_versions_string = {k: f"__v{v}" for k, v in resource_versions.items() if v >= 0}

  def get_engine_params(node: Dict[str, Any]) -> Dict[str, Any]:
  params = {}
@@ -1284,7 +994,7 @@ async def process_file(

  if import_from_timestamp := params.get("import_from_timestamp", None):
  try:
- str(datetime.fromisoformat(import_from_timestamp).isoformat())
+ str(datetime.datetime.fromisoformat(import_from_timestamp).isoformat())
  except ValueError:
  raise click.ClickException(
  FeedbackManager.error_invalid_import_from_timestamp(datasource=datasource["name"])
@@ -1322,7 +1032,7 @@ async def process_file(
  # materialized columns expressions could have joins so we need to add them as a dep
  deps += tables
  # generate replacements and replace the query
- replacements = {t: t + resource_versions_string.get(t, "") for t in tables}
+ replacements = {t: t for t in tables}

  replaced_results = await tb_client.replace_tables(q, replacements)
  x["default_value"] = replaced_results.replace("SELECT", "materialized", 1)
@@ -1333,15 +1043,6 @@ async def process_file(

  name = os.path.basename(filename).rsplit(".", 1)[0]

- if workspace_lib_paths:
- for wk_name, wk_path in workspace_lib_paths:
- try:
- Path(filename).relative_to(wk_path)
- name = f"{workspace_map.get(wk_name, wk_name)}.{name}"
- except ValueError:
- # the path was not relative, not inside workspace
- pass
-
  version = f"__v{doc.version}" if doc.version is not None else ""

  def append_version_to_name(name: str, version: str) -> str:
@@ -1523,25 +1224,16 @@ async def process_file(
  raise ValueError("Defining ENGINE options in a node requires a DATASOURCE")

  if "datasource" in node:
- params["datasource"] = node["datasource"] + resource_versions_string.get(node["datasource"], "")
+ params["datasource"] = node["datasource"]
  deps += [node["datasource"]]

  if "target_datasource" in node:
- params["target_datasource"] = node["target_datasource"] + resource_versions_string.get(
- node["target_datasource"], ""
- )
+ params["target_datasource"] = node["target_datasource"]
  deps += [node["target_datasource"]]

  params.update(get_engine_params(node))

- def create_replacement_for_resource(name: str) -> str:
- for old_ws, new_ws in workspace_map.items():
- name = name.replace(f"{old_ws}.", f"{new_ws}.")
- return name + resource_versions_string.get(name, "")
-
- replacements = {
- x: create_replacement_for_resource(x) for x in deps if x not in [n["name"] for n in doc.nodes]
- }
+ replacements = {x: x for x in deps if x not in [n["name"] for n in doc.nodes]}

  # FIXME: Ideally we should use await tb_client.replace_tables(sql, replacements)
  for old, new in replacements.items():
@@ -1593,9 +1285,7 @@ def sizeof_fmt(num: Union[int, float], suffix: str = "b") -> str:
  return "%.1f%s%s" % (num, "Yi", suffix)


- def full_path_by_name(
- folder: str, name: str, workspace_lib_paths: Optional[List[Tuple[str, str]]] = None
- ) -> Optional[Path]:
+ def full_path_by_name(folder: str, name: str, vendor_paths: Optional[List[Tuple[str, str]]] = None) -> Optional[Path]:
  f = Path(folder)
  ds = name + ".datasource"
  if os.path.isfile(os.path.join(folder, ds)):
@@ -1625,529 +1315,10 @@ def full_path_by_name(
  if os.path.isfile(f / "materializations" / pipe):
  return f / "materializations" / pipe

- if workspace_lib_paths:
- for wk_name, wk_path in workspace_lib_paths:
+ if vendor_paths:
+ for wk_name, wk_path in vendor_paths:
  if name.startswith(f"{wk_name}."):
  r = full_path_by_name(wk_path, name.replace(f"{wk_name}.", ""))
  if r:
  return r
  return None
-
-
- async def folder_push(
- tb_client: TinyB,
- filenames: Optional[List[str]] = None,
- dry_run: bool = False,
- check: bool = False,
- push_deps: bool = False,
- only_changes: bool = False,
- git_release: bool = False,
- debug: bool = False,
- force: bool = False,
- override_datasource: bool = False,
- folder: str = ".",
- populate: bool = False,
- populate_subset=None,
- populate_condition: Optional[str] = None,
- unlink_on_populate_error: bool = False,
- upload_fixtures: bool = False,
- wait: bool = False,
- ignore_sql_errors: bool = False,
- skip_confirmation: bool = False,
- only_response_times: bool = False,
- workspace_map=None,
- workspace_lib_paths=None,
- no_versions: bool = False,
- run_tests: bool = False,
- as_standard: bool = False,
- raise_on_exists: bool = False,
- verbose: bool = True,
- tests_to_run: int = 0,
- tests_relative_change: float = 0.01,
- tests_sample_by_params: int = 0,
- tests_filter_by: Optional[List[str]] = None,
- tests_failfast: bool = False,
- tests_ignore_order: bool = False,
- tests_validate_processed_bytes: bool = False,
- tests_check_requests_from_branch: bool = False,
- config: Optional[CLIConfig] = None,
- user_token: Optional[str] = None,
- fork_downstream: Optional[bool] = False,
- fork: Optional[bool] = False,
- is_internal: Optional[bool] = False,
- release_created: Optional[bool] = False,
- auto_promote: Optional[bool] = False,
- check_backfill_required: bool = False,
- use_main: bool = False,
- check_outdated: bool = True,
- hide_folders: bool = False,
- ):
- workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces_and_branches()).get("workspaces", [])
- current_ws: Dict[str, Any] = next(
- (workspace for workspace in workspaces if config and workspace.get("id", ".") == config.get("id", "..")), {}
- )
- is_branch = current_ws.get("is_branch", False)
-
- if not workspace_map:
- workspace_map = {}
- if not workspace_lib_paths:
- workspace_lib_paths = []
-
- workspace_lib_paths = list(workspace_lib_paths)
- # include vendor libs without overriding user ones
- existing_workspaces = set(x[1] for x in workspace_lib_paths)
- vendor_path = Path("vendor")
- if vendor_path.exists():
- for x in vendor_path.iterdir():
- if x.is_dir() and x.name not in existing_workspaces:
- workspace_lib_paths.append((x.name, x))
-
- datasources: List[Dict[str, Any]] = await tb_client.datasources()
- pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
-
- existing_resources: List[str] = [x["name"] for x in datasources] + [x["name"] for x in pipes]
- # replace workspace mapping names
- for old_ws, new_ws in workspace_map.items():
- existing_resources = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in existing_resources]
-
- remote_resource_names = [get_remote_resource_name_without_version(x) for x in existing_resources]
-
- # replace workspace mapping names
- for old_ws, new_ws in workspace_map.items():
- remote_resource_names = [re.sub(f"^{old_ws}\.", f"{new_ws}.", x) for x in remote_resource_names]
-
- if not filenames:
- filenames = get_project_filenames(folder)
-
- # build graph to get new versions for all the files involved in the query
- # dependencies need to be processed always to get the versions
- dependencies_graph = await build_graph(
- filenames,
- tb_client,
- dir_path=folder,
- process_dependencies=True,
- workspace_map=workspace_map,
- skip_connectors=True,
- workspace_lib_paths=workspace_lib_paths,
- current_ws=current_ws,
- changed=None,
- only_changes=only_changes,
- fork_downstream=fork_downstream,
- is_internal=is_internal,
- )
-
- resource_versions = {}
- latest_datasource_versions = {}
- changed = None
- # If we have datasources using VERSION, let's try to get the latest version
- dependencies_graph = await build_graph(
- filenames,
- tb_client,
- dir_path=folder,
- resource_versions=latest_datasource_versions,
- workspace_map=workspace_map,
- process_dependencies=push_deps,
- verbose=verbose,
- workspace_lib_paths=workspace_lib_paths,
- current_ws=current_ws,
- changed=None,
- only_changes=only_changes,
- fork_downstream=fork_downstream,
- is_internal=is_internal,
- )
-
- if debug:
- pp.pprint(dependencies_graph.to_run)
-
- if verbose:
- click.echo(FeedbackManager.info_building_dependencies())
-
- def should_push_file(
- name: str,
- remote_resource_names: List[str],
- latest_datasource_versions: Dict[str, Any],
- force: bool,
- run_tests: bool,
- ) -> bool:
- """
- Function to know if we need to run a file or not
- """
- if name not in remote_resource_names:
- return True
- # When we need to try to push a file when it doesn't exist and the version is different that the existing one
- resource_full_name = (
- f"{name}__v{latest_datasource_versions.get(name)}" if name in latest_datasource_versions else name
- )
- if resource_full_name not in existing_resources:
- return True
- if force or run_tests:
- return True
- return False
-
- async def push(
- name: str,
- to_run: Dict[str, Dict[str, Any]],
- resource_versions: Dict[str, Any],
- latest_datasource_versions: Dict[str, Any],
- dry_run: bool,
- fork_downstream: Optional[bool] = False,
- fork: Optional[bool] = False,
- ):
- if name in to_run:
- resource = to_run[name]["resource"]
- if not dry_run:
- if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
- if name not in resource_versions:
- version = ""
- if name in latest_datasource_versions:
- version = f"(v{latest_datasource_versions[name]})"
- click.echo(FeedbackManager.info_processing_new_resource(name=name, version=version))
- else:
- click.echo(
- FeedbackManager.info_processing_resource(
- name=name,
- version=latest_datasource_versions[name],
- latest_version=resource_versions.get(name),
- )
- )
- try:
- await exec_file(
- to_run[name],
- tb_client,
- force,
- check,
- debug and verbose,
- populate,
- populate_subset,
- populate_condition,
- unlink_on_populate_error,
- wait,
- user_token,
- override_datasource,
- ignore_sql_errors,
- skip_confirmation,
- only_response_times,
- run_tests,
- as_standard,
- tests_to_run,
- tests_relative_change,
- tests_sample_by_params,
- tests_filter_by,
- tests_failfast,
- tests_ignore_order,
- tests_validate_processed_bytes,
- tests_check_requests_from_branch,
- current_ws,
- fork_downstream,
- fork,
- git_release,
- )
- if not run_tests:
- click.echo(
- FeedbackManager.success_create(
- name=(
- name
- if to_run[name]["version"] is None
- else f'{name}__v{to_run[name]["version"]}'
- )
- )
- )
- except Exception as e:
- filename = (
- os.path.basename(to_run[name]["filename"]) if hide_folders else to_run[name]["filename"]
- )
- exception = FeedbackManager.error_push_file_exception(
- filename=filename,
- error=e,
- )
- raise click.ClickException(exception)
- else:
- if raise_on_exists:
- raise AlreadyExistsException(
- FeedbackManager.warning_name_already_exists(
- name=name if to_run[name]["version"] is None else f'{name}__v{to_run[name]["version"]}'
- )
- )
- else:
- if await name_matches_existing_resource(resource, name, tb_client):
- if resource == "pipes":
- click.echo(FeedbackManager.error_pipe_cannot_be_pushed(name=name))
- else:
- click.echo(FeedbackManager.error_datasource_cannot_be_pushed(name=name))
- else:
- click.echo(
- FeedbackManager.warning_name_already_exists(
- name=(
- name
- if to_run[name]["version"] is None
- else f'{name}__v{to_run[name]["version"]}'
- )
- )
- )
- else:
- if should_push_file(name, remote_resource_names, latest_datasource_versions, force, run_tests):
- if name not in resource_versions:
- version = ""
- if name in latest_datasource_versions:
- version = f"(v{latest_datasource_versions[name]})"
- click.echo(FeedbackManager.info_dry_processing_new_resource(name=name, version=version))
- else:
- click.echo(
- FeedbackManager.info_dry_processing_resource(
- name=name,
- version=latest_datasource_versions[name],
- latest_version=resource_versions.get(name),
- )
- )
- else:
- if await name_matches_existing_resource(resource, name, tb_client):
- if resource == "pipes":
- click.echo(FeedbackManager.warning_pipe_cannot_be_pushed(name=name))
- else:
- click.echo(FeedbackManager.warning_datasource_cannot_be_pushed(name=name))
- else:
- click.echo(FeedbackManager.warning_dry_name_already_exists(name=name))
-
- async def push_files(
- dependency_graph: GraphDependencies,
- dry_run: bool = False,
- check_backfill_required: bool = False,
- ):
- endpoints_dep_map = dict()
- processed = set()
-
- dependencies_graph = dependency_graph.dep_map
- resources_to_run = dependency_graph.to_run
-
- if not fork_downstream:
- # First, we will deploy the all the resources following the dependency graph except for the endpoints
- groups = [group for group in toposort(dependencies_graph)]
- for group in groups:
- for name in group:
- if name in processed:
- continue
-
- if is_endpoint_with_no_dependencies(
- resources_to_run.get(name, {}),
- dependencies_graph,
- resources_to_run,
- ):
- endpoints_dep_map[name] = dependencies_graph[name]
- continue
-
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Then, we will deploy the endpoints that are on the dependency graph
- groups = [group for group in toposort(endpoints_dep_map)]
- for group in groups:
- for name in group:
- if name not in processed:
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
- else:
- # This will generate the graph from right to left and will fill the gaps of the dependencies
- # If we have a graph like this:
- # A -> B -> C
- # If we only modify A, the normal dependencies graph will only contain a node like _{A => B}
- # But we need a graph that contains A, B and C and the dependencies between them to deploy them in the right order
- dependencies_graph_fork_downstream, resources_to_run_fork_downstream = generate_forkdownstream_graph(
- dependency_graph.all_dep_map,
- dependency_graph.all_resources,
- resources_to_run,
- list(dependency_graph.dep_map.keys()),
- )
-
- # First, we will deploy the datasources that need to be deployed.
- # We need to deploy the datasources from left to right as some datasources might have MV that depend on the column types of previous datasources. Ex: `test_change_column_type_landing_datasource` test
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- groups.reverse()
- for group in groups:
- for name in group:
- if name in processed or not is_datasource(resources_to_run_fork_downstream[name]):
- continue
-
- # If the resource is new, we will use the normal resource information to deploy it
- # This is mostly used for datasources with connections.
- # At the moment, `resources_to_run_fork_downstream` is generated by `all_resources` and this is generated using the parameter `skip_connectors=True`
- # TODO: Should the `resources_to_run_fork_downstream` be generated using the `skip_connectors` parameter?
- if is_new(name, changed, dependencies_graph_fork_downstream, dependencies_graph_fork_downstream):
- await push(
- name,
- resources_to_run,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- else:
- # If we are trying to modify a Kafka or CDK datasource, we need to inform the user that the resource needs to be post-released
- kafka_connection_name = (
- resources_to_run_fork_downstream[name].get("params", {}).get("kafka_connection_name")
- )
- service = resources_to_run_fork_downstream[name].get("params", {}).get("import_service")
- if release_created and (kafka_connection_name or service):
- connector = "Kafka" if kafka_connection_name else service
- error_msg = FeedbackManager.error_connector_require_post_release(connector=connector)
- raise click.ClickException(error_msg)
-
- # If we are pushing a modified datasource, inform about the backfill``
- if check_backfill_required and auto_promote and release_created:
- error_msg = FeedbackManager.error_check_backfill_required(resource_name=name)
- raise click.ClickException(error_msg)
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Now, we will create a map of all the endpoints and there dependencies
- # We are using the forkdownstream graph to get the dependencies of the endpoints as the normal dependencies graph only contains the resources that are going to be deployed
- # But does not include the missing gaps
- # If we have ENDPOINT_A ----> MV_PIPE_B -----> DATASOURCE_B ------> ENDPOINT_C
- # Where endpoint A is being used in the MV_PIPE_B, if we only modify the endpoint A
- # The dependencies graph will only contain the endpoint A and the MV_PIPE_B, but not the DATASOURCE_B and the ENDPOINT_C
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
- continue
-
- endpoints_dep_map[name] = dependencies_graph_fork_downstream[name]
-
- # Now that we have the dependencies of the endpoints, we need to check that the resources has not been deployed yet and only care about the endpoints that depend on endpoints
- groups = [group for group in toposort(endpoints_dep_map)]
-
- # As we have used the forkdownstream graph to get the dependencies of the endpoints, we have all the dependencies of the endpoints
- # But we need to deploy the endpoints and the dependencies of the endpoints from left to right
- # So we need to reverse the groups
- groups.reverse()
- for group in groups:
- for name in group:
- if name in processed or not is_endpoint(resources_to_run_fork_downstream[name]):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Now we should have the endpoints and datasources deployed, we can deploy the rest of the pipes (copy & sinks)
- # We need to rely on the forkdownstream graph as it contains all the modified pipes as well as the dependencies of the pipes
- # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or is_materialized(resources_to_run_fork_downstream.get(name)):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- # Finally, we need to deploy the materialized views from right to left.
- # We need to rely on the forkdownstream graph as it contains all the modified materialized views as well as the dependencies of the materialized views
- # In this case, we don't need to generate a new graph as we did for the endpoints as the pipes are not going to be used as dependencies and the datasources are already deployed
- groups = [group for group in toposort(dependencies_graph_fork_downstream)]
- for group in groups:
- for name in group:
- if name in processed or not is_materialized(resources_to_run_fork_downstream.get(name)):
- continue
-
- await push(
- name,
- resources_to_run_fork_downstream,
- resource_versions,
- latest_datasource_versions,
- dry_run,
- fork_downstream,
- fork,
- )
- processed.add(name)
-
- await push_files(dependencies_graph, dry_run)
-
- if not dry_run and not run_tests:
- if upload_fixtures:
- click.echo(FeedbackManager.info_pushing_fixtures())
-
- # We need to upload the fixtures even if there is no change
- if is_branch:
- filenames = get_project_filenames(folder, with_vendor=True)
- dependencies_graph = await build_graph(
- filenames,
- tb_client,
- dir_path=folder,
- resource_versions=latest_datasource_versions,
- workspace_map=workspace_map,
- process_dependencies=push_deps,
- verbose=verbose,
- workspace_lib_paths=workspace_lib_paths,
- current_ws=current_ws,
- )
-
- processed = set()
- for group in toposort(dependencies_graph.dep_map):
- for f in group:
- name = os.path.basename(f)
- if name not in processed and name in dependencies_graph.to_run:
- await check_fixtures_data(
- tb_client,
- dependencies_graph.to_run[name],
- debug,
- folder,
- force,
- mode="append" if is_branch else "replace",
- )
- processed.add(name)
- for f in dependencies_graph.to_run:
- if f not in processed:
- await check_fixtures_data(
- tb_client,
- dependencies_graph.to_run[f],
- debug,
- folder,
- force,
- mode="append" if is_branch else "replace",
- )
- else:
- if verbose:
- click.echo(FeedbackManager.info_not_pushing_fixtures())
-
- return dependencies_graph.to_run
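
With resource versioning gone from the build path, the decision of whether to (re)push a file reduces to the three checks left in the new should_push_file. A hedged, self-contained restatement of that logic (argument names follow the diff, but existing_resources is passed explicitly here instead of being captured from the enclosing folder_build scope; this is a sketch, not a verbatim copy):

    from typing import List

    def should_push_file(
        name: str,
        remote_resource_names: List[str],
        existing_resources: List[str],
        force: bool,
        run_tests: bool,
    ) -> bool:
        # Push when the resource is unknown to the remote workspace...
        if name not in remote_resource_names:
            return True
        # ...or when it is not among the existing resources
        # (no __v<version> suffix handling anymore)...
        if name not in existing_resources:
            return True
        # ...or when the caller forces the push or is running tests.
        if force or run_tests:
            return True
        return False
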