tinybird 0.0.1.dev6__py3-none-any.whl → 0.0.1.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (30)
  1. tinybird/tb/modules/branch.py +0 -21
  2. tinybird/tb/modules/build.py +7 -18
  3. tinybird/tb/modules/cli.py +11 -131
  4. tinybird/tb/modules/common.py +14 -2
  5. tinybird/tb/modules/create.py +10 -14
  6. tinybird/tb/modules/datafile/build.py +2103 -0
  7. tinybird/tb/modules/datafile/build_common.py +118 -0
  8. tinybird/tb/modules/datafile/build_datasource.py +403 -0
  9. tinybird/tb/modules/datafile/build_pipe.py +648 -0
  10. tinybird/tb/modules/datafile/common.py +897 -0
  11. tinybird/tb/modules/datafile/diff.py +197 -0
  12. tinybird/tb/modules/datafile/exceptions.py +23 -0
  13. tinybird/tb/modules/datafile/format_common.py +66 -0
  14. tinybird/tb/modules/datafile/format_datasource.py +160 -0
  15. tinybird/tb/modules/datafile/format_pipe.py +195 -0
  16. tinybird/tb/modules/datafile/parse_datasource.py +41 -0
  17. tinybird/tb/modules/datafile/parse_pipe.py +69 -0
  18. tinybird/tb/modules/datafile/pipe_checker.py +560 -0
  19. tinybird/tb/modules/datafile/pull.py +157 -0
  20. tinybird/tb/modules/datasource.py +1 -1
  21. tinybird/tb/modules/fmt.py +4 -1
  22. tinybird/tb/modules/pipe.py +8 -2
  23. tinybird/tb/modules/prompts.py +1 -1
  24. tinybird/tb/modules/workspace.py +1 -1
  25. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev7.dist-info}/METADATA +1 -1
  26. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev7.dist-info}/RECORD +29 -16
  27. tinybird/tb/modules/datafile.py +0 -6122
  28. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev7.dist-info}/WHEEL +0 -0
  29. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev7.dist-info}/entry_points.txt +0 -0
  30. {tinybird-0.0.1.dev6.dist-info → tinybird-0.0.1.dev7.dist-info}/top_level.txt +0 -0
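
New file — 118 added lines. By the line counts in the list above, this appears to be tinybird/tb/modules/datafile/build_common.py, part of splitting the removed tinybird/tb/modules/datafile.py into the new datafile/ package.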
@@ -0,0 +1,118 @@
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import click
+
+ from tinybird.client import DoesNotExistException, TinyB
+ from tinybird.feedback_manager import FeedbackManager
+
+
+ async def update_tags(resource_id: str, resource_name: str, resource_type: str, tags: List[str], tb_client: TinyB):
+     def get_tags_for_resource(all_tags: dict, resource_id: str, resource_name: str) -> List[str]:
+         tag_names = []
+
+         for tag in all_tags.get("tags", []):
+             for resource in tag.get("resources", []):
+                 if resource.get("id") == resource_id or resource.get("name") == resource_name:
+                     tag_names.append(tag.get("name"))
+                     break  # No need to check other resources in this tag
+
+         return tag_names
+
+     def get_tag(all_tags: dict, tag_name: str) -> Optional[dict]:
+         for tag in all_tags.get("tags", []):
+             if tag.get("name") == tag_name:
+                 return tag
+         return None
+
+     def compare_tags(current_tags: List[str], new_tags: List[str]) -> Tuple[List[str], List[str]]:
+         tags_to_add = list(set(new_tags) - set(current_tags))
+         tags_to_remove = list(set(current_tags) - set(new_tags))
+         return tags_to_add, tags_to_remove
+
+     try:
+         all_tags = await tb_client.get_all_tags()
+     except Exception as e:
+         raise Exception(FeedbackManager.error_getting_tags(error=str(e)))
+
+     # Get all tags of that resource
+     current_tags = get_tags_for_resource(all_tags, resource_id, resource_name)
+
+     # Get the tags to add and remove
+     tags_to_add, tags_to_remove = compare_tags(current_tags, tags)
+
+     # Tags to add
+     for tag_name in tags_to_add:
+         tag = get_tag(all_tags, tag_name)
+
+         if not tag:
+             # Create new tag
+             try:
+                 await tb_client.create_tag_with_resource(
+                     name=tag_name,
+                     resource_id=resource_id,
+                     resource_name=resource_name,
+                     resource_type=resource_type,
+                 )
+             except Exception as e:
+                 raise Exception(FeedbackManager.error_creating_tag(error=str(e)))
+         else:
+             # Update tag with new resource
+             resources = tag.get("resources", [])
+             resources.append({"id": resource_id, "name": resource_name, "type": resource_type})
+             try:
+                 await tb_client.update_tag(tag.get("name", tag_name), resources)
+             except Exception as e:
+                 raise Exception(FeedbackManager.error_updating_tag(error=str(e)))
+
+     # Tags to delete
+     for tag_name in tags_to_remove:
+         tag = get_tag(all_tags, tag_name)
+
+         if tag:
+             resources = tag.get("resources", [])
+             resources = [resource for resource in resources if resource.get("name") != resource_name]
+             try:
+                 await tb_client.update_tag(tag.get("name", tag_name), resources)
+             except Exception as e:
+                 raise Exception(FeedbackManager.error_updating_tag(error=str(e)))
+
+
+ async def update_tags_in_resource(rs: Dict[str, Any], resource_type: str, client: TinyB):
+     filtering_tags = rs.get("filtering_tags", [])
+
+     if not filtering_tags:
+         return
+
+     resource_id = ""
+     resource_name = ""
+
+     if resource_type == "datasource":
+         ds_name = rs["params"]["name"]
+         try:
+             persisted_ds = await client.get_datasource(ds_name)
+             resource_id = persisted_ds.get("id", "")
+             resource_name = persisted_ds.get("name", "")
+         except DoesNotExistException:
+             click.echo(
+                 FeedbackManager.error_tag_generic("Could not get the latest Data Source info for updating its tags.")
+             )
+     elif resource_type == "pipe":
+         pipe_name = rs["name"]
+         try:
+             persisted_pipe = await client.pipe(pipe_name)
+             resource_id = persisted_pipe.get("id", "")
+             resource_name = persisted_pipe.get("name", "")
+         except DoesNotExistException:
+             click.echo(FeedbackManager.error_tag_generic("Could not get the latest Pipe info for updating its tags."))
+
+     if resource_id and resource_name:
+         try:
+             await update_tags(
+                 resource_id=resource_id,
+                 resource_name=resource_name,
+                 resource_type=resource_type,
+                 tags=filtering_tags,
+                 tb_client=client,
+             )
+         except Exception as e:
+             click.echo(FeedbackManager.error_tag_generic(error=str(e)))
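
New file — 403 added lines. By line count and content (Data Source creation and sharing), this appears to be tinybird/tb/modules/datafile/build_datasource.py.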
@@ -0,0 +1,403 @@
+ import os
+ from copy import deepcopy
+ from dataclasses import asdict
+ from typing import Any, Dict, List, Optional
+
+ import click
+
+ from tinybird.client import DoesNotExistException, TinyB
+ from tinybird.feedback_manager import FeedbackManager
+ from tinybird.tb.modules.datafile.common import PREVIEW_CONNECTOR_SERVICES, ImportReplacements
+
+
+ async def new_ds(
+     ds: Dict[str, Any],
+     client: TinyB,
+     user_token: Optional[str],
+     force: bool = False,
+     skip_confirmation: bool = False,
+     current_ws=None,
+     fork_downstream: Optional[bool] = False,
+     fork: Optional[bool] = False,
+     build: Optional[bool] = False,
+ ):
+     ds_name = ds["params"]["name"]
+
+     async def manage_tokens():
+         # search for token with specified name and adds it if not found or adds permissions to it
+         t = None
+         for tk in ds["tokens"]:
+             token_name = tk["token_name"]
+             t = await client.get_token_by_name(token_name)
+             if not t:
+                 token_name = tk["token_name"]
+                 # DS == token_origin.Origins.DATASOURCE
+                 await client.create_token(token_name, [f"DATASOURCES:{tk['permissions']}:{ds_name}"], "DS", ds_name)
+             else:
+                 scopes = [f"DATASOURCES:{tk['permissions']}:{ds_name}"]
+                 for x in t["scopes"]:
+                     sc = x["type"] if "resource" not in x else f"{x['type']}:{x['resource']}"
+                     scopes.append(sc)
+                 await client.alter_tokens(token_name, scopes)
+
+     datasource_exists = False
+     try:
+         existing_ds = await client.get_datasource(ds_name)
+         datasource_exists = True
+     except DoesNotExistException:
+         datasource_exists = False
+
+     engine_param = ds["params"].get("engine", "")
+
+     if (
+         ds["params"].get("service") == "dynamodb"
+         and engine_param != ""
+         and engine_param.lower() != "replacingmergetree"
+     ):
+         raise click.ClickException(FeedbackManager.error_dynamodb_engine_not_supported(engine=engine_param))
+
+     if engine_param.lower() == "join":
+         deprecation_notice = FeedbackManager.warning_deprecated(
+             warning="Data Sources with Join engine are deprecated and will be removed in the next major release of tinybird-cli. Use MergeTree instead."
+         )
+         click.echo(deprecation_notice)
+
+     if not datasource_exists or fork_downstream or fork:
+         params = ds["params"]
+
+         try:
+             if (
+                 params.get("service") in PREVIEW_CONNECTOR_SERVICES
+                 and params.get("connector")
+                 and params.get("bucket_uri")
+             ):
+                 bucket_uri = params.get("bucket_uri")
+                 extension = bucket_uri.split(".")[-1]
+                 if extension == "gz":
+                     extension = bucket_uri.split(".")[-2]
+                 valid_formats = ["csv", "json", "jsonl", "ndjson", "parquet"]
+                 if extension not in valid_formats:
+                     raise Exception(FeedbackManager.error_format(extension=extension, valid_formats=valid_formats))
+                 params["format"] = extension
+             datasource_response = await client.datasource_create_from_definition(params)
+             datasource = datasource_response.get("datasource", {})
+
+             if datasource.get("service") == "dynamodb":
+                 job_id = datasource_response.get("import_id", None)
+                 if job_id:
+                     jobs = await client.jobs(status=["waiting", "working"])
+                     job_url = next((job["job_url"] for job in jobs if job["id"] == job_id), None)
+                     if job_url:
+                         click.echo(FeedbackManager.success_dynamodb_initial_load(job_url=job_url))
+
+             if ds.get("tokens"):
+                 await manage_tokens()
+
+             if ds.get("shared_with"):
+                 if not user_token:
+                     click.echo(FeedbackManager.info_skipping_shared_with_entry())
+                 else:
+                     await share_and_unshare_datasource(
+                         client,
+                         datasource,
+                         user_token,
+                         workspaces_current_shared_with=[],
+                         workspaces_to_share=ds["shared_with"],
+                         current_ws=current_ws,
+                     )
+
+         except Exception as e:
+             raise click.ClickException(FeedbackManager.error_creating_datasource(error=str(e)))
+         return
+
+     if not force:
+         raise click.ClickException(FeedbackManager.error_datasource_already_exists(datasource=ds_name))
+
+     if ds.get("shared_with", []) or existing_ds.get("shared_with", []):
+         if not user_token:
+             click.echo(FeedbackManager.info_skipping_shared_with_entry())
+         else:
+             await share_and_unshare_datasource(
+                 client,
+                 existing_ds,
+                 user_token,
+                 existing_ds.get("shared_with", []),
+                 ds.get("shared_with", []),
+                 current_ws,
+             )
+
+     alter_response = None
+     alter_error_message = None
+     new_description = None
+     new_schema = None
+     new_indices = None
+     new_ttl = None
+
+     try:
+         if datasource_exists and ds["params"]["description"] != existing_ds["description"]:
+             new_description = ds["params"]["description"]
+
+         if datasource_exists and ds["params"].get("engine_ttl") != existing_ds["engine"].get("ttl"):
+             new_ttl = ds["params"].get("engine_ttl", "false")
+
+         # Schema fixed by the kafka connector
+         if datasource_exists and (
+             ds["params"]["schema"].replace(" ", "") != existing_ds["schema"]["sql_schema"].replace(" ", "")
+         ):
+             new_schema = ds["params"]["schema"]
+
+         if datasource_exists:
+             new = [asdict(index) for index in ds.get("params", {}).get("indexes_list", [])]
+             existing = existing_ds.get("indexes", [])
+             new.sort(key=lambda x: x["name"])
+             existing.sort(key=lambda x: x["name"])
+             if len(existing) != len(new) or any([(d, d2) for d, d2 in zip(new, existing) if d != d2]):
+                 new_indices = ds.get("params", {}).get("indexes") or "0"
+         if (
+             new_description
+             or new_schema
+             or new_ttl
+             or ((new_indices is not None) and (not fork_downstream or not fork))
+         ):
+             alter_response = await client.alter_datasource(
+                 ds_name,
+                 new_schema=new_schema,
+                 description=new_description,
+                 ttl=new_ttl,
+                 dry_run=True,
+                 indexes=new_indices,
+             )
+     except Exception as e:
+         if "There were no operations to perform" in str(e):
+             pass
+         else:
+             alter_error_message = str(e)
+
+     if alter_response:
+         click.echo(FeedbackManager.info_datasource_doesnt_match(datasource=ds_name))
+         for operation in alter_response["operations"]:
+             click.echo(f"** - {operation}")
+         if alter_response["operations"] and alter_response.get("dependencies", []):
+             click.echo(FeedbackManager.info_datasource_alter_dependent_pipes())
+             for dependency in alter_response.get("dependencies", []):
+                 click.echo(f"** - {dependency}")
+
+         if skip_confirmation:
+             make_changes = True
+         else:
+             make_changes = click.prompt(FeedbackManager.info_ask_for_alter_confirmation()).lower() == "y"
+
+         if make_changes:
+             await client.alter_datasource(
+                 ds_name,
+                 new_schema=new_schema,
+                 description=new_description,
+                 ttl=new_ttl,
+                 dry_run=False,
+                 indexes=new_indices,
+             )
+             click.echo(FeedbackManager.success_datasource_alter())
+         else:
+             alter_error_message = "Alter datasource cancelled"
+
+     if alter_error_message:
+         raise click.ClickException(
+             FeedbackManager.error_datasource_already_exists_and_alter_failed(
+                 datasource=ds_name, alter_error_message=alter_error_message
+             )
+         )
+
+     if datasource_exists and ds["params"].get("backfill_column") != existing_ds["tags"].get("backfill_column"):
+         params = {
+             "backfill_column": ds["params"].get("backfill_column"),
+         }
+
+         try:
+             click.echo(FeedbackManager.info_update_datasource(datasource=ds_name, params=params))
+             await client.update_datasource(ds_name, params)
+             click.echo(FeedbackManager.success_update_datasource(datasource=ds_name, params=params))
+             make_changes = True
+             alter_response = True
+         except Exception as e:
+             raise click.ClickException(FeedbackManager.error_updating_datasource(datasource=ds_name, error=str(e)))
+
+     connector_data = None
+     promote_error_message = None
+
+     ds_params = ds["params"]
+     service = ds_params.get("service")
+     DATASOURCE_VALID_SERVICES_TO_UPDATE = ["bigquery", "snowflake"]
+     if datasource_exists and service and service in [*DATASOURCE_VALID_SERVICES_TO_UPDATE, *PREVIEW_CONNECTOR_SERVICES]:
+         connector_required_params = {
+             "bigquery": ["service", "cron", "external_data_source"],
+             "snowflake": ["connector", "service", "cron", "external_data_source"],
+             "s3": ["connector", "service", "cron", "bucket_uri"],
+             "s3_iamrole": ["connector", "service", "cron", "bucket_uri"],
+             "gcs": ["connector", "service", "cron", "bucket_uri"],
+         }.get(service, [])
+
+         if not all(key in ds_params for key in connector_required_params):
+             return
+
+         connector = ds_params.get("connector", None)
+
+         if service in PREVIEW_CONNECTOR_SERVICES:
+             connector_id = existing_ds.get("connector", "")
+             if not connector_id:
+                 return
+
+             current_connector = await client.get_connector_by_id(existing_ds.get("connector", ""))
+             if not current_connector:
+                 return
+
+             if current_connector["name"] != ds_params["connection"]:
+                 param = "connection"
+                 datafile_param = ImportReplacements.get_datafile_param_for_linker_param(service, param) or param
+                 raise click.ClickException(FeedbackManager.error_updating_connector_not_supported(param=datafile_param))
+
+             linkers = current_connector.get("linkers", [])
+             linker = next((linker for linker in linkers if linker["datasource_id"] == existing_ds["id"]), None)
+             if not linker:
+                 return
+
+             linker_settings = linker.get("settings", {})
+             for param, value in linker_settings.items():
+                 ds_params_value = ds_params.get(param, None)
+                 if ds_params_value and ds_params_value != value:
+                     datafile_param = ImportReplacements.get_datafile_param_for_linker_param(service, param) or param
+                     raise Exception(
+                         FeedbackManager.error_updating_connector_not_supported(param=datafile_param.upper())
+                     )
+             return
+
+         connector_data = {
+             "connector": connector,
+             "service": service,
+             "cron": ds_params.get("cron", None),
+             "external_data_source": ds_params.get("external_data_source", None),
+             "bucket_uri": ds_params.get("bucket_uri", None),
+             "mode": ds_params.get("mode", "replace"),
+             "query": ds_params.get("query", None),
+             "ingest_now": ds_params.get("ingest_now", False),
+         }
+
+         try:
+             await client.update_datasource(ds_name, connector_data)
+             click.echo(FeedbackManager.success_promoting_datasource(datasource=ds_name))
+             return
+         except Exception as e:
+             promote_error_message = str(e)
+
+     if alter_response and make_changes:
+         # alter operation finished
+         pass
+     else:
+         # removed replacing by default. When a datasource is removed data is
+         # removed and all the references needs to be updated
+         if (
+             os.getenv("TB_I_KNOW_WHAT_I_AM_DOING")
+             and click.prompt(FeedbackManager.info_ask_for_datasource_confirmation()) == ds_name
+         ):  # TODO move to CLI
+             try:
+                 await client.datasource_delete(ds_name)
+                 click.echo(FeedbackManager.success_delete_datasource(datasource=ds_name))
+             except Exception:
+                 raise click.ClickException(FeedbackManager.error_removing_datasource(datasource=ds_name))
+             return
+         else:
+             if alter_error_message:
+                 raise click.ClickException(
+                     FeedbackManager.error_datasource_already_exists_and_alter_failed(
+                         datasource=ds_name, alter_error_message=alter_error_message
+                     )
+                 )
+             if promote_error_message:
+                 raise click.ClickException(
+                     FeedbackManager.error_promoting_datasource(datasource=ds_name, error=promote_error_message)
+                 )
+             else:
+                 click.echo(FeedbackManager.warning_datasource_already_exists(datasource=ds_name))
+
+
+ async def share_and_unshare_datasource(
+     client: TinyB,
+     datasource: Dict[str, Any],
+     user_token: str,
+     workspaces_current_shared_with: List[str],
+     workspaces_to_share: List[str],
+     current_ws: Optional[Dict[str, Any]],
+ ) -> None:
+     datasource_name = datasource.get("name", "")
+     datasource_id = datasource.get("id", "")
+     workspaces: List[Dict[str, Any]]
+
+     # In case we are pushing to a branch, we don't share the datasource
+     # FIXME: Have only once way to get the current workspace
+     if current_ws:
+         # Force to get all the workspaces the user can access
+         workspace = current_ws
+         workspaces = (await client.user_workspaces()).get("workspaces", [])
+     else:
+         workspace = await client.user_workspace_branches()
+         workspaces = workspace.get("workspaces", [])
+
+     if workspace.get("is_branch", False):
+         click.echo(FeedbackManager.info_skipping_sharing_datasources_branch(datasource=datasource["name"]))
+         return
+
+     # We duplicate the client to use the user_token
+     user_client: TinyB = deepcopy(client)
+     user_client.token = user_token
+     if not workspaces_current_shared_with:
+         for workspace_to_share in workspaces_to_share:
+             w: Optional[Dict[str, Any]] = next((w for w in workspaces if w["name"] == workspace_to_share), None)
+             if not w:
+                 raise Exception(
+                     f"Unable to share datasource with the workspace {workspace_to_share}. Review that you have the admin permissions on this workspace"
+                 )
+
+             await user_client.datasource_share(
+                 datasource_id=datasource_id,
+                 current_workspace_id=workspace.get("id", ""),
+                 destination_workspace_id=w.get("id", ""),
+             )
+             click.echo(
+                 FeedbackManager.success_datasource_shared(datasource=datasource_name, workspace=w.get("name", ""))
+             )
+     else:
+         shared_with = [
+             w
+             for w in workspaces
+             if next((ws for ws in workspaces_current_shared_with if ws == w["id"] or ws == w["name"]), None)
+         ]
+         defined_to_share_with = [
+             w for w in workspaces if next((ws for ws in workspaces_to_share if ws == w["id"] or ws == w["name"]), None)
+         ]
+         workspaces_need_to_share = [w for w in defined_to_share_with if w not in shared_with]
+         workspaces_need_to_unshare = [w for w in shared_with if w not in defined_to_share_with]
+
+         for w in workspaces_need_to_share:
+             await user_client.datasource_share(
+                 datasource_id=datasource_id,
+                 current_workspace_id=workspace.get("id", ""),
+                 destination_workspace_id=w.get("id", ""),
+             )
+             click.echo(
+                 FeedbackManager.success_datasource_shared(datasource=datasource["name"], workspace=w.get("name", ""))
+             )
+
+         for w in workspaces_need_to_unshare:
+             await user_client.datasource_unshare(
+                 datasource_id=datasource_id,
+                 current_workspace_id=workspace.get("id", ""),
+                 destination_workspace_id=w.get("id", ""),
+             )
+             click.echo(
+                 FeedbackManager.success_datasource_unshared(datasource=datasource_name, workspace=w.get("name", ""))
+             )
+
+
+ def is_datasource(resource: Optional[Dict[str, Any]]) -> bool:
+     if resource and resource.get("resource") == "datasources":
+         return True
+     return False