tinybird-cli 5.22.3.dev0__py3-none-any.whl → 6.0.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tinybird/connectors.py DELETED
@@ -1,422 +0,0 @@
1
- import logging
2
- import os
3
- import time
4
- from abc import ABC, abstractmethod
5
- from datetime import datetime
6
- from typing import Any, Dict, List, Optional, Tuple
7
- from urllib.parse import unquote
8
-
9
- import requests
10
-
11
- from .client import ConnectorNothingToLoad
12
-
13
# Names of the connectors whose optional third-party dependencies failed to import.
UNINSTALLED_CONNECTORS = []

# common dependencies to snowflake and bigquery connectors
try:
    from google.cloud import storage
    from google.cloud.storage.blob import Blob
    from google.oauth2 import service_account
except ImportError:
    UNINSTALLED_CONNECTORS += ["bigquery", "snowflake"]

try:
    from google.cloud import bigquery
except ImportError:
    UNINSTALLED_CONNECTORS += ["bigquery"]

try:
    import googleapiclient.discovery
    import snowflake.connector
    from google.api_core.exceptions import PreconditionFailed
except ImportError:
    UNINSTALLED_CONNECTORS += ["snowflake"]

# De-duplicate the names. Sorting keeps the resulting list order stable between
# runs; plain set iteration order for strings depends on hash randomization.
UNINSTALLED_CONNECTORS = sorted(set(UNINSTALLED_CONNECTORS))

logger = logging.getLogger("tinybird-connect")
38
-
39
-
40
def _now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    current = datetime.now()
    return format(current, "%Y-%m-%d %H:%M:%S")
42
-
43
-
44
def _log(text):
    """Log ``text`` at INFO level on the module logger, prefixed with the current timestamp."""
    message = f"{_now()} - {text}"
    logger.info(message)
46
-
47
-
48
class GCS:
    """Helper around the Google Cloud Storage bucket that exported files are staged in.

    ``options`` is a mapping; this class reads its ``service_account``,
    ``project_id`` and ``bucket_name`` entries.
    """

    # Size threshold at which "append" mode closes a composed chunk; it is
    # compared against the summed ``blob.size`` of the blobs in the chunk.
    MAX_FILE_SIZE = int(5 * 1024**3)
    # Maximum number of blobs handed to a single ``Blob.compose`` call.
    MAX_COMPOSE = 31
    # Timeout passed to ``list_blobs`` (presumably seconds -- confirm against the client docs).
    READ_TIMEOUT = 3
    # Number of timed-out ``list_blobs`` attempts tolerated before giving up.
    MAX_LIST_RETRIES = 5

    def __init__(self, options):
        """Load the service-account credentials and create the storage client.

        When ``options["service_account"]`` is falsy, the key-file path is read
        from the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable instead.
        """
        self.options = options
        filename = options["service_account"] or os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
        self.credentials = service_account.Credentials.from_service_account_file(
            filename=filename,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
        self.storage_client = storage.Client(project=options["project_id"], credentials=self.credentials)

    def gcs_url(self):
        """Return the bucket root as a ``gcs://`` URL (with trailing slash)."""
        return f"gcs://{self.bucket_name()}/"

    def gs_url(self):
        """Return the bucket root as a ``gs://`` URL (with trailing slash)."""
        return f"gs://{self.bucket_name()}/"

    def bucket_name(self):
        """Return the configured bucket name."""
        return self.options["bucket_name"]

    def sign(self, blob_name):
        """Return a v4 signed HTTPS URL for ``blob_name``.

        Raises:
            Exception: when the bucket holds no blob with that name.
        """
        bucket = self.storage_client.get_bucket(self.bucket_name())
        blob = bucket.get_blob(blob_name)
        if not blob:
            raise Exception("Warning: File not found. This probably just means there is no new data to load")
        return blob.generate_signed_url(expiration=86400, version="v4", scheme="https")

    def grant_access(self, member_name):
        """Bind ``member_name`` to this bucket's custom role, creating the role if it is not listed."""
        role_name = self.get_role_name()
        service = googleapiclient.discovery.build("iam", "v1", credentials=self.credentials)
        result = service.roles().list(parent="projects/" + self.options["project_id"]).execute()
        # The response carries no "roles" key when nothing is listed.
        exists = any(role["title"] == role_name for role in result.get("roles", []))
        if not exists:
            role_name = self.create_custom_role(service)
        self.add_member_to_bucket(member_name, role_name, self.bucket_name())

    def get_role_name(self):
        """Return the custom role id: the bucket name without dashes, suffixed ``_5``, lower-cased."""
        return (self.bucket_name().replace("-", "") + "_5").lower()

    def create_custom_role(self, service):
        """Create the project-level custom role through ``service`` and return its name."""
        role_name = self.get_role_name()
        service.projects().roles().create(
            parent=f"projects/{self.options['project_id']}",
            body={
                "roleId": role_name,
                "role": {
                    "title": role_name,
                    "description": role_name,
                    "includedPermissions": [
                        "storage.objects.create",
                        "storage.buckets.get",
                        "storage.objects.delete",
                        "storage.objects.get",
                        "storage.objects.list",
                    ],
                    "stage": "GA",
                },
            },
        ).execute()
        return role_name

    def add_member_to_bucket(self, member_name, iam_role, bucket_name, retry=True):
        """Append a binding of ``iam_role`` for service account ``member_name`` to the bucket IAM policy.

        A ``PreconditionFailed`` error is logged; when ``retry`` is true the
        operation is attempted exactly one more time.
        """
        try:
            project_id = self.options["project_id"]
            role = f"projects/{project_id}/roles/{iam_role}"
            member = f"serviceAccount:{member_name}"

            bucket = self.storage_client.bucket(bucket_name)
            policy = bucket.get_iam_policy(requested_policy_version=3)
            policy.bindings.append({"role": role, "members": {member}})
            bucket.set_iam_policy(policy)
        except PreconditionFailed as e:
            _log(str(e))
            if retry:
                _log("retrying...")
                self.add_member_to_bucket(member_name, iam_role, bucket_name, retry=False)

    def compose(self, source_prefix: str, destination_blob_name: str, mode: str) -> List[str]:
        """Compose the blobs found under ``source_prefix`` and return the resulting blob names.

        * ``mode == "append"``: sources are grouped into chunks, each closed once
          it holds ``MAX_COMPOSE`` blobs or its summed size reaches
          ``MAX_FILE_SIZE``; every chunk is composed into one destination blob.
        * ``mode == "replace"``: blobs are composed repeatedly (``MAX_COMPOSE``
          at a time) until a single blob remains.
        * any other ``mode``: nothing is composed and an empty list is returned.

        Blobs that were used as compose inputs are deleted afterwards (best effort).

        Raises:
            Exception: when the blob listing timed out on every attempt.
            ConnectorNothingToLoad: when no blob matches ``source_prefix``.
        """
        bucket = self.storage_client.bucket(self.bucket_name())
        retries: int = 0
        sources: Optional[List[Blob]] = None
        # for some reason when this list_blobs run in GH actions takes forever and fails
        # so retry many times with a lower timeout.
        # Loop on ``sources is None`` (not on falsiness): an empty listing is a
        # valid answer and must end the loop instead of being polled forever.
        while sources is None and retries < self.MAX_LIST_RETRIES:
            try:
                sources = list(
                    self.storage_client.list_blobs(
                        self.bucket_name(), prefix=source_prefix, timeout=self.READ_TIMEOUT, fields="items(name,size)"
                    )
                )
            except requests.exceptions.ReadTimeout:
                _log("exception fetching blob list from GCS")
                retries += 1

        if sources is None:
            raise Exception("couldn't load list of blobs from GCS")

        _log(f"Exported {len(sources)} CSV files")
        if not sources:
            raise ConnectorNothingToLoad("Warning: Nothing to load. Aborting next steps")

        def compose_with_limits(
            sources: List[Blob], max_file_size: float = float("inf"), iteration: int = 0
        ) -> Tuple[List[Blob], List[Blob], int]:
            """Compose ``sources`` chunk by chunk.

            Returns the composed blobs, the blobs consumed as inputs, and the next iteration index.
            """
            blobs: List[Blob] = []
            blobs_to_delete: List[Blob] = []
            chunk: List[Blob] = []
            chunk_size: int = 0
            count: int = 0
            last_index = len(sources) - 1
            for i, blob in enumerate(sources):
                chunk.append(blob)
                chunk_size += blob.size

                # Flush when a limit is hit or when this is the final source.
                if chunk_size >= max_file_size or len(chunk) >= self.MAX_COMPOSE or i == last_index:
                    destination_name: str = f"{destination_blob_name}_{iteration}_part{count}.csv"
                    destination: Blob = bucket.blob(destination_name)
                    destination.content_type = "text/csv"
                    destination.compose(chunk)

                    blobs.append(destination)
                    blobs_to_delete += chunk
                    chunk = []
                    chunk_size = 0
                    count += 1

            return blobs, blobs_to_delete, iteration + 1

        blobs: List[Blob] = []
        blobs_to_delete: List[Blob] = []
        if mode == "append":
            blobs, blobs_to_delete, _ = compose_with_limits(sources, max_file_size=self.MAX_FILE_SIZE)
        elif mode == "replace":
            blobs = sources
            iteration: int = 0
            while len(blobs) > 1:
                blobs, blobs_to_delete_iteration, iteration = compose_with_limits(blobs, iteration=iteration)
                blobs_to_delete += blobs_to_delete_iteration

        _log("Removing temp blobs")
        for blob in blobs_to_delete:
            # Cleanup is best effort: a failed delete must not fail the export,
            # but it is logged and the remaining blobs are still attempted.
            try:
                blob.delete()
            except Exception as e:
                _log(f"Could not remove temp blob: {e}")

        _log("Done composing!")
        return [blob.name for blob in blobs]

    def delete(self, blob_name):
        """Delete ``blob_name`` from the configured bucket."""
        bucket = self.storage_client.bucket(self.bucket_name())
        bucket.delete_blob(blob_name)
203
-
204
-
205
class Connector(ABC):
    """Abstract base for connectors that export query results through a GCS bucket.

    ``options`` must provide ``project_id``, ``service_account`` and
    ``bucket_name``. The derived GCS settings are stored back on the same
    mapping under the ``"gcs"`` key and used to build ``self.gcs``.
    """

    def __init__(self, options: Dict[str, Any]):
        self.options = options
        gcs_options = dict(
            project_id=options["project_id"],
            service_account=options["service_account"],
            sign_service_account=options["service_account"],
            bucket_name=options["bucket_name"],
        )
        # Written onto the caller-supplied mapping itself, not onto a copy.
        self.options["gcs"] = gcs_options
        self.gcs = GCS(gcs_options)

    @abstractmethod
    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
        """Export the result of ``sql`` to the bucket and return a list of strings locating the files."""

    @abstractmethod
    def clean(self, blob_name):
        """Remove ``blob_name`` from the bucket."""

    @abstractmethod
    def datasource_analyze(self, resource):
        """Return an analysis of ``resource``."""
227
-
228
-
229
class BigQuery(Connector):
    """Connector that exports BigQuery query results to GCS as CSV files."""

    def __init__(self, options):
        super().__init__(options)
        self.configure()

    def configure(self):
        """No connector-specific setup is performed for BigQuery."""

    def datasource_analyze(self, resource):
        """Not supported for BigQuery; always raises ``Exception``."""
        raise Exception("BigQuery does not support data source analyze")

    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
        """Run ``sql`` through ``EXPORT DATA``, compose the exported files and return signed URLs.

        ``destination`` is suffixed with the current epoch milliseconds before
        use. The ``with_headers`` option (default ``False``) controls whether
        the CSV files carry a header row.
        """
        mm = str(int(round(time.time() * 1000)))
        destination = f"{destination}_{mm}"
        with_headers: bool = self.options.get("with_headers", False)
        sql = f"""
            EXPORT DATA OPTIONS(
                uri='{self.gcs.gs_url()}{destination}*.csv',
                format='CSV',
                overwrite=true,
                header={"true" if with_headers else "false"},
                field_delimiter=',') AS
            {sql}
        """
        self.execute(sql)
        urls: List[str] = self.gcs.compose(destination, f"{destination}_final", mode)
        return [unquote(self.gcs.sign(url)) for url in urls]

    def execute(self, sql, result=False):
        """Run ``sql`` and return ``query_job.result()``.

        ``result`` is accepted for signature parity with the other connector
        but is not consulted.
        """
        client = self.connector()
        query_job = client.query(
            sql,
            job_config=bigquery.QueryJobConfig(),
        )
        return query_job.result()  # Waits for the query to finish

    def connector(self):
        """Build a BigQuery client from the service-account key file.

        When the ``service_account`` option is falsy, the key-file path is read
        from ``GOOGLE_APPLICATION_CREDENTIALS``, mirroring how ``GCS`` resolves
        the same option, so both clients work from the same configuration.
        """
        filename = self.options["service_account"] or os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
        credentials = service_account.Credentials.from_service_account_file(
            filename=filename,
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
        )
        self.credentials = credentials

        return bigquery.Client(credentials=credentials, project=credentials.project_id)

    def clean(self, blob_name):
        """Delete ``blob_name`` from the staging bucket."""
        self.gcs.delete(blob_name)
279
-
280
-
281
class Snowflake(Connector):
    """Connector that unloads Snowflake query results to GCS through an external stage."""

    # Names used when the ``storage_integration`` / ``stage`` options are unset or falsy.
    DEFAULT_STORAGE_INTEGRATION = "tb__gcs_int"
    DEFAULT_STAGE = "tb__gcs_stage"

    def __init__(self, options):
        super().__init__(options)
        self.configure()

    def configure(self):
        """Create the storage integration and stage, then grant the integration's member access to the bucket."""
        self.create_storage_integration()
        self.create_stage()
        self.gcs.grant_access(self.get_sf_member_name())

    def create_storage_integration(self):
        """Create the storage integration; an "already exists" error is ignored, others propagate."""
        sql = f"""
            create storage integration {self.storage_integration()}
            type = external_stage
            storage_provider = gcs
            enabled = true
            storage_allowed_locations = ('{self.gcs.gcs_url()}');
        """
        try:
            self.execute(sql)
        except Exception as e:
            if "already exists" not in str(e):
                raise

    def get_sf_member_name(self):
        """Return the integration's ``STORAGE_GCP_SERVICE_ACCOUNT`` value.

        Returns ``None`` when the property is not present or the lookup fails.
        """
        try:
            sql = f"DESC STORAGE INTEGRATION {self.storage_integration()};"
            result = self.execute(sql, result=True)
            for row in result:
                if row["property"] == "STORAGE_GCP_SERVICE_ACCOUNT":
                    return row["property_value"]
        except Exception:
            return None
        return None

    def create_stage(self):
        """Create the external stage pointing at the bucket.

        Failures never propagate: an "already exists" error is expected and
        ignored, any other error is logged so it is not lost silently.
        """
        sql = f"""
            create stage "{self.options["schema"]}".{self.stage()}
            url='{self.gcs.gcs_url()}'
            storage_integration = {self.storage_integration()};
        """
        try:
            self.execute(sql)
        except Exception as e:
            if "already exists" not in str(e):
                _log(f"Could not create stage: {e}")

    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
        """Unload ``sql`` into the stage with ``copy into``, compose the files and return signed URLs.

        ``destination`` is suffixed with the current epoch milliseconds before
        use. The ``with_headers`` option (default ``False``) controls the
        ``header`` copy option.
        """
        mm = str(int(round(time.time() * 1000)))
        destination = f"{destination}_{mm}"
        with_headers: bool = self.options.get("with_headers", False)
        sql = f"""copy into '@{self.stage()}/{destination}'
            from ({sql})
            overwrite = true
            file_format = (TYPE=CSV COMPRESSION=NONE ESCAPE_UNENCLOSED_FIELD=NONE FIELD_DELIMITER='|' FIELD_OPTIONALLY_ENCLOSED_BY='"' null_if=())
            header = {"true" if with_headers else "false"}
            max_file_size = 2500000000;
        """
        self.execute(sql)
        urls: List[str] = self.gcs.compose(destination, f"{destination}_final", mode)
        return [unquote(self.gcs.sign(url)) for url in urls]

    def execute(self, sql, result=False):
        """Run ``sql`` on a fresh connection after selecting the configured role and warehouse.

        Returns the fetched rows when ``result`` is truthy, otherwise returns
        ``result`` unchanged. The cursor and the connection are always closed,
        including when creating or closing the cursor fails.
        """
        ctx = self.connector()
        try:
            cs = ctx.cursor(snowflake.connector.DictCursor)
            try:
                cs.execute(f"use role {self.options['role']};")
                cs.execute(f"use warehouse {self.options['warehouse']};")
                cs.execute(sql)
                if result:
                    result = cs.fetchall()
            finally:
                cs.close()
        finally:
            ctx.close()
        return result

    def datasource_analyze(self, resource):
        """returns .datasource file for resource"""
        # TODO: deal with the right precisions for Integers
        YES = "Y"
        NO = "N"

        def from_snowflake_type(t: str, nullable=NO) -> str:
            """transforms snowflake types to CH ones"""
            the_type = "String"
            if t.startswith("NUMBER"):
                the_type = "Int32"
            if t.startswith(("FLOAT", "DOUBLE", "REAL", "NUMERIC", "DECIMAL")):
                the_type = "Float32"
            if t == "DATE":
                the_type = "Date"
            if t == "DATETIME" or t.startswith("TIMESTAMP"):
                the_type = "DateTime"
            if nullable == YES:
                the_type = f"Nullable({the_type})"
            return the_type

        result = self.execute(f"DESCRIBE TABLE {resource};", result=True)
        sql = []
        columns = []
        for row in result:
            # Only rows describing columns contribute to the schema.
            if row["kind"] == "COLUMN":
                sql.append(f"{row['name']} {from_snowflake_type(row['type'], row['null?'])}")
                columns.append(
                    {
                        "path": row["name"],
                        "name": row["name"],
                        "present_pct": 1 if row["null?"] != YES else None,
                        "recommended_type": from_snowflake_type(row["type"]),
                    }
                )
        return {"analysis": {"columns": columns, "schema": ", ".join(sql)}}

    def connector(self):
        """Open a Snowflake connection using whichever of the connection options are present."""
        auth = {
            key: self.options[key]
            for key in self.options.keys() & {"user", "password", "account", "warehouse", "database", "schema"}
        }
        return snowflake.connector.connect(**auth)

    def stage(self):
        """Return the configured stage name, or ``DEFAULT_STAGE`` when unset or falsy."""
        return self.options.get("stage") or self.DEFAULT_STAGE

    def storage_integration(self):
        """Return the configured storage integration name, or ``DEFAULT_STORAGE_INTEGRATION`` when unset or falsy."""
        return self.options.get("storage_integration") or self.DEFAULT_STORAGE_INTEGRATION

    def clean(self, blob_name):
        """Delete ``blob_name`` from the staging bucket."""
        self.gcs.delete(blob_name)
413
-
414
-
415
# Registry mapping a connector source name to the class that implements it;
# create_connector looks classes up here.
connectors = {
    "snowflake": Snowflake,
    "bigquery": BigQuery,
}
419
-
420
-
421
def create_connector(source: str, params: Dict[str, Any]) -> Connector:
    """Instantiate the connector class registered under ``source`` with ``params``.

    Raises:
        KeyError: when ``source`` is not a key of ``connectors``.
    """
    connector_cls = connectors[source]
    return connector_cls(params)
@@ -1,45 +0,0 @@
1
- tinybird/__cli__.py,sha256=bnGPJ7820E46g4vYb-IUKKWXtNxDC9dv3dxTnquqY4A,237
2
- tinybird/check_pypi.py,sha256=_4NkharLyR_ELrAdit-ftqIWvOf7jZNPt3i76frlo9g,975
3
- tinybird/client.py,sha256=VYKuR10NwYmvyvC5QsJPE0H4k9dLDb_TxwxYbEyP1Xs,55592
4
- tinybird/config.py,sha256=4URIMvEO-ysbXrd-hGbXDAeymZfdiPEGauJPaUeccXI,7493
5
- tinybird/connectors.py,sha256=TSux0opkG3gbzTTyRIUfNk4RJoLvTIyp42rdsmmIAYg,15083
6
- tinybird/context.py,sha256=o4yvlXPkMLmdh-XJl3wpmqPAMeRRz5ScKzKlHHKn_I8,1201
7
- tinybird/datafile_common.py,sha256=xYnTdUv_yx17QO37S4SlWOK5wHD103LoJe3l8SvpWNk,233167
8
- tinybird/datatypes.py,sha256=r4WCvspmrXTJHiPjjyOTiZyZl31FO3Ynkwq4LQsYm6E,11059
9
- tinybird/feedback_manager.py,sha256=T370HP53XP4EJ-FCxmsuW6v-eG_xyonGr69a_BRVSVw,69978
10
- tinybird/git_settings.py,sha256=Sw_8rGmribEFJ4Z_6idrVytxpFYk7ez8ei0qHULzs3E,3934
11
- tinybird/sql.py,sha256=7pkwQMUVy0CH5zFgLMZyCx1BeaJSQYC05EmOb9vO3Ls,48694
12
- tinybird/sql_template.py,sha256=VLbog44F0VY5l5yV3nErz75l4hKko5QxywXi7KugaGM,103297
13
- tinybird/sql_template_fmt.py,sha256=Ma4qcs-2r8ZXQC4GUmrCqYz34DsnGF8k5lE2Jwnr314,10638
14
- tinybird/sql_toolset.py,sha256=J4LnRyv-BcoR6Axv24yvPJewCeg0zOZgz-F6ml3q12g,27228
15
- tinybird/syncasync.py,sha256=IPnOx6lMbf9SNddN1eBtssg8vCLHMt76SuZ6YNYm-Yk,27761
16
- tinybird/tb_cli.py,sha256=q1LGAsBVVMJsjR2HK62Pu6vpVtLzNmH8wHrEVUUdVkU,744
17
- tinybird/tornado_template.py,sha256=p3V4ERIWPRdtQuqkaL1NVjivaFoVXZgPJUxwVPce4nc,41850
18
- tinybird/ch_utils/constants.py,sha256=yTNizMzgYNBzUc2EV3moBfdrDIggOe9hiuAgWF7sv2c,4333
19
- tinybird/ch_utils/engine.py,sha256=UEis9pIYg9a7dpGHVYZ2D3pS4yP1MCFfmvzW7Fkrl1U,41270
20
- tinybird/tb_cli_modules/auth.py,sha256=urjPOQtukbQ2ooRUJmUMXS98mLfpOl64Q9NwG0DQjnw,9022
21
- tinybird/tb_cli_modules/branch.py,sha256=92jKpb28yZMav4w5s6HboGty8ivtgSIFh1zxvLK3mBo,39348
22
- tinybird/tb_cli_modules/cicd.py,sha256=0lMkb6CVOFZl5HOwgY8mK4T4mgI7O8335UngLXtCc-c,13851
23
- tinybird/tb_cli_modules/cli.py,sha256=E8648ygwOMFaj1d02qYCWtnpQfei1sYBdE2r4DWg4EM,63394
24
- tinybird/tb_cli_modules/common.py,sha256=VQQjGmbbKR09TWn1lxd_ip4uykqaBcY6UoXB2MlWvgQ,82985
25
- tinybird/tb_cli_modules/config.py,sha256=IsgdtFRnUrkY8-Zo32lmk6O7u3bHie1QCxLwgp4ewgA,11546
26
- tinybird/tb_cli_modules/connection.py,sha256=u_H08nnAUvO0GevAT4WoHrTdtIt8BMPvRJMa-vIJaDg,29399
27
- tinybird/tb_cli_modules/datasource.py,sha256=WM4I1XFkMOBDFglo1GSVIOVELvjjDfs3SC_9zEQUERM,35768
28
- tinybird/tb_cli_modules/exceptions.py,sha256=pmucP4kTF4irIt7dXiG-FcnI-o3mvDusPmch1L8RCWk,3367
29
- tinybird/tb_cli_modules/fmt.py,sha256=edQap4tAqWMWogSIx5zriT75naLi73XTB3NwatmcrFw,3518
30
- tinybird/tb_cli_modules/job.py,sha256=AG69LPb9MbobA1awwJFZJvxqarDKfRlsBjw2V1zvYqc,2964
31
- tinybird/tb_cli_modules/pipe.py,sha256=LEPZ6EqQsW525W1UxuMMAGMMtNMJqxz5G3ZsCHO5XTc,31442
32
- tinybird/tb_cli_modules/regions.py,sha256=QjsL5H6Kg-qr0aYVLrvb1STeJ5Sx_sjvbOYO0LrEGMk,166
33
- tinybird/tb_cli_modules/tag.py,sha256=9YHnruPnUNp1IJUe4qcSEMg9EbquIRo--Nxcsbvkvq8,3488
34
- tinybird/tb_cli_modules/telemetry.py,sha256=W098H6jmS4kpE7hN3tadaREBTf7oMocel-lkKWN0pU8,10466
35
- tinybird/tb_cli_modules/test.py,sha256=Vf8oK96V81HdKGsT79y6MUz6oz_VrYIwTbRnzzJs4rQ,4350
36
- tinybird/tb_cli_modules/token.py,sha256=JXATKTlbXohP9ZDZjlz8E4VYG6zrknKZhuz_wh1zBBc,13793
37
- tinybird/tb_cli_modules/workspace.py,sha256=yg3yb7_GniGJnOy2HqwEzePl47gQD-hywuQJyclZHi0,12455
38
- tinybird/tb_cli_modules/workspace_members.py,sha256=ksXsjd233y9-sNlz4Qb-meZbX4zn1B84e_bSm2i8rhg,8731
39
- tinybird/tb_cli_modules/tinyunit/tinyunit.py,sha256=4QlVoHE-O3akOBuIIJm3Mx2ewiZmPTT9vdgqIi9byTw,11267
40
- tinybird/tb_cli_modules/tinyunit/tinyunit_lib.py,sha256=NHoXcCHPDcKWYLzgP3NViho3Ey-6RV-ynPDzySPrTPE,1817
41
- tinybird_cli-5.22.3.dev0.dist-info/METADATA,sha256=dRLdOfnK115mYqNO88wOCweM89sO9AjH9AydMho3ECY,81473
42
- tinybird_cli-5.22.3.dev0.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
43
- tinybird_cli-5.22.3.dev0.dist-info/entry_points.txt,sha256=PKPKuPmA4IfJYnCFHHUiw-aAWZuBomFvwCklv1OyCjE,43
44
- tinybird_cli-5.22.3.dev0.dist-info/top_level.txt,sha256=VqqqEmkAy7UNaD8-V51FCoMMWXjLUlR0IstvK7tJYVY,54
45
- tinybird_cli-5.22.3.dev0.dist-info/RECORD,,