tinybird-0.0.1.dev0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of tinybird might be problematic.
- tinybird/__cli__.py +8 -0
- tinybird/ch_utils/constants.py +244 -0
- tinybird/ch_utils/engine.py +855 -0
- tinybird/check_pypi.py +25 -0
- tinybird/client.py +1281 -0
- tinybird/config.py +117 -0
- tinybird/connectors.py +428 -0
- tinybird/context.py +23 -0
- tinybird/datafile.py +5589 -0
- tinybird/datatypes.py +434 -0
- tinybird/feedback_manager.py +1022 -0
- tinybird/git_settings.py +145 -0
- tinybird/sql.py +865 -0
- tinybird/sql_template.py +2343 -0
- tinybird/sql_template_fmt.py +281 -0
- tinybird/sql_toolset.py +350 -0
- tinybird/syncasync.py +682 -0
- tinybird/tb_cli.py +25 -0
- tinybird/tb_cli_modules/auth.py +252 -0
- tinybird/tb_cli_modules/branch.py +1043 -0
- tinybird/tb_cli_modules/cicd.py +434 -0
- tinybird/tb_cli_modules/cli.py +1571 -0
- tinybird/tb_cli_modules/common.py +2082 -0
- tinybird/tb_cli_modules/config.py +344 -0
- tinybird/tb_cli_modules/connection.py +803 -0
- tinybird/tb_cli_modules/datasource.py +900 -0
- tinybird/tb_cli_modules/exceptions.py +91 -0
- tinybird/tb_cli_modules/fmt.py +91 -0
- tinybird/tb_cli_modules/job.py +85 -0
- tinybird/tb_cli_modules/pipe.py +858 -0
- tinybird/tb_cli_modules/regions.py +9 -0
- tinybird/tb_cli_modules/tag.py +100 -0
- tinybird/tb_cli_modules/telemetry.py +310 -0
- tinybird/tb_cli_modules/test.py +107 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit.py +340 -0
- tinybird/tb_cli_modules/tinyunit/tinyunit_lib.py +71 -0
- tinybird/tb_cli_modules/token.py +349 -0
- tinybird/tb_cli_modules/workspace.py +269 -0
- tinybird/tb_cli_modules/workspace_members.py +212 -0
- tinybird/tornado_template.py +1194 -0
- tinybird-0.0.1.dev0.dist-info/METADATA +2815 -0
- tinybird-0.0.1.dev0.dist-info/RECORD +45 -0
- tinybird-0.0.1.dev0.dist-info/WHEEL +5 -0
- tinybird-0.0.1.dev0.dist-info/entry_points.txt +2 -0
- tinybird-0.0.1.dev0.dist-info/top_level.txt +4 -0
tinybird/config.py
ADDED
```diff
@@ -0,0 +1,117 @@
+import json
+from os import environ, getcwd
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import aiofiles
+import click
+
+from tinybird import __cli__
+from tinybird.feedback_manager import FeedbackManager
+
+try:
+    from tinybird.__cli__ import __revision__
+except Exception:
+    __revision__ = ""
+
+DEFAULT_API_HOST = "https://api.tinybird.co"
+DEFAULT_LOCALHOST = "http://localhost:8001"
+CURRENT_VERSION = f"{__cli__.__version__}"
+VERSION = f"{__cli__.__version__} (rev {__revision__})"
+DEFAULT_UI_HOST = "https://app.tinybird.co"
+SUPPORTED_CONNECTORS = ["bigquery", "snowflake"]
+PROJECT_PATHS = ["datasources", "datasources/fixtures", "endpoints", "pipes", "tests", "scripts", "deploy"]
+DEPRECATED_PROJECT_PATHS = ["endpoints"]
+MIN_WORKSPACE_ID_LENGTH = 36
+LEGACY_HOSTS = {
+    "https://api.tinybird.co": "https://app.tinybird.co/gcp/europe-west3",
+    "https://api.us-east.tinybird.co": "https://app.tinybird.co/gcp/us-east4",
+    "https://api.us-east.aws.tinybird.co": "https://app.tinybird.co/aws/us-east-1",
+    "https://api.us-west-2.aws.tinybird.co": "https://app.tinybird.co/aws/us-west-2",
+    "https://api.eu-central-1.aws.tinybird.co": "https://app.tinybird.co/aws/eu-central-1",
+    "https://api.ap-east.aws.tinybird.co": "https://app.tinybird.co/aws/ap-east",
+    "https://api.wadus1.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus1",
+    "https://api.wadus2.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus2",
+    "https://api.wadus3.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus3",
+    "https://api.wadus4.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus4",
+    "https://api.wadus5.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus5",
+    "https://api.wadus6.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus6",
+    "https://api.wadus1.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus1",
+    "https://api.wadus2.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus2",
+    "https://api.wadus3.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus3",
+    "https://api.wadus4.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus4",
+    "https://api.wadus5.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus5",
+    "https://api.wadus6.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus6",
+    "https://ui.tinybird.co": "https://app.tinybird.co/gcp/europe-west3",
+    "https://ui.us-east.tinybird.co": "https://app.tinybird.co/gcp/us-east4",
+    "https://ui.us-east.aws.tinybird.co": "https://app.tinybird.co/aws/us-east-1",
+    "https://ui.us-west-2.aws.tinybird.co": "https://app.tinybird.co/aws/us-west-2",
+    "https://ui.eu-central-1.aws.tinybird.co": "https://app.tinybird.co/aws/eu-central-1",
+    "https://ui.ap-east.aws.tinybird.co": "https://app.tinybird.co/aws/ap-east",
+    "https://inditex-tech.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-tech",
+    "https://inditex-c-stg.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-c-stg",
+    "https://inditex-c-pro.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-c-pro",
+    "https://inditex-z-stg.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-z-stg",
+    "https://inditex-rt-pro.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-rt-pro",
+    "https://inditex-pro.tinybird.co": "https://app.inditex.tinybird.co/gcp/inditex-pro",
+    "https://ui.split.tinybird.co": "https://app.tinybird.co/aws/split-us-east",
+    "https://ui.split.us-west-2.aws.tinybird.co": "https://app.tinybird.co/aws/split-us-west-2",
+    "https://api.split.tinybird.co": "https://app.tinybird.co/aws/split-us-east",
+    "https://api.split.us-west-2.aws.tinybird.co": "https://app.tinybird.co/aws/split-us-west-2",
+    "https://ui.wadus1.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus1",
+    "https://ui.wadus2.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus2",
+    "https://ui.wadus3.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus3",
+    "https://ui.wadus4.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus4",
+    "https://ui.wadus5.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus5",
+    "https://ui.wadus6.gcp.tinybird.co": "https://app.wadus.tinybird.co/gcp/wadus6",
+    "https://ui.wadus1.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus1",
+    "https://ui.wadus2.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus2",
+    "https://ui.wadus3.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus3",
+    "https://ui.wadus4.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus4",
+    "https://ui.wadus5.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus5",
+    "https://ui.wadus6.aws.tinybird.co": "https://app.wadus.tinybird.co/aws/wadus6",
+}
+
+
+async def get_config(host: str, token: Optional[str], semver: Optional[str] = None) -> Dict[str, Any]:
+    if host:
+        host = host.rstrip("/")
+
+    config_file = Path(getcwd()) / ".tinyb"
+    config = {}
+    try:
+        async with aiofiles.open(config_file) as file:
+            res = await file.read()
+            config = json.loads(res)
+    except IOError:
+        pass
+    except json.decoder.JSONDecodeError:
+        click.echo(FeedbackManager.error_load_file_config(config_file=config_file))
+        return config
+
+    config["token_passed"] = token
+    config["token"] = token or config.get("token", None)
+    config["semver"] = semver or config.get("semver", None)
+    config["host"] = host or config.get("host", DEFAULT_API_HOST)
+    config["workspaces"] = config.get("workspaces", [])
+    return config
+
+
+async def write_config(config: Dict[str, Any], dest_file: str = ".tinyb"):
+    config_file = Path(getcwd()) / dest_file
+    async with aiofiles.open(config_file, "w") as file:
+        await file.write(json.dumps(config, indent=4, sort_keys=True))
+
+
+def get_display_host(ui_host: str):
+    return LEGACY_HOSTS.get(ui_host, ui_host)
+
+
+class FeatureFlags:
+    @classmethod
+    def ignore_sql_errors(cls) -> bool:  # Context: #1155
+        return "TB_IGNORE_SQL_ERRORS" in environ
+
+    @classmethod
+    def is_localhost(cls) -> bool:
+        return "SET_LOCALHOST" in environ
```
tinybird/connectors.py
ADDED
```diff
@@ -0,0 +1,428 @@
+import logging
+import os
+import time
+from abc import ABC, abstractmethod
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import unquote
+
+import requests
+
+from .client import ConnectorNothingToLoad
+
+UNINSTALLED_CONNECTORS = []
+
+# common dependencies to snowflake and bigquery connectors
+try:
+    from google.cloud import storage
+    from google.cloud.storage.blob import Blob
+    from google.oauth2 import service_account
+except ImportError:
+    UNINSTALLED_CONNECTORS += ["bigquery", "snowflake"]
+
+try:
+    from google.cloud import bigquery
+except ImportError:
+    UNINSTALLED_CONNECTORS += ["bigquery"]
+
+try:
+    import googleapiclient.discovery
+    import snowflake.connector
+    from google.api_core.exceptions import PreconditionFailed
+except ImportError:
+    UNINSTALLED_CONNECTORS += ["snowflake"]
+
+UNINSTALLED_CONNECTORS = list(set(UNINSTALLED_CONNECTORS))
+
+logger = logging.getLogger("tinybird-connect")
+
+
+def _now():
+    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+
+def _log(text):
+    logger.info(f"{_now()} - {text}")
+
+
+class GCS:
+    MAX_FILE_SIZE = int(5 * 1024**3)
+    MAX_COMPOSE = 31
+    READ_TIMEOUT = 3
+
+    def __init__(self, options):
+        self.options = options
+        filename = options["service_account"]
+        if not filename:
+            filename = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+        self.credentials = service_account.Credentials.from_service_account_file(
+            filename=filename,
+            scopes=["https://www.googleapis.com/auth/cloud-platform"],
+        )
+        self.storage_client = storage.Client(project=options["project_id"], credentials=self.credentials)
+
+    def gcs_url(self):
+        return f"gcs://{self.bucket_name()}/"
+
+    def gs_url(self):
+        return f"gs://{self.bucket_name()}/"
+
+    def bucket_name(self):
+        return self.options["bucket_name"]
+
+    def sign(self, blob_name):
+        bucket = self.storage_client.get_bucket(self.bucket_name())
+        blob = bucket.get_blob(blob_name)
+        if not blob:
+            raise Exception("Warning: File not found. This probably just means there is no new data to load")
+        return blob.generate_signed_url(expiration=86400, version="v4", scheme="https")
+
+    def grant_access(self, member_name):
+        role_name = self.get_role_name()
+        service = googleapiclient.discovery.build("iam", "v1", credentials=self.credentials)
+        result = service.roles().list(parent="projects/" + self.options["project_id"]).execute()
+        exists = False if "roles" not in result else any(role["title"] == role_name for role in result["roles"])
+        if not exists:
+            role_name = self.create_custom_role(service)
+        self.add_member_to_bucket(member_name, role_name, self.bucket_name())
+
+    def get_role_name(self):
+        return f"{self.bucket_name().replace('-', '')}_5".lower()
+
+    def create_custom_role(self, service):
+        service.projects().roles().create(
+            parent=f"projects/{self.options['project_id']}",
+            body={
+                "roleId": self.get_role_name(),
+                "role": {
+                    "title": self.get_role_name(),
+                    "description": self.get_role_name(),
+                    "includedPermissions": [
+                        "storage.objects.create",
+                        "storage.buckets.get",
+                        "storage.objects.delete",
+                        "storage.objects.get",
+                        "storage.objects.list",
+                    ],
+                    "stage": "GA",
+                },
+            },
+        ).execute()
+        return self.get_role_name()
+
+    def add_member_to_bucket(self, member_name, iam_role, bucket_name, retry=True):
+        try:
+            project_id = self.options["project_id"]
+            role = f"projects/{project_id}/roles/{iam_role}"
+            member = f"serviceAccount:{member_name}"
+
+            bucket = self.storage_client.bucket(bucket_name)
+            policy = bucket.get_iam_policy(requested_policy_version=3)
+            policy.bindings.append({"role": role, "members": {member}})
+            bucket.set_iam_policy(policy)
+        except PreconditionFailed as e:
+            _log(str(e))
+            if retry:
+                _log("retrying...")
+                self.add_member_to_bucket(member_name, iam_role, bucket_name, retry=False)
+
+    def compose(self, source_prefix: str, destination_blob_name: str, mode: str) -> List[str]:
+        bucket = self.storage_client.bucket(self.bucket_name())
+        retries: int = 0
+        sources: Optional[List[Blob]] = None
+        # for some reason when this list_blobs run in GH actions takes forever and fails
+        # so retry many times with a lower timeout
+        while not sources and retries < 5:
+            try:
+                sources = list(
+                    self.storage_client.list_blobs(
+                        self.bucket_name(), prefix=source_prefix, timeout=self.READ_TIMEOUT, fields="items(name,size)"
+                    )
+                )
+            except requests.exceptions.ReadTimeout:
+                _log("exception fetching blob list from GCS")
+                retries += 1
+
+        if sources is None:
+            raise Exception("couldn't load list of blobs from GCS")
+
+        _log(f"Exported {len(sources)} CSV files")
+        if len(sources) == 0:
+            raise ConnectorNothingToLoad("Warning: Nothing to load. Aborting next steps")
+
+        def compose_with_limits(
+            sources: List[Blob], max_file_size: float = float("inf"), iteration: int = 0
+        ) -> Tuple[List[Blob], List[Blob], int]:
+            blobs: List[Blob] = []
+            blobs_to_delete: List[Blob] = []
+            chunk: List[Blob] = []
+            chunk_size: int = 0
+            count: int = 0
+            for i, blob in enumerate(sources):
+                chunk.append(blob)
+                chunk_size += blob.size
+
+                if chunk_size >= max_file_size or len(chunk) >= self.MAX_COMPOSE or i == len(sources) - 1:
+                    destination_name: str = f"{destination_blob_name}_{iteration}_part{count}.csv"
+                    destination: Blob = bucket.blob(destination_name)
+                    destination.content_type = "text/csv"
+                    destination.compose(chunk)
+
+                    blobs.append(destination)
+                    blobs_to_delete += chunk
+                    chunk = []
+                    chunk_size = 0
+                    count += 1
+
+            return blobs, blobs_to_delete, iteration + 1
+
+        blobs: List[Blob] = []
+        blobs_to_delete: List[Blob] = []
+        if mode == "append":
+            blobs, blobs_to_delete, _ = compose_with_limits(sources, max_file_size=self.MAX_FILE_SIZE)
+        elif mode == "replace":
+            blobs = sources
+            iteration: int = 0
+            while len(blobs) > 1:
+                blobs, blobs_to_delete_iteration, iteration = compose_with_limits(blobs, iteration=iteration)
+                blobs_to_delete += blobs_to_delete_iteration
+
+        try:
+            _log("Removing temp blobs")
+            for blob in blobs_to_delete:
+                blob.delete()
+        except Exception:
+            pass
+
+        _log("Done composing!")
+        return [blob.name for blob in blobs]
+
+    def delete(self, blob_name):
+        bucket = self.storage_client.bucket(self.bucket_name())
+        bucket.delete_blob(blob_name)
+
+
+class Connector(ABC):
+    def __init__(self, options: Dict[str, Any]):
+        self.options = options
+        self.options["gcs"] = {
+            "project_id": options["project_id"],
+            "service_account": options["service_account"],
+            "sign_service_account": options["service_account"],
+            "bucket_name": options["bucket_name"],
+        }
+        self.gcs = GCS(self.options["gcs"])
+
+    @abstractmethod
+    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
+        pass
+
+    @abstractmethod
+    def clean(self, blob_name):
+        pass
+
+    @abstractmethod
+    def datasource_analyze(self, resource):
+        pass
+
+
+class BigQuery(Connector):
+    def __init__(self, options):
+        super().__init__(options)
+        self.configure()
+
+    def configure(self):
+        pass
+
+    def datasource_analyze(self, resource):
+        raise Exception("BigQuery does not support data source analyze")
+
+    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
+        mm = str(int(round(time.time() * 1000)))
+        destination = f"{destination}_{mm}"
+        with_headers: bool = self.options.get("with_headers", False)
+        sql = f"""
+        EXPORT DATA OPTIONS(
+            uri='{self.gcs.gs_url()}{destination}*.csv',
+            format='CSV',
+            overwrite=true,
+            header={ "true" if with_headers else "false" },
+            field_delimiter=',') AS
+        {sql}
+        """
+        self.execute(sql)
+        urls: List[str] = self.gcs.compose(destination, f"{destination}_final", mode)
+        return [unquote(self.gcs.sign(url)) for url in urls]
+
+    def execute(self, sql, result=False):
+        client = self.connector()
+        job_config = bigquery.QueryJobConfig()
+
+        query_job = client.query(
+            sql,
+            job_config=job_config,
+        )
+
+        return query_job.result()  # Waits for the query to finish
+
+    def connector(self):
+        credentials = service_account.Credentials.from_service_account_file(
+            filename=self.options["service_account"],
+            scopes=["https://www.googleapis.com/auth/cloud-platform"],
+        )
+        self.credentials = credentials
+
+        return bigquery.Client(credentials=credentials, project=credentials.project_id)
+
+    def clean(self, blob_name):
+        self.gcs.delete(blob_name)
+
+
+class Snowflake(Connector):
+    DEFAULT_STORAGE_INTEGRATION = "tb__gcs_int"
+    DEFAULT_STAGE = "tb__gcs_stage"
+
+    def __init__(self, options):
+        super().__init__(options)
+        self.configure()
+
+    def configure(self):
+        self.create_storage_integration()
+        self.create_stage()
+        self.gcs.grant_access(self.get_sf_member_name())
+
+    def create_storage_integration(self):
+        sql = f"""
+        create storage integration {self.storage_integration()}
+            type = external_stage
+            storage_provider = gcs
+            enabled = true
+            storage_allowed_locations = ('{self.gcs.gcs_url()}');
+        """
+        try:
+            self.execute(sql)
+        except Exception as e:
+            if "already exists" in str(e):
+                pass
+            else:
+                raise e
+
+    def get_sf_member_name(self):
+        try:
+            sql = f"DESC STORAGE INTEGRATION {self.storage_integration()};"
+            result = self.execute(sql, result=True)
+            for row in result:
+                if row["property"] == "STORAGE_GCP_SERVICE_ACCOUNT":
+                    return row["property_value"]
+        except Exception:
+            return None
+
+    def create_stage(self):
+        sql = f"""
+        create stage "{self.options['schema']}".{self.stage()}
+            url='{self.gcs.gcs_url()}'
+            storage_integration = {self.storage_integration()};
+        """
+        try:
+            self.execute(sql)
+        except Exception as e:
+            if "already exists" in str(e):
+                pass
+
+    def export_to_gcs(self, sql: str, destination: str, mode: str) -> List[str]:
+        mm = str(int(round(time.time() * 1000)))
+        destination = f"{destination}_{mm}"
+        with_headers: bool = self.options.get("with_headers", False)
+        sql = f"""copy into '@{self.stage()}/{destination}'
+        from ({sql})
+        overwrite = true
+        file_format = (TYPE=CSV COMPRESSION=NONE ESCAPE_UNENCLOSED_FIELD=NONE FIELD_DELIMITER='|' FIELD_OPTIONALLY_ENCLOSED_BY='"' null_if=())
+        header = {"true" if with_headers else "false"}
+        max_file_size = 2500000000;
+        """
+        self.execute(sql)
+        urls: List[str] = self.gcs.compose(destination, f"{destination}_final", mode)
+        return [unquote(self.gcs.sign(url)) for url in urls]
+
+    def execute(self, sql, result=False):
+        ctx = self.connector()
+        cs = ctx.cursor(snowflake.connector.DictCursor)
+        try:
+            cs.execute(f"use role {self.options['role']};")
+            cs.execute(f"use warehouse {self.options['warehouse']};")
+            cs.execute(sql)
+            if result:
+                result = cs.fetchall()
+        finally:
+            cs.close()
+            ctx.close()
+        return result
+
+    def datasource_analyze(self, resource):
+        """returns .datasource file for resource"""
+        # TODO: deal with the right precisions for Integers
+        YES = "Y"
+        NO = "N"
+
+        def from_snowflake_type(t: str, nullable=NO) -> str:
+            """transforms snowflake types to CH ones"""
+            the_type = "String"
+            if t.startswith("NUMBER"):
+                the_type = "Int32"
+            if (
+                t.startswith("FLOAT")
+                or t.startswith("DOUBLE")
+                or t.startswith("REAL")
+                or t.startswith("NUMERIC")
+                or t.startswith("DECIMAL")
+            ):
+                the_type = "Float32"
+            if t == "DATE":
+                the_type = "Date"
+            if t == "DATETIME" or t.startswith("TIMESTAMP"):
+                the_type = "DateTime"
+            if nullable == YES:
+                the_type = f"Nullable({the_type})"
+            return the_type
+
+        result = self.execute(f"DESCRIBE TABLE {resource};", result=True)
+        sql = []
+        columns = []
+        for row in result:
+            if row["kind"] == "COLUMN":
+                sql.append(f"{row['name']} {from_snowflake_type(row['type'], row['null?'])}")
+                columns.append(
+                    {
+                        "path": row["name"],
+                        "name": row["name"],
+                        "present_pct": 1 if row["null?"] != YES else None,
+                        "recommended_type": from_snowflake_type(row["type"]),
+                    }
+                )
+        return {"analysis": {"columns": columns, "schema": ", ".join(sql)}}
+
+    def connector(self):
+        auth = {
+            key: self.options[key]
+            for key in self.options.keys() & {"user", "password", "account", "warehouse", "database", "schema"}
+        }
+        return snowflake.connector.connect(**auth)
+
+    def stage(self):
+        return self.options.get("stage") or self.DEFAULT_STAGE
+
+    def storage_integration(self):
+        return self.options.get("storage_integration") or self.DEFAULT_STORAGE_INTEGRATION
+
+    def clean(self, blob_name):
+        self.gcs.delete(blob_name)
+
+
+connectors = {
+    "snowflake": Snowflake,
+    "bigquery": BigQuery,
+}
+
+
+def create_connector(source: str, params: Dict[str, Any]) -> Connector:
+    return connectors[source](params)
```
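Both connectors share the same `export_to_gcs` contract: run an export statement in the warehouse, compose the resulting CSV parts in GCS (GCS compose accepts at most 31 objects per call, hence `MAX_COMPOSE = 31`, and composed files are capped at 5 GiB), then return signed URLs. A sketch of driving the `create_connector` factory, with entirely hypothetical credentials and paths:

```python
from tinybird.connectors import UNINSTALLED_CONNECTORS, create_connector

# The google-cloud/snowflake dependencies are optional extras; check first.
if "bigquery" not in UNINSTALLED_CONNECTORS:
    bq = create_connector(
        "bigquery",
        {
            "project_id": "my-project",          # hypothetical GCP project
            "service_account": "sa.json",        # path to a key file
            "bucket_name": "my-export-bucket",   # staging bucket for CSVs
            "with_headers": False,
        },
    )
    # Exports to gs://my-export-bucket/events_<millis>*.csv, composes the
    # parts, and returns signed HTTPS URLs ready for ingestion.
    urls = bq.export_to_gcs("SELECT * FROM dataset.events", "events", "replace")
```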
tinybird/context.py
ADDED
```diff
@@ -0,0 +1,23 @@
+from contextvars import ContextVar
+from typing import TYPE_CHECKING
+
+# Avoid circular import error
+if TYPE_CHECKING:
+    from tinybird.user import User
+
+workspace_id: ContextVar[str] = ContextVar("workspace_id")
+workspace: ContextVar["User"] = ContextVar("workspace")
+table_id: ContextVar[str] = ContextVar("table_id")
+hfi_frequency: ContextVar[float] = ContextVar("hfi_frequency")
+hfi_frequency_gatherer: ContextVar[float] = ContextVar("hfi_frequency_gatherer")
+use_gatherer: ContextVar[bool] = ContextVar("use_gatherer")
+allow_gatherer_fallback: ContextVar[bool] = ContextVar("allow_gatherer_fallback")
+gatherer_allow_s3_backup_on_user_errors: ContextVar[bool] = ContextVar("gatherer_allow_s3_backup_on_user_errors")
+disable_template_security_validation: ContextVar[bool] = ContextVar("disable_template_security_validation")
+origin: ContextVar[str] = ContextVar("origin")
+request_id: ContextVar[str] = ContextVar("request_id")
+engine: ContextVar[str] = ContextVar("engine")
+wait_parameter: ContextVar[bool] = ContextVar("wait_parameter")
+api_host: ContextVar[str] = ContextVar("api_host")
+ff_split_to_array_escape: ContextVar[bool] = ContextVar("ff_split_to_array_escape")
+ff_preprocess_parameters_circuit_breaker: ContextVar[bool] = ContextVar("ff_preprocess_parameters_circuit_breaker")
```