tinybird 0.0.1.dev5__py3-none-any.whl → 0.0.1.dev7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release: this version of tinybird has been flagged as possibly problematic.
- tinybird/__cli__.py +7 -8
- tinybird/tb/cli.py +28 -0
- tinybird/{tb_cli_modules → tb/modules}/auth.py +5 -5
- tinybird/{tb_cli_modules → tb/modules}/branch.py +5 -25
- tinybird/{tb_cli_modules → tb/modules}/build.py +10 -21
- tinybird/tb/modules/cicd.py +271 -0
- tinybird/{tb_cli_modules → tb/modules}/cli.py +20 -140
- tinybird/tb/modules/common.py +2110 -0
- tinybird/tb/modules/config.py +352 -0
- tinybird/{tb_cli_modules → tb/modules}/connection.py +4 -4
- tinybird/{tb_cli_modules → tb/modules}/create.py +20 -20
- tinybird/tb/modules/datafile/build.py +2103 -0
- tinybird/tb/modules/datafile/build_common.py +118 -0
- tinybird/tb/modules/datafile/build_datasource.py +403 -0
- tinybird/tb/modules/datafile/build_pipe.py +648 -0
- tinybird/tb/modules/datafile/common.py +897 -0
- tinybird/tb/modules/datafile/diff.py +197 -0
- tinybird/tb/modules/datafile/exceptions.py +23 -0
- tinybird/tb/modules/datafile/format_common.py +66 -0
- tinybird/tb/modules/datafile/format_datasource.py +160 -0
- tinybird/tb/modules/datafile/format_pipe.py +195 -0
- tinybird/tb/modules/datafile/parse_datasource.py +41 -0
- tinybird/tb/modules/datafile/parse_pipe.py +69 -0
- tinybird/tb/modules/datafile/pipe_checker.py +560 -0
- tinybird/tb/modules/datafile/pull.py +157 -0
- tinybird/{tb_cli_modules → tb/modules}/datasource.py +7 -6
- tinybird/tb/modules/exceptions.py +91 -0
- tinybird/{tb_cli_modules → tb/modules}/fmt.py +6 -3
- tinybird/{tb_cli_modules → tb/modules}/job.py +3 -3
- tinybird/{tb_cli_modules → tb/modules}/llm.py +1 -1
- tinybird/{tb_cli_modules → tb/modules}/local.py +9 -5
- tinybird/{tb_cli_modules → tb/modules}/mock.py +5 -5
- tinybird/{tb_cli_modules → tb/modules}/pipe.py +11 -5
- tinybird/{tb_cli_modules → tb/modules}/prompts.py +1 -1
- tinybird/tb/modules/regions.py +9 -0
- tinybird/{tb_cli_modules → tb/modules}/tag.py +2 -2
- tinybird/tb/modules/telemetry.py +310 -0
- tinybird/{tb_cli_modules → tb/modules}/test.py +5 -5
- tinybird/{tb_cli_modules → tb/modules}/tinyunit/tinyunit.py +1 -1
- tinybird/{tb_cli_modules → tb/modules}/token.py +3 -3
- tinybird/{tb_cli_modules → tb/modules}/workspace.py +5 -5
- tinybird/{tb_cli_modules → tb/modules}/workspace_members.py +4 -4
- tinybird/tb_cli_modules/common.py +9 -25
- tinybird/tb_cli_modules/config.py +0 -8
- {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/METADATA +1 -1
- tinybird-0.0.1.dev7.dist-info/RECORD +71 -0
- tinybird-0.0.1.dev7.dist-info/entry_points.txt +2 -0
- tinybird/datafile.py +0 -6123
- tinybird/tb_cli.py +0 -28
- tinybird-0.0.1.dev5.dist-info/RECORD +0 -52
- tinybird-0.0.1.dev5.dist-info/entry_points.txt +0 -2
- /tinybird/{tb_cli_modules → tb/modules}/table.py +0 -0
- /tinybird/{tb_cli_modules → tb/modules}/tinyunit/tinyunit_lib.py +0 -0
- {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev5.dist-info → tinybird-0.0.1.dev7.dist-info}/top_level.txt +0 -0
tinybird/tb/modules/datafile/parse_pipe.py (new file)
@@ -0,0 +1,69 @@
+import os
+from typing import Optional
+
+import click
+
+from tinybird.feedback_manager import FeedbackManager
+from tinybird.sql_template import get_template_and_variables, render_sql_template
+from tinybird.tb.modules.datafile.common import (
+    Datafile,
+    format_filename,
+    parse,
+)
+from tinybird.tb.modules.datafile.exceptions import IncludeFileNotFoundException, ParseException
+from tinybird.tornado_template import UnClosedIfError
+
+
+def parse_pipe(
+    filename: str,
+    replace_includes: bool = True,
+    content: Optional[str] = None,
+    skip_eval: bool = False,
+    hide_folders: bool = False,
+) -> Datafile:
+    basepath = ""
+    if not content:
+        with open(filename) as file:
+            s = file.read()
+        basepath = os.path.dirname(filename)
+    else:
+        s = content
+
+    filename = format_filename(filename, hide_folders)
+    try:
+        sql = ""
+        doc = parse(s, basepath=basepath, replace_includes=replace_includes, skip_eval=skip_eval)
+        for node in doc.nodes:
+            sql = node.get("sql", "")
+            if sql.strip()[0] == "%":
+                sql, _, variable_warnings = render_sql_template(sql[1:], test_mode=True, name=node["name"])
+                doc.warnings = variable_warnings
+            # it'll fail with a ModuleNotFoundError when the toolset is not available but it returns the parsed doc
+            from tinybird.sql_toolset import format_sql as toolset_format_sql
+
+            toolset_format_sql(sql)
+    except ParseException as e:
+        raise click.ClickException(
+            FeedbackManager.error_parsing_file(
+                filename=filename, lineno=e.lineno, error=f"{str(e)} + SQL(parse exception): {sql}"
+            )
+        )
+    except ValueError as e:
+        t, template_variables, _ = get_template_and_variables(sql, name=node["name"])
+
+        if sql.strip()[0] != "%" and len(template_variables) > 0:
+            raise click.ClickException(FeedbackManager.error_template_start(filename=filename))
+        raise click.ClickException(
+            FeedbackManager.error_parsing_file(
+                filename=filename, lineno="", error=f"{str(e)} + SQL(value error): {sql}"
+            )
+        )
+    except UnClosedIfError as e:
+        raise click.ClickException(
+            FeedbackManager.error_parsing_node_with_unclosed_if(node=e.node, pipe=filename, lineno=e.lineno, sql=e.sql)
+        )
+    except IncludeFileNotFoundException as e:
+        raise click.ClickException(FeedbackManager.error_not_found_include(filename=e, lineno=e.lineno))
+    except ModuleNotFoundError:
+        pass
+    return doc
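For orientation, a minimal usage sketch of the parse_pipe helper added above; the .pipe path below is a made-up placeholder, not something shipped in this release:

    from tinybird.tb.modules.datafile.parse_pipe import parse_pipe

    # Parse a local .pipe datafile (hypothetical path) into a Datafile object,
    # resolving includes and validating any templated SQL in each node.
    doc = parse_pipe("endpoints/top_products.pipe")
    for node in doc.nodes:
        print(node["name"], node.get("sql", "")[:80])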
tinybird/tb/modules/datafile/pipe_checker.py (new file)
@@ -0,0 +1,560 @@
+import difflib
+import json
+import logging
+import math
+import sys
+import unittest
+from dataclasses import dataclass
+from operator import itemgetter
+from statistics import mean, median
+from typing import Any, Dict, Iterable, List, Optional, Tuple
+from urllib.parse import parse_qs, urlencode, urlparse
+
+import requests
+from humanfriendly import format_size
+from requests import Response
+
+from tinybird.tb.modules.common import getenv_bool
+from tinybird.tb.modules.datafile.common import normalize_array
+
+PIPE_CHECKER_RETRIES: int = 3
+
+
+class PipeChecker(unittest.TestCase):
+    RETRIES_LIMIT = PIPE_CHECKER_RETRIES
+
+    current_response_time: float = 0
+    checker_response_time: float = 0
+
+    current_read_bytes: int = 0
+    checker_read_bytes: int = 0
+
+    def __init__(
+        self,
+        request: Dict[str, Any],
+        pipe_name: str,
+        checker_pipe_name: str,
+        token: str,
+        only_response_times: bool,
+        ignore_order: bool,
+        validate_processed_bytes: bool,
+        relative_change: float,
+        *args,
+        **kwargs,
+    ) -> None:
+        super().__init__(*args, **kwargs)
+        if request.get("http_method") == "POST":
+            self.http_method = "POST"
+            self.current_pipe_url, self.pipe_request_params = self._prepare_current_pipe_for_post_request(request)
+        else:
+            self.http_method = "GET"
+            self.current_pipe_url, self.pipe_request_params = self._prepare_current_pipe_url_for_get_request(request)
+
+        self._process_params()
+        self.checker_pipe_name = checker_pipe_name
+        self.pipe_name = pipe_name
+        self.token = token
+        self.only_response_times = only_response_times
+        self.ignore_order = ignore_order
+        self.validate_processed_bytes = validate_processed_bytes
+        self.relative_change = relative_change
+
+        parsed = urlparse(self.current_pipe_url)
+        self.checker_pipe_url = f"{parsed.scheme}://{parsed.netloc}/v0/pipes/{self.checker_pipe_name}.json"
+        self.checker_pipe_url += f"?{parsed.query}" if parsed.query is not None and parsed.query != "" else ""
+
+    def _process_params(self) -> None:
+        for key in self.pipe_request_params.keys():
+            try:
+                self.pipe_request_params[key] = json.loads(self.pipe_request_params[key])
+            except Exception:
+                pass
+
+    def _prepare_current_pipe_url_for_get_request(self, request) -> Tuple[str, Dict[str, str]]:
+        current_pipe_url = request.get("endpoint_url", "")
+        current_pipe_url = (
+            current_pipe_url.replace(".ndjson", ".json").replace(".csv", ".json").replace(".parquet", ".json")
+        )
+        current_pipe_url = drop_token(current_pipe_url)
+        current_pipe_url += ("&" if "?" in current_pipe_url else "?") + "pipe_checker=true"
+        return current_pipe_url, request.get("pipe_request_params", {})
+
+    def _prepare_current_pipe_for_post_request(self, request) -> Tuple[str, Dict[str, str]]:
+        current_pipe_url = request.get("endpoint_url", "")
+        current_pipe_url = (
+            current_pipe_url.replace(".ndjson", ".json").replace(".csv", ".json").replace(".parquet", ".json")
+        )
+        all_parameters = request.get("pipe_request_params")
+        all_parameters.pop("token", None)
+        all_parameters["pipe_checker"] = "true"
+
+        return current_pipe_url, all_parameters
+
+    def __str__(self):
+        post_values = f" - POST Body: {self.pipe_request_params}" if self.http_method == "POST" else ""
+
+        return f"current {self.current_pipe_url}{post_values}\n new {self.checker_pipe_url}{post_values}"
+
+    def diff(self, a: Dict[str, Any], b: Dict[str, Any]) -> str:
+        a_properties = list(map(lambda x: f"{x}:{a[x]}\n", a.keys()))
+        b_properties = list(map(lambda x: f"{x}:{b[x]}\n", b.keys()))
+
+        return "".join(difflib.context_diff(a_properties, b_properties, self.pipe_name, self.checker_pipe_name))
+
+    def _do_request_to_pipe(self, pipe_url: str) -> Response:
+        headers = {"Authorization": f"Bearer {self.token}"}
+        if self.http_method == "GET":
+            return requests.get(pipe_url, headers=headers, verify=not getenv_bool("TB_DISABLE_SSL_CHECKS", False))
+        else:
+            return requests.post(
+                pipe_url,
+                headers=headers,
+                verify=not getenv_bool("TB_DISABLE_SSL_CHECKS", False),
+                data=self.pipe_request_params,
+            )
+
+    def _write_performance(self):
+        return ""
+
+    def _runTest(self) -> None:
+        current_r = self._do_request_to_pipe(self.current_pipe_url)
+        checker_r = self._do_request_to_pipe(self.checker_pipe_url)
+
+        try:
+            self.current_response_time = current_r.elapsed.total_seconds()
+            self.checker_response_time = checker_r.elapsed.total_seconds()
+        except Exception:
+            pass
+
+        current_response: Dict[str, Any] = current_r.json()
+        checker_response: Dict[str, Any] = checker_r.json()
+
+        current_data: List[Dict[str, Any]] = current_response.get("data", [])
+        checker_data: List[Dict[str, Any]] = checker_response.get("data", [])
+
+        self.current_read_bytes = current_response.get("statistics", {}).get("bytes_read", 0)
+        self.checker_read_bytes = checker_response.get("statistics", {}).get("bytes_read", 0)
+
+        error_check_fixtures_data: Optional[str] = checker_response.get("error", None)
+        self.assertIsNone(
+            error_check_fixtures_data,
+            "You are trying to push a pipe with errors, please check the output or run with --no-check",
+        )
+
+        increase_response_time = (
+            checker_r.elapsed.total_seconds() - current_r.elapsed.total_seconds()
+        ) / current_r.elapsed.total_seconds()
+        if self.only_response_times:
+            self.assertLess(
+                increase_response_time, 0.25, msg=f"response time has increased {round(increase_response_time * 100)}%"
+            )
+            return
+
+        self.assertEqual(len(current_data), len(checker_data), "Number of elements does not match")
+
+        if self.validate_processed_bytes:
+            increase_read_bytes = (self.checker_read_bytes - self.current_read_bytes) / self.current_read_bytes
+            self.assertLess(
+                round(increase_read_bytes, 2),
+                0.25,
+                msg=f"The number of processed bytes has increased {round(increase_read_bytes * 100)}%",
+            )
+
+        if self.ignore_order:
+            current_data = (
+                sorted(normalize_array(current_data), key=itemgetter(*[k for k in current_data[0].keys()]))
+                if len(current_data) > 0
+                else current_data
+            )
+            checker_data = (
+                sorted(normalize_array(checker_data), key=itemgetter(*[k for k in checker_data[0].keys()]))
+                if len(checker_data) > 0
+                else checker_data
+            )
+
+        for _, (current_data_e, check_fixtures_data_e) in enumerate(zip(current_data, checker_data)):
+            self.assertEqual(list(current_data_e.keys()), list(check_fixtures_data_e.keys()))
+            for x in current_data_e.keys():
+                if isinstance(current_data_e[x], (float, int)):
+                    d = abs(current_data_e[x] - check_fixtures_data_e[x])
+
+                    try:
+                        self.assertLessEqual(
+                            d / current_data_e[x],
+                            self.relative_change,
+                            f"key {x}. old value: {current_data_e[x]}, new value: {check_fixtures_data_e[x]}\n{self.diff(current_data_e, check_fixtures_data_e)}",
+                        )
+                    except ZeroDivisionError:
+                        self.assertEqual(
+                            d,
+                            0,
+                            f"key {x}. old value: {current_data_e[x]}, new value: {check_fixtures_data_e[x]}\n{self.diff(current_data_e, check_fixtures_data_e)}",
+                        )
+                elif (
+                    not isinstance(current_data_e[x], (str, bytes))
+                    and isinstance(current_data_e[x], Iterable)
+                    and self.ignore_order
+                ):
+
+                    def flatten(items):
+                        """Yield items from any nested iterable; see Reference."""
+                        output = []
+                        for x in items:
+                            if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
+                                output.extend(flatten(x))
+                            else:
+                                output.append(x)
+                        return output
+
+                    self.assertEqual(
+                        flatten(current_data_e[x]).sort(),
+                        flatten(check_fixtures_data_e[x]).sort(),
+                        "\n" + self.diff(current_data_e, check_fixtures_data_e),
+                    )
+                else:
+                    self.assertEqual(
+                        current_data_e[x],
+                        check_fixtures_data_e[x],
+                        "\n" + self.diff(current_data_e, check_fixtures_data_e),
+                    )
+
+    def runTest(self) -> None:
+        if "debug" in self.pipe_request_params or (
+            "from" in self.pipe_request_params and self.pipe_request_params["from"] == "ui"
+        ):
+            self.skipTest("found debug param")
+
+        # Let's retry the validation to avoid false alerts when dealing with endpoints that have continuos ingestion
+        retries = 0
+        while retries < self.RETRIES_LIMIT:
+            try:
+                self._runTest()
+            except AssertionError as e:
+                retries += 1
+                if retries >= self.RETRIES_LIMIT:
+                    raise e
+            else:
+                break
+
+
+@dataclass
+class PipeCheckerRunnerResponse:
+    pipe_name: str
+    test_type: str
+    output: str
+    metrics_summary: Optional[Dict[str, Any]]
+    metrics_timing: Dict[str, Tuple[float, float, float]]
+    failed: List[Dict[str, str]]
+    was_successfull: bool
+
+
+class PipeCheckerRunner:
+    checker_stream_result_class = unittest.runner._WritelnDecorator
+
+    def __init__(self, pipe_name: str, host: str):
+        self.pipe_name = pipe_name
+        self.host = host
+
+    def get_sqls_for_requests_to_check(
+        self,
+        matches: List[str],
+        sample_by_params: int,
+        limit: int,
+        pipe_stats_rt_table: str = "",
+        extra_where_clause: str = "",
+    ):
+        pipe_stats_rt = pipe_stats_rt_table or "tinybird.pipe_stats_rt"
+        # TODO it may not be needed to extract token, pipe_checker, form or debug. They may be used in next steps
+        # TODO extractURLParameter(assumeNotNull(url), 'from') <> 'ui' should read from request_param_names.
+        sql_for_coverage = f"""
+            SELECT
+                groupArraySample({sample_by_params if sample_by_params > 0 else 1})(url) as endpoint_url,
+                groupArraySample({sample_by_params if sample_by_params > 0 else 1})(pipe_request_params) as pipe_request_params,
+                http_method
+            FROM
+                (
+                Select
+                    url,
+                    mapFilter((k, v) -> (k not IN ('token', 'pipe_checker', 'from', 'debug')), parameters) AS pipe_request_params,
+                    mapKeys(pipe_request_params) request_param_names,
+                    extractURLParameterNames(assumeNotNull(url)) as url_param_names,
+                    method as http_method
+                FROM {pipe_stats_rt}
+                WHERE
+                    pipe_name = '{self.pipe_name}'
+                    AND url IS NOT NULL
+                    AND extractURLParameter(assumeNotNull(url), 'from') <> 'ui'
+                    AND extractURLParameter(assumeNotNull(url), 'pipe_checker') <> 'true'
+                    AND extractURLParameter(assumeNotNull(url), 'debug') <> 'query'
+                    AND error = 0
+                    AND not mapContains(parameters, '__tb__semver')
+                    {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else ''}
+                    { extra_where_clause }
+                Limit 5000000 -- Enough to bring data while not processing all requests from highly used pipes
+                )
+            group by request_param_names, http_method
+            FORMAT JSON
+        """
+        sql_latest_requests = f"""
+            SELECT
+                [first_value(url)] as endpoint_url,
+                [pipe_request_params] as pipe_request_params,
+                http_method
+            FROM (
+                SELECT assumeNotNull(url) as url,
+                    mapFilter((k, v) -> (k not IN ('token', 'pipe_checker', 'from', 'debug')), parameters) AS pipe_request_params,
+                    mapKeys(pipe_request_params) request_param_names,
+                    extractURLParameterNames(assumeNotNull(url)) as url_param_names,
+                    method as http_method
+                FROM {pipe_stats_rt}
+                WHERE
+                    pipe_name = '{self.pipe_name}'
+                    AND url IS NOT NULL
+                    AND extractURLParameter(assumeNotNull(url), 'from') <> 'ui'
+                    AND extractURLParameter(assumeNotNull(url), 'pipe_checker') <> 'true'
+                    AND extractURLParameter(assumeNotNull(url), 'debug') <> 'query'
+                    AND error = 0
+                    AND not mapContains(parameters, '__tb__semver')
+                    {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else ''}
+                    {extra_where_clause}
+                LIMIT {limit}
+            )
+            GROUP BY pipe_request_params, http_method
+            FORMAT JSON
+        """
+        return sql_for_coverage, sql_latest_requests
+
+    def _get_checker(
+        self,
+        request: Dict[str, Any],
+        checker_pipe_name: str,
+        token: str,
+        only_response_times: bool,
+        ignore_order: bool,
+        validate_processed_bytes: bool,
+        relative_change: float,
+    ) -> PipeChecker:
+        return PipeChecker(
+            request,
+            self.pipe_name,
+            checker_pipe_name,
+            token,
+            only_response_times,
+            ignore_order,
+            validate_processed_bytes,
+            relative_change,
+        )
+
+    def _delta_percentage(self, checker: float, current: float) -> float:
+        try:
+            if current == 0.0:
+                return 0.0
+            return round(((checker - current) / current) * 100, 2)
+        except Exception as exc:
+            logging.warning(f"Error calculating delta: {exc}")
+            return 0.0
+
+    def run_pipe_checker(
+        self,
+        pipe_requests_to_check: List[Dict[str, Any]],
+        checker_pipe_name: str,
+        token: str,
+        only_response_times: bool,
+        ignore_order: bool,
+        validate_processed_bytes: bool,
+        relative_change: float,
+        failfast: bool,
+        custom_output: bool = False,
+        debug: bool = False,
+    ) -> PipeCheckerRunnerResponse:
+        class PipeCheckerTextTestResult(unittest.TextTestResult):
+            def __init__(self, *args: Any, **kwargs: Any) -> None:
+                self.custom_output = kwargs.pop("custom_output", False)
+                super().__init__(*args, **kwargs)
+                self.success: List[PipeChecker] = []
+
+            def addSuccess(self, test: PipeChecker):  # type: ignore
+                super().addSuccess(test)
+                self.success.append(test)
+
+            def startTest(self, test):
+                if not self.custom_output:
+                    super().startTest(test)
+                else:
+                    super(unittest.TextTestResult, self).startTest(test)
+
+            def _write_status(self, test, status):
+                if self.custom_output:
+                    self.stream.write(status.upper())
+                    self.stream.write(" - ")
+                    self.stream.write(str(test))
+                    self.stream.write(" - ")
+                    self.stream.writeln(test._write_performance())
+
+                else:
+                    self.stream.writeln(status)
+                self.stream.flush()
+                self._newline = True
+
+        suite = unittest.TestSuite()
+
+        for _, request in enumerate(pipe_requests_to_check):
+            suite.addTest(
+                self._get_checker(
+                    request,
+                    checker_pipe_name,
+                    token,
+                    only_response_times,
+                    ignore_order,
+                    validate_processed_bytes,
+                    relative_change,
+                )
+            )
+
+        result = PipeCheckerTextTestResult(
+            self.checker_stream_result_class(sys.stdout),  # type: ignore
+            descriptions=True,
+            verbosity=2,
+            custom_output=custom_output,
+        )
+        result.failfast = failfast
+        suite.run(result)
+
+        metrics_summary: Optional[Dict[str, Any]] = None
+        metrics_timing: Dict[str, Tuple[float, float, float]] = {}
+
+        try:
+            current_response_times: List[float] = []
+            checker_response_times: List[float] = []
+
+            current_read_bytes: List[int] = []
+            checker_read_bytes: List[int] = []
+            if result.success:
+                for test in result.success:
+                    current_response_times.append(test.current_response_time)
+                    checker_response_times.append(test.checker_response_time)
+
+                    current_read_bytes.append(test.current_read_bytes)
+                    checker_read_bytes.append(test.checker_read_bytes)
+
+                for test, _ in result.failures:  # type: ignore
+                    current_response_times.append(test.current_response_time)
+                    checker_response_times.append(test.checker_response_time)
+
+                    current_read_bytes.append(test.current_read_bytes)
+                    checker_read_bytes.append(test.checker_read_bytes)
+            else:
+                # if we do not have any successful execution, let's just return a table with dummy metrics https://gitlab.com/tinybird/analytics/-/issues/10875
+                current_response_times = [0]
+                checker_response_times = [0]
+
+                current_read_bytes = [0]
+                checker_read_bytes = [0]
+
+            metrics_summary = {
+                "run": result.testsRun,
+                "passed": len(result.success),
+                "failed": len(result.failures),
+                "percentage_passed": len(result.success) * 100 / result.testsRun,
+                "percentage_failed": len(result.failures) * 100 / result.testsRun,
+            }
+            metrics_timing = {
+                "min response time": (
+                    min(current_response_times),
+                    min(checker_response_times),
+                    self._delta_percentage(min(checker_response_times), min(current_response_times)),
+                ),
+                "max response time": (
+                    max(current_response_times),
+                    max(checker_response_times),
+                    self._delta_percentage(max(checker_response_times), max(current_response_times)),
+                ),
+                "mean response time": (
+                    float(format(mean(current_response_times), ".6f")),
+                    float(format(mean(checker_response_times), ".6f")),
+                    self._delta_percentage(
+                        float(format(mean(checker_response_times), ".6f")),
+                        float(format(mean(current_response_times), ".6f")),
+                    ),
+                ),
+                "median response time": (
+                    median(current_response_times),
+                    median(checker_response_times),
+                    self._delta_percentage(median(checker_response_times), median(current_response_times)),
+                ),
+                "p90 response time": (
+                    sorted(current_response_times)[math.ceil(len(current_response_times) * 0.9) - 1],
+                    sorted(checker_response_times)[math.ceil(len(checker_response_times) * 0.9) - 1],
+                    self._delta_percentage(
+                        sorted(checker_response_times)[math.ceil(len(checker_response_times) * 0.9) - 1],
+                        sorted(current_response_times)[math.ceil(len(current_response_times) * 0.9) - 1],
+                    ),
+                ),
+                "min read bytes": (
+                    format_size(min(current_read_bytes)),
+                    format_size(min(checker_read_bytes)),
+                    self._delta_percentage(min(checker_read_bytes), min(current_read_bytes)),
+                ),
+                "max read bytes": (
+                    format_size(max(current_read_bytes)),
+                    format_size(max(checker_read_bytes)),
+                    self._delta_percentage(max(checker_read_bytes), max(current_read_bytes)),
+                ),
+                "mean read bytes": (
+                    format_size(mean(current_read_bytes)),
+                    format_size(mean(checker_read_bytes)),
+                    self._delta_percentage(mean(checker_read_bytes), mean(current_read_bytes)),
+                ),
+                "median read bytes": (
+                    format_size(median(current_read_bytes)),
+                    format_size(median(checker_read_bytes)),
+                    self._delta_percentage(median(checker_read_bytes), median(current_read_bytes)),
+                ),
+                "p90 read bytes": (
+                    format_size(sorted(current_read_bytes)[math.ceil(len(current_read_bytes) * 0.9) - 1]),
+                    format_size(sorted(checker_read_bytes)[math.ceil(len(checker_read_bytes) * 0.9) - 1]),
+                    self._delta_percentage(
+                        sorted(checker_read_bytes)[math.ceil(len(checker_read_bytes) * 0.9) - 1],
+                        sorted(current_read_bytes)[math.ceil(len(current_read_bytes) * 0.9) - 1],
+                    ),
+                ),
+            }
+        except Exception as e:
+            if debug:
+                logging.exception(e)
+
+        failures = []
+        if not result.wasSuccessful():
+            for _test, err in result.failures:
+                try:
+                    i = err.index("AssertionError") + len("AssertionError :")
+                    failures.append({"name": str(_test), "error": err[i:]})
+                except Exception as e:
+                    if debug:
+                        logging.exception(e)
+
+        return PipeCheckerRunnerResponse(
+            pipe_name=checker_pipe_name,
+            test_type=getattr(self, "test_type", ""),
+            output=getattr(result.stream, "_buffer", ""),
+            metrics_summary=metrics_summary,
+            metrics_timing=metrics_timing,
+            failed=failures,
+            was_successfull=result.wasSuccessful(),
+        )
+
+
+def drop_token(url: str) -> str:
+    """
+    drops token param from the url query string
+    >>> drop_token('https://api.tinybird.co/v0/pipes/aaa.json?token=abcd&a=1')
+    'https://api.tinybird.co/v0/pipes/aaa.json?a=1'
+    >>> drop_token('https://api.tinybird.co/v0/pipes/aaa.json?a=1')
+    'https://api.tinybird.co/v0/pipes/aaa.json?a=1'
+    """
+    parsed = urlparse(url)
+    qs = parse_qs(parsed.query)
+    qs_simplify = {k: v[0] for k, v in qs.items()}  # change several arguments to single one
+    if "token" in qs_simplify:
+        del qs_simplify["token"]
+    return f"{parsed.scheme}://{parsed.netloc}{parsed.path}?{urlencode(qs_simplify)}"
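As rough orientation, a hedged driver sketch for the new PipeCheckerRunner, based only on the signatures shown in the hunk above; the pipe names, host, token and the sample request entry are placeholders, not values from this release:

    from tinybird.tb.modules.datafile.pipe_checker import PipeCheckerRunner

    runner = PipeCheckerRunner("top_products", host="https://api.tinybird.co")

    # The runner only builds the SQL strings; presumably the caller runs one of
    # them against pipe_stats_rt to collect real requests to replay.
    coverage_sql, latest_sql = runner.get_sqls_for_requests_to_check(
        matches=[], sample_by_params=1, limit=10
    )

    # A request entry mirrors what PipeChecker.__init__ reads: endpoint_url,
    # pipe_request_params and http_method (values here are invented).
    requests_to_check = [
        {
            "endpoint_url": "https://api.tinybird.co/v0/pipes/top_products.json?limit=5",
            "pipe_request_params": {"limit": "5"},
            "http_method": "GET",
        }
    ]

    response = runner.run_pipe_checker(
        requests_to_check,
        checker_pipe_name="top_products__checker",
        token="<ADMIN TOKEN>",
        only_response_times=False,
        ignore_order=True,
        validate_processed_bytes=False,
        relative_change=0.01,
        failfast=False,
    )
    print(response.was_successfull, response.metrics_summary)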