tinybird 0.0.1.dev43__py3-none-any.whl → 0.0.1.dev46__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinybird/client.py +17 -1
- tinybird/prompts.py +135 -15
- tinybird/tb/__cli__.py +2 -2
- tinybird/tb/cli.py +1 -1
- tinybird/tb/modules/build.py +28 -20
- tinybird/tb/modules/cli.py +18 -62
- tinybird/tb/modules/common.py +3 -2
- tinybird/tb/modules/copy.py +1 -1
- tinybird/tb/modules/create.py +134 -59
- tinybird/tb/modules/datafile/build.py +12 -221
- tinybird/tb/modules/datafile/common.py +1 -1
- tinybird/tb/modules/datafile/format_datasource.py +1 -1
- tinybird/tb/modules/datafile/format_pipe.py +4 -4
- tinybird/tb/modules/datafile/pipe_checker.py +3 -3
- tinybird/tb/modules/datasource.py +1 -1
- tinybird/tb/modules/deployment.py +1 -1
- tinybird/tb/modules/endpoint.py +89 -2
- tinybird/tb/modules/feedback_manager.py +5 -1
- tinybird/tb/modules/local_common.py +10 -7
- tinybird/tb/modules/materialization.py +146 -0
- tinybird/tb/modules/mock.py +56 -16
- tinybird/tb/modules/pipe.py +8 -326
- tinybird/tb/modules/project.py +10 -4
- tinybird/tb/modules/shell.py +3 -3
- tinybird/tb/modules/test.py +73 -38
- tinybird/tb/modules/tinyunit/tinyunit.py +1 -1
- tinybird/tb/modules/update.py +1 -1
- tinybird/tb/modules/workspace.py +2 -1
- {tinybird-0.0.1.dev43.dist-info → tinybird-0.0.1.dev46.dist-info}/METADATA +1 -1
- {tinybird-0.0.1.dev43.dist-info → tinybird-0.0.1.dev46.dist-info}/RECORD +33 -33
- tinybird/tb/modules/build_client.py +0 -199
- {tinybird-0.0.1.dev43.dist-info → tinybird-0.0.1.dev46.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev43.dist-info → tinybird-0.0.1.dev46.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev43.dist-info → tinybird-0.0.1.dev46.dist-info}/top_level.txt +0 -0
tinybird/tb/modules/datafile/pipe_checker.py
CHANGED

@@ -288,8 +288,8 @@ class PipeCheckerRunner:
                 AND extractURLParameter(assumeNotNull(url), 'debug') <> 'query'
                 AND error = 0
                 AND not mapContains(parameters, '__tb__semver')
-                {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else
-                {
+                {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else ""}
+                {extra_where_clause}
                 Limit 5000000 -- Enough to bring data while not processing all requests from highly used pipes
             )
             group by request_param_names, http_method
@@ -315,7 +315,7 @@ class PipeCheckerRunner:
                 AND extractURLParameter(assumeNotNull(url), 'debug') <> 'query'
                 AND error = 0
                 AND not mapContains(parameters, '__tb__semver')
-                {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else
+                {" AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches]) if matches and len(matches) > 0 else ""}
                 {extra_where_clause}
                 LIMIT {limit}
             )
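Both hunks above fix the same f-string so that it falls back to an empty string when no request-parameter matches are given. A minimal standalone sketch of how that expression expands, with illustrative values for matches and extra_where_clause (both hypothetical here):

    # Sketch only: mirrors the f-string expression from the hunks above.
    matches = ["org_id", "date"]                    # hypothetical required request params
    extra_where_clause = "AND http_method = 'GET'"  # hypothetical extra filter

    clause = (
        " AND " + " AND ".join([f"mapContains(pipe_request_params, '{match}')" for match in matches])
        if matches and len(matches) > 0
        else ""
    )
    print(clause)
    # -> " AND mapContains(pipe_request_params, 'org_id') AND mapContains(pipe_request_params, 'date')"
    # With matches = [], clause is simply "", so the surrounding SQL stays valid.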
tinybird/tb/modules/endpoint.py
CHANGED
@@ -5,7 +5,7 @@

 import json
 import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlencode

 import click

@@ -21,7 +21,7 @@ from tinybird.tb.modules.exceptions import CLIPipeException
 from tinybird.tb.modules.feedback_manager import FeedbackManager


-@cli.group(
+@cli.group()
 @click.pass_context
 def endpoint(ctx):
     """Endpoint commands"""
@@ -185,3 +185,90 @@ def get_endpoint_token(tokens: List[Dict[str, Any]], pipe_name: str) -> Optional
             break

     return token
+
+
+@endpoint.command(name="stats")
+@click.argument("pipes", nargs=-1)
+@click.option(
+    "--format",
+    "format_",
+    type=click.Choice(["json"], case_sensitive=False),
+    default=None,
+    help="Force a type of the output. To parse the output, keep in mind to use `tb --no-version-warning endpoint stats` option.",
+)
+@click.pass_context
+@coro
+async def endpoint_stats(ctx: click.Context, pipes: Tuple[str, ...], format_: str):
+    """
+    Print endpoint stats for the last 7 days
+    """
+    client: TinyB = ctx.ensure_object(dict)["client"]
+    all_pipes = await client.pipes()
+    pipes_to_get_stats = []
+    pipes_ids: Dict = {}
+
+    if pipes:
+        # We filter by the pipes we want to look for
+        all_pipes = [pipe for pipe in all_pipes if pipe["name"] in pipes]
+
+    for pipe in all_pipes:
+        name_version = get_name_version(pipe["name"])
+        if name_version["name"] in pipe["name"]:
+            pipes_to_get_stats.append(f"'{pipe['id']}'")
+            pipes_ids[pipe["id"]] = name_version
+
+    if not pipes_to_get_stats:
+        if format_ == "json":
+            click.echo(json.dumps({"pipes": []}, indent=2))
+        else:
+            click.echo(FeedbackManager.info_no_pipes_stats())
+        return
+
+    sql = f"""
+        SELECT
+            pipe_id id,
+            sumIf(view_count, date > now() - interval 7 day) requests,
+            sumIf(error_count, date > now() - interval 7 day) errors,
+            avgMergeIf(avg_duration_state, date > now() - interval 7 day) latency
+        FROM tinybird.pipe_stats
+        WHERE pipe_id in ({",".join(pipes_to_get_stats)})
+        GROUP BY pipe_id
+        ORDER BY requests DESC
+        FORMAT JSON
+    """
+
+    res = await client.query(sql)
+
+    if res and "error" in res:
+        raise CLIPipeException(FeedbackManager.error_exception(error=str(res["error"])))
+
+    columns = ["name", "request count", "error count", "avg latency"]
+    table_human_readable: List[Tuple] = []
+    table_machine_readable: List[Dict] = []
+    if res and "data" in res:
+        for x in res["data"]:
+            tk = pipes_ids[x["id"]]
+            table_human_readable.append(
+                (
+                    tk["name"],
+                    x["requests"],
+                    x["errors"],
+                    x["latency"],
+                )
+            )
+            table_machine_readable.append(
+                {
+                    "name": tk["name"],
+                    "requests": x["requests"],
+                    "errors": x["errors"],
+                    "latency": x["latency"],
+                }
+            )
+
+        table_human_readable.sort(key=lambda x: (x[1], x[0]))
+        table_machine_readable.sort(key=lambda x: x["name"])
+
+    if format_ == "json":
+        click.echo(json.dumps({"pipes": table_machine_readable}, indent=2))
+    else:
+        echo_safe_humanfriendly_tables_format_smart_table(table_human_readable, column_names=columns)
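The new stats command above queries tinybird.pipe_stats and prints either a table or, with --format json, a {"pipes": [...]} document. A minimal sketch of how one result row maps to an output entry (row values invented; field names follow the code above):

    import json

    # Hypothetical row from the pipe_stats query and its parsed pipe name.
    row = {"id": "t_1234", "requests": 42, "errors": 1, "latency": 0.012}
    pipes_ids = {"t_1234": {"name": "top_products"}}

    entry = {
        "name": pipes_ids[row["id"]]["name"],
        "requests": row["requests"],
        "errors": row["errors"],
        "latency": row["latency"],
    }
    print(json.dumps({"pipes": [entry]}, indent=2))  # shape printed by --format json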
tinybird/tb/modules/feedback_manager.py
CHANGED

@@ -432,6 +432,7 @@ class FeedbackManager:
     error_updating_tag = error_message("Error updating tag: {error}")
     error_tag_generic = error_message("There was an issue updating tags. {error}")
     error_tag_not_found = error_message("Tag {tag_name} not found.")
+    error_build_failed = error_message("Build failed")

     info_incl_relative_path = info_message("** Relative path {path} does not exist, skipping.")
     info_ignoring_incl_file = info_message(

@@ -849,6 +850,7 @@ Ready? """
     )
     info_tag_list = info_message("** Tags:")
     info_tag_resources = info_message("** Resources tagged by {tag_name}:")
+    info_build_failed = info_message("** Build failed")
     warning_no_release = warning_message(
         "** Warning: Workspace does not have Releases, run `tb init --git` to activate them."
     )

@@ -964,7 +966,7 @@ Ready? """
     )
     success_datasource_alter = success_message("** The Data Source has been correctly updated.")
     success_datasource_kafka_connected = success_message(
-        "** Data Source '{id}' created\n
+        "** Data Source '{id}' created\n** Kafka streaming connection configured successfully!"
     )
     success_datasource_shared = success_message(
         "** The Data Source {datasource} has been correctly shared with {workspace}"

@@ -1040,6 +1042,8 @@ Ready? """

     debug_running_file = print_message("** Running {file}", bcolors.CGREY)

+    highlight_building_project = info_highlight_message("\n» Building project...")
+
     success = success_message("{message}")
     info = info_message("{message}")
     highlight = info_highlight_message("{message}")
tinybird/tb/modules/local_common.py
CHANGED

@@ -1,7 +1,6 @@
 import hashlib
 import logging
 import os
-from typing import Optional

 import requests

@@ -9,19 +8,19 @@ from tinybird.client import TinyB
 from tinybird.tb.modules.config import CLIConfig
 from tinybird.tb.modules.exceptions import CLIException

-TB_IMAGE_NAME = "
+TB_IMAGE_NAME = "registry.gitlab.com/tinybird/analytics/tinybird-local-jammy-3.11:beta"
 TB_CONTAINER_NAME = "tinybird-local"
 TB_LOCAL_PORT = int(os.getenv("TB_LOCAL_PORT", 80))
 TB_LOCAL_HOST = f"http://localhost:{TB_LOCAL_PORT}"


-async def get_tinybird_local_client(path:
+async def get_tinybird_local_client(path: str, build: bool = False) -> TinyB:
     """Get a Tinybird client connected to the local environment."""
-    config = await get_tinybird_local_config(path)
+    config = await get_tinybird_local_config(path, build=build)
     return config.get_client(host=TB_LOCAL_HOST)


-async def get_tinybird_local_config(path:
+async def get_tinybird_local_config(path: str, build: bool = False) -> CLIConfig:
     """Craft a client config with a workspace name based on the path of the project files

     It uses the tokens from tinybird local

@@ -41,11 +40,15 @@ async def get_tinybird_local_config(path: Optional[str] = None) -> CLIConfig:
     if path:
         folder_hash = hashlib.sha256(path.encode()).hexdigest()
         user_client = config.get_client(host=TB_LOCAL_HOST, token=user_token)
-        ws_name = f"Tinybird_Local_Build_{folder_hash}"
+        ws_name = f"Tinybird_Local_Build_{folder_hash}" if build else f"Tinybird_Local_{folder_hash}"
         logging.debug(f"Workspace used for build: {ws_name}")

         user_workspaces = requests.get(f"{TB_LOCAL_HOST}/v0/user/workspaces?token={user_token}").json()
-        local_workspaces =
+        local_workspaces = (
+            [ws for ws in user_workspaces["workspaces"] if ws["name"].startswith(ws_name)]
+            if user_workspaces.get("workspaces")
+            else []
+        )
         local_workspaces = sorted(local_workspaces, key=lambda x: x["name"])

         ws = None
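A minimal sketch of the workspace-name derivation introduced above: the name is a SHA-256 hash of the project path, prefixed differently depending on the new build flag (the path below is invented):

    import hashlib

    path = "/home/user/my_project"  # hypothetical project folder
    folder_hash = hashlib.sha256(path.encode()).hexdigest()

    for build in (True, False):
        ws_name = f"Tinybird_Local_Build_{folder_hash}" if build else f"Tinybird_Local_{folder_hash}"
        print(build, ws_name)
    # build=True keeps the old "Tinybird_Local_Build_" prefix; the new default (build=False)
    # selects a plain "Tinybird_Local_" workspace for the same folder.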
tinybird/tb/modules/materialization.py
ADDED

@@ -0,0 +1,146 @@
+import json
+import re
+
+import click
+
+from tinybird.client import TinyB
+from tinybird.tb.modules.cli import cli
+from tinybird.tb.modules.common import (
+    coro,
+    create_tb_client,
+    echo_safe_humanfriendly_tables_format_smart_table,
+    wait_job,
+)
+from tinybird.tb.modules.datafile.common import PipeTypes, get_name_version
+from tinybird.tb.modules.exceptions import CLIPipeException
+from tinybird.tb.modules.feedback_manager import FeedbackManager
+
+
+@cli.group()
+@click.pass_context
+def materialization(ctx):
+    """Materialization commands"""
+
+
+@materialization.command(name="ls")
+@click.option("--match", default=None, help="Retrieve any resourcing matching the pattern. eg --match _test")
+@click.option(
+    "--format",
+    "format_",
+    type=click.Choice(["json"], case_sensitive=False),
+    default=None,
+    help="Force a type of the output",
+)
+@click.pass_context
+@coro
+async def materialization_ls(ctx: click.Context, match: str, format_: str):
+    """List materializations"""
+
+    client: TinyB = ctx.ensure_object(dict)["client"]
+    pipes = await client.pipes(dependencies=True, node_attrs="name,materialized", attrs="name,updated_at,endpoint,type")
+    materializations = [p for p in pipes if p.get("type") == PipeTypes.MATERIALIZED]
+    materializations = sorted(materializations, key=lambda p: p["updated_at"])
+    datasources = await client.datasources()
+    columns = ["name", "updated at", "nodes", "target datasource"]
+    table_human_readable = []
+    table_machine_readable = []
+    pattern = re.compile(match) if match else None
+    for t in materializations:
+        tk = get_name_version(t["name"])
+        if pattern and not pattern.search(tk["name"]):
+            continue
+        target_datasource_id = next((n["materialized"] for n in t["nodes"] if n.get("materialized")), None)
+        target_datasource = next((d for d in datasources if d["id"] == target_datasource_id), None)
+        target_datasource_name = target_datasource.get("name", "") if target_datasource else ""
+        table_human_readable.append((tk["name"], t["updated_at"][:-7], len(t["nodes"]), target_datasource_name))
+        table_machine_readable.append(
+            {
+                "name": tk["name"],
+                "updated at": t["updated_at"][:-7],
+                "nodes": len(t["nodes"]),
+                "target datasource": target_datasource_name,
+            }
+        )
+
+    if not format_:
+        click.echo(FeedbackManager.info_pipes())
+        echo_safe_humanfriendly_tables_format_smart_table(table_human_readable, column_names=columns)
+        click.echo("\n")
+    elif format_ == "json":
+        click.echo(json.dumps({"pipes": table_machine_readable}, indent=2))
+    else:
+        raise CLIPipeException(FeedbackManager.error_pipe_ls_type())
+
+
+@materialization.command(name="populate")
+@click.argument("pipe_name")
+@click.option("--node", type=str, help="Name of the materialized node.", default=None, required=False)
+@click.option(
+    "--sql-condition",
+    type=str,
+    default=None,
+    help="Populate with a SQL condition to be applied to the trigger Data Source of the Materialized View. For instance, `--sql-condition='date == toYYYYMM(now())'` it'll populate taking all the rows from the trigger Data Source which `date` is the current month. Use it together with --populate. --sql-condition is not taken into account if the --subset param is present. Including in the ``sql_condition`` any column present in the Data Source ``engine_sorting_key`` will make the populate job process less data.",
+)
+@click.option(
+    "--truncate", is_flag=True, default=False, help="Truncates the materialized Data Source before populating it."
+)
+@click.option(
+    "--unlink-on-populate-error",
+    is_flag=True,
+    default=False,
+    help="If the populate job fails the Materialized View is unlinked and new data won't be ingested in the Materialized View. First time a populate job fails, the Materialized View is always unlinked.",
+)
+@click.option(
+    "--wait",
+    is_flag=True,
+    default=False,
+    help="Waits for populate jobs to finish, showing a progress bar. Disabled by default.",
+)
+@click.pass_context
+@coro
+async def pipe_populate(
+    ctx: click.Context,
+    pipe_name: str,
+    node: str,
+    sql_condition: str,
+    truncate: bool,
+    unlink_on_populate_error: bool,
+    wait: bool,
+):
+    """Populate the result of a Materialized Node into the target Materialized View"""
+    cl = create_tb_client(ctx)
+
+    pipe = await cl.pipe(pipe_name)
+
+    if pipe["type"] != PipeTypes.MATERIALIZED:
+        raise CLIPipeException(FeedbackManager.error_pipe_not_materialized(pipe=pipe_name))
+
+    if not node:
+        materialized_ids = [pipe_node["id"] for pipe_node in pipe["nodes"] if pipe_node.get("materialized") is not None]
+
+        if not materialized_ids:
+            raise CLIPipeException(FeedbackManager.error_populate_no_materialized_in_pipe(pipe=pipe_name))
+
+        elif len(materialized_ids) > 1:
+            raise CLIPipeException(FeedbackManager.error_populate_several_materialized_in_pipe(pipe=pipe_name))
+
+        node = materialized_ids[0]
+
+    response = await cl.populate_node(
+        pipe_name,
+        node,
+        populate_condition=sql_condition,
+        truncate=truncate,
+        unlink_on_populate_error=unlink_on_populate_error,
+    )
+    if "job" not in response:
+        raise CLIPipeException(response)
+
+    job_id = response["job"]["id"]
+    job_url = response["job"]["job_url"]
+    if sql_condition:
+        click.echo(FeedbackManager.info_populate_condition_job_url(url=job_url, populate_condition=sql_condition))
+    else:
+        click.echo(FeedbackManager.info_populate_job_url(url=job_url))
+    if wait:
+        await wait_job(cl, job_id, job_url, "Populating")
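A minimal sketch of the node-selection rule pipe_populate applies above when --node is not passed (the pipe payload is invented; the real command raises CLIPipeException rather than ValueError):

    # Hypothetical pipe payload with a single materialized node.
    pipe = {
        "nodes": [
            {"id": "node_a", "materialized": None},
            {"id": "node_b", "materialized": "t_target_datasource"},
        ]
    }

    materialized_ids = [n["id"] for n in pipe["nodes"] if n.get("materialized") is not None]
    if not materialized_ids:
        raise ValueError("no materialized node in pipe")
    elif len(materialized_ids) > 1:
        raise ValueError("several materialized nodes in pipe")
    node = materialized_ids[0]
    print(node)  # -> "node_b": the node whose result gets populated into the target Data Source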
tinybird/tb/modules/mock.py
CHANGED
@@ -1,6 +1,7 @@
 import logging
 import os
 from pathlib import Path
+from typing import Optional

 import click

@@ -13,6 +14,7 @@ from tinybird.tb.modules.feedback_manager import FeedbackManager
 from tinybird.tb.modules.llm import LLM
 from tinybird.tb.modules.llm_utils import extract_xml
 from tinybird.tb.modules.local_common import get_tinybird_local_client
+from tinybird.tb.modules.project import Project


 @cli.command()

@@ -21,24 +23,27 @@ from tinybird.tb.modules.local_common import get_tinybird_local_client
 @click.option(
     "--prompt",
     type=str,
-    default="
+    default="",
     help="Extra context to use for data generation",
 )
-@click.option("--
+@click.option("--skip", is_flag=True, default=False, help="Skip following up on the generated data")
+@click.pass_context
 @coro
-async def mock(datasource: str, rows: int, prompt: str,
-    """
+async def mock(ctx: click.Context, datasource: str, rows: int, prompt: str, skip: bool) -> None:
+    """Generate sample data for a datasource.

     Args:
         datasource: Path to the datasource file to load sample data into
         rows: Number of events to send
         prompt: Extra context to use for data generation
-
+        skip: Skip following up on the generated data
     """

     try:
+        project: Project = ctx.ensure_object(dict)["project"]
         datasource_path = Path(datasource)
         datasource_name = datasource
+        folder = project.folder
         click.echo(FeedbackManager.highlight(message=f"\n» Creating fixture for {datasource_name}..."))
         if datasource_path.suffix == ".datasource":
             datasource_name = datasource_path.stem

@@ -46,6 +51,9 @@ async def mock(datasource: str, rows: int, prompt: str, folder: str) -> None:
             datasource_path = Path("datasources", f"{datasource}.datasource")
             datasource_path = Path(folder) / datasource_path

+        if not datasource_path.exists():
+            raise CLIException(f"Datasource '{datasource_path.stem}' not found")
+
         prompt_path = Path(folder) / "fixtures" / f"{datasource_name}.prompt"
         if not prompt or prompt == "Use the datasource schema to generate sample data":
             # load the prompt from the fixture.prompt file if it exists

@@ -68,17 +76,49 @@ async def mock(datasource: str, rows: int, prompt: str, folder: str) -> None:
             click.echo(FeedbackManager.error(message="This action requires authentication. Run 'tb login' first."))
             return
         llm = LLM(user_token=user_token, host=user_client.host)
-        tb_client = await get_tinybird_local_client(
+        tb_client = await get_tinybird_local_client(folder)
         prompt = f"<datasource_schema>{datasource_content}</datasource_schema>\n<user_input>{prompt}</user_input>"
-
-
-
-
-
-
-
-
-
+        iterations = 0
+        history = ""
+        fixture_path: Optional[Path] = None
+        sql = ""
+        while iterations < 10:
+            feedback = ""
+            if iterations > 0:
+                feedback = click.prompt("\nFollow-up instructions or continue", default="continue")
+            if iterations > 0 and (not feedback or feedback in ("continue", "ok", "exit", "quit", "q")):
+                break
+            else:
+                if iterations > 0:
+                    if fixture_path:
+                        fixture_path.unlink()
+                        fixture_path = None
+                    click.echo(FeedbackManager.highlight(message=f"\n» Creating fixture for {datasource_name}..."))
+
+                response = llm.ask(system_prompt=mock_prompt(rows, feedback, history), prompt=prompt)
+                sql = extract_xml(response, "sql")
+                result = await tb_client.query(f"{sql} FORMAT JSON")
+                data = result.get("data", [])[:rows]
+                fixture_name = build_fixture_name(str(datasource_path), datasource_name, datasource_content)
+                fixture_path = persist_fixture(fixture_name, data, folder)
+                click.echo(FeedbackManager.info(message=f"✓ /fixtures/{fixture_name}.ndjson created"))
+
+                if os.environ.get("TB_DEBUG", "") != "":
+                    logging.debug(sql)
+
+                history = (
+                    history
+                    + f"""
+<result_iteration_{iterations}>
+{response}
+</result_iteration_{iterations}>
+"""
+                )
+            if skip:
+                break
+            iterations += 1
+
+        click.echo(FeedbackManager.success(message=f"✓ Sample data for {datasource_name} created with {rows} rows"))

     except Exception as e:
-
+        click.echo(FeedbackManager.error_exception(error=f"Error: {e}"))