tinybird 0.0.1.dev1__py3-none-any.whl → 0.0.1.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinybird/__cli__.py +2 -2
- tinybird/client.py +8 -5
- tinybird/config.py +1 -1
- tinybird/datafile.py +540 -6
- tinybird/feedback_manager.py +6 -0
- tinybird/tb_cli.py +2 -0
- tinybird/tb_cli_modules/build.py +221 -0
- tinybird/tb_cli_modules/cli.py +67 -0
- tinybird/tb_cli_modules/common.py +2 -3
- tinybird/tb_cli_modules/create.py +226 -0
- tinybird/tb_cli_modules/datasource.py +10 -2
- tinybird/tb_cli_modules/prompts.py +133 -0
- {tinybird-0.0.1.dev1.dist-info → tinybird-0.0.1.dev2.dist-info}/METADATA +4 -1
- {tinybird-0.0.1.dev1.dist-info → tinybird-0.0.1.dev2.dist-info}/RECORD +17 -14
- {tinybird-0.0.1.dev1.dist-info → tinybird-0.0.1.dev2.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev1.dist-info → tinybird-0.0.1.dev2.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev1.dist-info → tinybird-0.0.1.dev2.dist-info}/top_level.txt +0 -0
tinybird/feedback_manager.py
CHANGED
@@ -671,6 +671,7 @@ Ready? """
     info_building_dependencies = info_message("** Building dependencies")
     info_processing_new_resource = info_message("** Running '{name}' {version}")
     info_dry_processing_new_resource = info_message("** [DRY RUN] Running '{name}' {version}")
+    info_building_resource = info_message("** Building {name}")
     info_processing_resource = info_message(
         "** Running '{name}' => v{version} (remote latest version: v{latest_version})"
     )
@@ -1020,3 +1021,8 @@ Ready? """
     success_tag_removed = success_message("** Tag '{tag_name}' removed!")

     debug_running_file = print_message("** Running {file}", bcolors.CGREY)
+
+    success = success_message("{message}")
+    info = info_message("{message}")
+    highlight = info_highlight_message("{message}")
+    error = error_message("{message}")
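The new `success`, `info`, `highlight` and `error` attributes are generic message factories: instead of a purpose-specific template, callers pass the whole text through the `message` placeholder. A minimal sketch of how the new `build`/`create` commands in this release call them (the call pattern is taken from the diffs below; the literal strings are illustrative):

import click

from tinybird.feedback_manager import FeedbackManager

click.echo(FeedbackManager.highlight(message="◎ Watching for changes..."))
click.echo(FeedbackManager.success(message="✓ Build completed in 1.2s"))
click.echo(FeedbackManager.error(message="Error: something went wrong"))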
tinybird/tb_cli.py
CHANGED
@@ -6,9 +6,11 @@ if sys.platform == "win32":

 import tinybird.tb_cli_modules.auth
 import tinybird.tb_cli_modules.branch
+import tinybird.tb_cli_modules.build
 import tinybird.tb_cli_modules.cli
 import tinybird.tb_cli_modules.common
 import tinybird.tb_cli_modules.connection
+import tinybird.tb_cli_modules.create
 import tinybird.tb_cli_modules.datasource
 import tinybird.tb_cli_modules.fmt
 import tinybird.tb_cli_modules.job
tinybird/tb_cli_modules/build.py
ADDED
@@ -0,0 +1,221 @@
+import asyncio
+import json
+import os
+import time
+from pathlib import Path
+from typing import Any, Awaitable, Callable, Dict, List, Union
+
+import click
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers import Observer
+
+import tinybird.context as context
+from tinybird.client import TinyB
+from tinybird.config import FeatureFlags
+from tinybird.datafile import (
+    ParseException,
+    folder_build,
+    get_project_filenames,
+    has_internal_datafiles,
+    parse_datasource,
+    parse_pipe,
+)
+from tinybird.feedback_manager import FeedbackManager, info_highlight_message, success_message
+from tinybird.tb_cli_modules.cli import cli
+from tinybird.tb_cli_modules.common import (
+    coro,
+    echo_safe_humanfriendly_tables_format_smart_table,
+)
+from tinybird.tb_cli_modules.create import generate_sample_data_from_columns
+from tinybird.tb_cli_modules.local import (
+    get_docker_client,
+    get_tinybird_local_client,
+    remove_tinybird_local,
+    start_tinybird_local,
+    stop_tinybird_local,
+)
+
+
+class FileChangeHandler(FileSystemEventHandler):
+    def __init__(self, filenames: List[str], process: Callable[[List[str]], None]):
+        self.filenames = filenames
+        self.process = process
+
+    def on_modified(self, event: Any) -> None:
+        if not event.is_directory and any(event.src_path.endswith(ext) for ext in [".datasource", ".pipe"]):
+            filename = event.src_path.split("/")[-1]
+            click.echo(info_highlight_message(f"\n⟲ Changes detected in {filename}\n")())
+            try:
+                self.process([event.src_path])
+            except Exception as e:
+                click.echo(FeedbackManager.error_exception(error=e))
+
+
+def watch_files(
+    filenames: List[str],
+    process: Union[Callable[[List[str]], None], Callable[[List[str]], Awaitable[None]]],
+) -> None:
+    # Handle both sync and async process functions
+    async def process_wrapper(files: List[str]) -> None:
+        click.echo("⚡ Rebuilding...")
+        time_start = time.time()
+        if asyncio.iscoroutinefunction(process):
+            await process(files, watch=True)
+        else:
+            process(files, watch=True)
+        time_end = time.time()
+        elapsed_time = time_end - time_start
+        click.echo(success_message(f"\n✓ Rebuild completed in {elapsed_time:.1f}s")())
+
+    event_handler = FileChangeHandler(filenames, lambda f: asyncio.run(process_wrapper(f)))
+    observer = Observer()
+
+    # Watch each provided path
+    for filename in filenames:
+        path = filename if os.path.isdir(filename) else os.path.dirname(filename)
+        observer.schedule(event_handler, path=path, recursive=True)
+
+    observer.start()
+
+    try:
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        observer.stop()
+
+    observer.join()
+
+
+@cli.command()
+@click.option(
+    "--folder",
+    default=".",
+    help="Folder from where to execute the command. By default the current folder",
+    hidden=True,
+    type=click.types.STRING,
+)
+@click.option(
+    "--watch",
+    is_flag=True,
+    help="Watch for changes in the files and re-check them.",
+)
+@click.option(
+    "--restart",
+    is_flag=True,
+    help="Restart the Tinybird development environment before building the first time.",
+)
+@coro
+async def build(
+    folder: str,
+    watch: bool,
+    restart: bool,
+) -> None:
+    """
+    Watch for changes in the files and re-check them.
+    """
+    docker_client = get_docker_client()
+    if restart:
+        remove_tinybird_local(docker_client)
+        start_tinybird_local(docker_client)
+    ignore_sql_errors = FeatureFlags.ignore_sql_errors()
+    context.disable_template_security_validation.set(True)
+    is_internal = has_internal_datafiles(folder)
+    tb_client = get_tinybird_local_client()
+    workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces_and_branches()).get("workspaces", [])
+    datasources: List[Dict[str, Any]] = await tb_client.datasources()
+    pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)
+
+    def check_filenames(filenames: List[str]):
+        parser_matrix = {".pipe": parse_pipe, ".datasource": parse_datasource}
+        incl_suffix = ".incl"
+
+        for filename in filenames:
+            if os.path.isdir(filename):
+                process(filenames=get_project_filenames(filename))
+
+            file_suffix = Path(filename).suffix
+            if file_suffix == incl_suffix:
+                continue
+
+            parser = parser_matrix.get(file_suffix)
+            if not parser:
+                raise ParseException(FeedbackManager.error_unsupported_datafile(extension=file_suffix))
+
+            parser(filename)
+
+    async def process(filenames: List[str], watch: bool = False, only_pipes: bool = False):
+        check_filenames(filenames=filenames)
+        await folder_build(
+            tb_client,
+            workspaces,
+            datasources,
+            pipes,
+            filenames,
+            ignore_sql_errors=ignore_sql_errors,
+            is_internal=is_internal,
+            only_pipes=only_pipes,
+        )
+
+        for filename in filenames:
+            if filename.endswith(".datasource"):
+                ds_path = Path(filename)
+                ds_name = ds_path.stem
+                datasource_content = ds_path.read_text()
+                sample_data = await generate_sample_data_from_columns(tb_client, datasource_content)
+                ndjson_data = "\n".join([json.dumps(row) for row in sample_data])
+                await tb_client.datasource_events(ds_name, ndjson_data)
+
+        if watch:
+            filename = filenames[0]
+            if filename.endswith(".pipe"):
+                await build_and_print_pipe(tb_client, filename)
+
+    filenames = get_project_filenames(folder)
+
+    async def build_once(filenames: List[str]):
+        try:
+            click.echo("⚡ Building project...")
+            time_start = time.time()
+            await process(filenames=filenames, watch=False)
+            time_end = time.time()
+            elapsed_time = time_end - time_start
+            click.echo(FeedbackManager.success(message=f"\n✓ Build completed in {elapsed_time:.1f}s\n"))
+        except Exception as e:
+            click.echo(FeedbackManager.error(message=str(e)))
+
+    await build_once(filenames)
+
+    if watch:
+        click.echo(FeedbackManager.highlight(message="◎ Watching for changes..."))
+        watch_files(filenames, process)
+
+
+async def build_and_print_pipe(tb_client: TinyB, filename: str):
+    pipe_name = os.path.basename(filename.split(".")[0])
+    res = await tb_client.query(f"SELECT * FROM {pipe_name} LIMIT 5 FORMAT JSON", pipeline=pipe_name)
+    data = []
+    for d in res["data"]:
+        data.append(d.values())
+    meta = res["meta"]
+    column_names = [col["name"] for col in meta]
+    echo_safe_humanfriendly_tables_format_smart_table(data, column_names=column_names)
+
+
+@cli.command()
+@coro
+async def stop() -> None:
+    """Stop Tinybird development environment"""
+    click.echo(FeedbackManager.info(message="Shutting down Tinybird development environment..."))
+    docker_client = get_docker_client()
+    stop_tinybird_local(docker_client)
+    click.echo(FeedbackManager.success(message="Tinybird development environment stopped"))
+
+
+@cli.command()
+@coro
+async def start() -> None:
+    """Start Tinybird development environment"""
+    click.echo(FeedbackManager.info(message="Starting Tinybird development environment..."))
+    docker_client = get_docker_client()
+    start_tinybird_local(docker_client)
+    click.echo(FeedbackManager.success(message="Tinybird development environment started"))
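For a quick smoke test of the new commands without a shell, Click's built-in test runner can invoke them in-process. This is a hypothetical sketch, not part of the package: it assumes a local Docker daemon is available and that the `cli` group can be invoked without extra global options; importing the module registers `build`, `start` and `stop` on `cli`, mirroring what `tb_cli.py` does above.

# Hypothetical smoke test; assumes Docker is running locally.
from click.testing import CliRunner

import tinybird.tb_cli_modules.build  # noqa: F401 -- registers build/start/stop on `cli`
from tinybird.tb_cli_modules.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["start"])
print(result.exit_code, result.output)

result = runner.invoke(cli, ["build"])  # add "--watch" to keep rebuilding on file changes
print(result.exit_code, result.output)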
tinybird/tb_cli_modules/cli.py
CHANGED
@@ -10,12 +10,14 @@ import pprint
 import re
 import shutil
 import sys
+from datetime import datetime
 from os import environ, getcwd
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

 import click
 import humanfriendly
+import requests
 from click import Context
 from packaging import version

@@ -67,6 +69,7 @@ from tinybird.tb_cli_modules.common import (
     try_update_config_with_remote,
 )
 from tinybird.tb_cli_modules.config import CLIConfig
+from tinybird.tb_cli_modules.prompts import sample_data_prompt
 from tinybird.tb_cli_modules.telemetry import add_telemetry_event

 __old_click_echo = click.echo
@@ -1569,3 +1572,67 @@ async def deploy(
         raise
     except Exception as e:
         raise CLIException(str(e))
+
+
+@cli.command()
+@click.argument("datasource_file", type=click.Path(exists=True))
+@click.option("--count", type=int, default=10, help="Number of events to send")
+@click.option("--model", type=str, default=None, help="Model to use for data generation")
+@click.option("--print-data", is_flag=True, default=False, help="Print the data being sent")
+@click.pass_context
+def load_sample_data(ctx: Context, datasource_file: str, count: int, model: Optional[str], print_data: bool) -> None:
+    """Load sample data into a datasource.
+
+    Args:
+        ctx: Click context object
+        datasource_file: Path to the datasource file to load sample data into
+    """
+    import llm
+
+    try:
+        # TODO(eclbg): allow passing a datasource name instead of a file
+        datasource_path = Path(datasource_file)
+        if datasource_path.suffix != ".datasource":
+            raise CLIException(FeedbackManager.error_file_extension(filename=datasource_file))
+
+        datasource_name = datasource_path.stem
+
+        response = requests.get("http://localhost:80/tokens")
+        token = response.json()["workspace_admin_token"]
+
+        with open(datasource_file) as f:
+            content = f.read()
+        schema_start = next(i for i, line in enumerate(content.splitlines()) if line.strip().startswith("SCHEMA >"))
+        schema_end = next(
+            i
+            for i, line in enumerate(content.splitlines()[schema_start + 1 :], schema_start + 1)
+            if not line.strip()
+        )
+        schema = "\n".join(content.splitlines()[schema_start:schema_end])
+        llm_model = llm.get_model(model)
+        click.echo(f"Using model: {model}")
+        prompt = sample_data_prompt.format(current_datetime=datetime.now().isoformat(), row_count=count)
+        # prompt = sample_data_with_errors_prompt.format(current_datetime=datetime.now().isoformat()) # This prompt will generate data with errors
+        full_prompt = prompt + "\n\n" + schema
+        sent_events = 0
+        while sent_events < count:
+            click.echo(f"Generating data for '{datasource_name}'")
+            data = llm_model.prompt(full_prompt)
+
+            click.echo(f"Sending data to '{datasource_name}'")
+            headers = {"Authorization": f"Bearer {token}"}
+            if print_data:
+                click.echo(f"Data: {data}")
+            response = requests.post(
+                f"http://localhost:80/v0/events?name={datasource_name}",
+                data=data,
+                headers=headers,
+            )
+            if response.status_code not in (200, 202):
+                raise CLIException(f"Failed to send data: {response.text}")
+            click.echo(f"Response: {response.text}")
+            sent_events += 10
+            click.echo(f"Sent 10 events to datasource '{datasource_name}'")
+
+    except Exception as e:
+        raise CLIException(FeedbackManager.error_exception(error=str(e)))
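`load_sample_data` extracts the schema by slicing between the `SCHEMA >` line and the first blank line that follows it, then feeds that block to the LLM prompt. A standalone sketch of that slicing; the datasource text is invented for illustration:

content = """DESCRIPTION >
    Example datasource

SCHEMA >
    `timestamp` DateTime `json:$.timestamp`,
    `airline` String `json:$.airline`

ENGINE "MergeTree"
"""
lines = content.splitlines()
schema_start = next(i for i, line in enumerate(lines) if line.strip().startswith("SCHEMA >"))
schema_end = next(i for i, line in enumerate(lines[schema_start + 1 :], schema_start + 1) if not line.strip())
schema = "\n".join(lines[schema_start:schema_end])
print(schema)
# SCHEMA >
#     `timestamp` DateTime `json:$.timestamp`,
#     `airline` String `json:$.airline`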
tinybird/tb_cli_modules/common.py
CHANGED
@@ -153,7 +153,7 @@ def generate_datafile(
     if not f.exists() or force:
         with open(f"{f}", "w") as ds_file:
             ds_file.write(datafile)
-        click.echo(FeedbackManager.
+        click.echo(FeedbackManager.success(message=f"** Generated {f}"))

         if data and (base / "fixtures").exists():
             # Generating a fixture for Parquet files is not so trivial, since Parquet format
@@ -166,7 +166,6 @@ def generate_datafile(
             newline = b"\n" # TODO: guess
             with open(f, "wb") as fixture_file:
                 fixture_file.write(data[: data.rfind(newline)])
-            click.echo(FeedbackManager.success_generated_fixture(fixture=f))
     else:
         click.echo(FeedbackManager.error_file_already_exists(file=f))

@@ -1043,6 +1042,7 @@ def get_format_from_filename_or_url(filename_or_url: str) -> str:

 async def push_data(
     ctx: Context,
+    client: TinyB,
     datasource_name: str,
     url,
     connector: Optional[str],
@@ -1055,7 +1055,6 @@ async def push_data(
 ):
     if url and type(url) is tuple:
         url = url[0]
-    client: TinyB = ctx.obj["client"]

     if connector and sql:
         load_connector_config(ctx, connector, False, check_uninstalled=False)
tinybird/tb_cli_modules/create.py
ADDED
@@ -0,0 +1,226 @@
+import json
+import os
+from os import getcwd
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import click
+from click import Context
+from openai import OpenAI
+
+from tinybird.client import TinyB
+from tinybird.datafile import folder_build
+from tinybird.feedback_manager import FeedbackManager
+from tinybird.tb_cli_modules.cli import cli
+from tinybird.tb_cli_modules.common import _generate_datafile, coro, generate_datafile, push_data
+from tinybird.tb_cli_modules.config import CLIConfig
+from tinybird.tb_cli_modules.exceptions import CLIDatasourceException
+from tinybird.tb_cli_modules.llm import LLM
+from tinybird.tb_cli_modules.local import get_docker_client, set_up_tinybird_local
+from tinybird.tb_cli_modules.prompts import sample_data_sql_prompt
+
+
+@cli.command()
+@click.option(
+    "--data",
+    type=click.Path(exists=True),
+    default=None,
+    help="Initial data to be used to create the project",
+)
+@click.option(
+    "--prompt",
+    type=str,
+    default=None,
+    help="Prompt to be used to create the project",
+)
+@click.option(
+    "--folder",
+    default=None,
+    type=click.Path(exists=True, file_okay=False),
+    help="Folder where datafiles will be placed",
+)
+@click.pass_context
+@coro
+async def create(
+    ctx: Context,
+    data: Optional[str],
+    prompt: Optional[str],
+    folder: Optional[str],
+) -> None:
+    """Initialize a new project."""
+    click.echo(FeedbackManager.highlight(message="Setting up Tinybird development environment..."))
+    folder = folder or getcwd()
+    try:
+        docker_client = get_docker_client()
+        tb_client = set_up_tinybird_local(docker_client)
+        await project_create(tb_client, data, prompt, folder)
+        workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces()).get("workspaces", [])
+        datasources = await tb_client.datasources()
+        pipes = await tb_client.pipes(dependencies=True)
+        await folder_build(
+            tb_client,
+            workspaces,
+            datasources,
+            pipes,
+        )
+        if data:
+            ds_name = os.path.basename(data.split(".")[0])
+            await append_datasource(ctx, tb_client, ds_name, data, None, None, False, 1)
+        elif prompt:
+            datasource_files = [f for f in os.listdir(Path(folder) / "datasources") if f.endswith(".datasource")]
+            for datasource_file in datasource_files:
+                datasource_content = Path(folder) / "datasources" / datasource_file
+                sample_data = await generate_sample_data_from_columns(tb_client, datasource_content)
+                ndjson_data = "\n".join([json.dumps(row) for row in sample_data])
+                await tb_client.datasource_events(datasource_file, ndjson_data)
+        click.echo(FeedbackManager.success(message="\n✔ Tinybird development environment is ready"))
+    except Exception as e:
+        click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
+
+
+async def project_create(
+    client: TinyB,
+    data: Optional[str],
+    prompt: Optional[str],
+    folder: str,
+):
+    project_paths = ["datasources", "endpoints", "copies", "sinks", "playgrounds", "materializations"]
+    force = True
+    for x in project_paths:
+        try:
+            f = Path(folder) / x
+            f.mkdir()
+            click.echo(FeedbackManager.info_path_created(path=x))
+        except FileExistsError:
+            pass
+
+    def generate_pipe_file(name: str, content: str):
+        base = Path("endpoints")
+        if not base.exists():
+            base = Path()
+        f = base / (f"{name}.pipe")
+        with open(f"{f}", "w") as file:
+            file.write(content)
+        click.echo(FeedbackManager.success(message=f"** Generated {f}"))
+
+    if data:
+        path = Path(folder) / data
+        format = path.suffix.lstrip(".")
+        await _generate_datafile(str(path), client, format=format, force=force)
+        name = data.split(".")[0]
+        generate_pipe_file(
+            f"{name}_endpoint",
+            f"""
+NODE endpoint
+SQL >
+    SELECT * from {name}
+TYPE ENDPOINT
+""",
+        )
+    elif prompt:
+        try:
+            config = CLIConfig.get_project_config()
+            model = config.get("llms", {}).get("openai", {}).get("model", "gpt-4o-mini")
+            api_key = config.get("llms", {}).get("openai", {}).get("api_key", None)
+            llm = LLM(model=model, key=api_key)
+            result = await llm.create_project(prompt)
+            for ds in result.datasources:
+                content = ds.content.replace("```", "")
+                generate_datafile(content, filename=f"{ds.name}.datasource", data=None, _format="ndjson", force=force)
+
+            for pipe in result.pipes:
+                content = pipe.content.replace("```", "")
+                generate_pipe_file(pipe.name, content)
+        except Exception as e:
+            click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
+    else:
+        events_ds = """
+SCHEMA >
+    `age` Int16 `json:$.age`,
+    `airline` String `json:$.airline`,
+    `email` String `json:$.email`,
+    `extra_bags` Int16 `json:$.extra_bags`,
+    `flight_from` String `json:$.flight_from`,
+    `flight_to` String `json:$.flight_to`,
+    `meal_choice` String `json:$.meal_choice`,
+    `name` String `json:$.name`,
+    `passport_number` Int32 `json:$.passport_number`,
+    `priority_boarding` UInt8 `json:$.priority_boarding`,
+    `timestamp` DateTime `json:$.timestamp`,
+    `transaction_id` String `json:$.transaction_id`
+
+ENGINE "MergeTree"
+ENGINE_PARTITION_KEY "toYear(timestamp)"
+ENGINE_SORTING_KEY "airline, timestamp"
+"""
+        top_airlines = """
+NODE endpoint
+SQL >
+    SELECT airline, count() as bookings FROM events
+    GROUP BY airline
+    ORDER BY bookings DESC
+    LIMIT 5
+TYPE ENDPOINT
+"""
+        generate_datafile(events_ds, filename="events.datasource", data=None, _format="ndjson", force=force)
+        generate_pipe_file("top_airlines", top_airlines)
+
+
+async def append_datasource(
+    ctx: Context,
+    tb_client: TinyB,
+    datasource_name: str,
+    url: str,
+    sql: Optional[str],
+    incremental: Optional[str],
+    ignore_empty: bool,
+    concurrency: int,
+):
+    if incremental:
+        date = None
+        source_column = incremental.split(":")[0]
+        dest_column = incremental.split(":")[-1]
+        result = await tb_client.query(f"SELECT max({dest_column}) as inc from {datasource_name} FORMAT JSON")
+        try:
+            date = result["data"][0]["inc"]
+        except Exception as e:
+            raise CLIDatasourceException(f"{str(e)}")
+        if date:
+            sql = f"{sql} WHERE {source_column} > '{date}'"
+    await push_data(
+        ctx,
+        tb_client,
+        datasource_name,
+        url,
+        None,
+        sql,
+        mode="append",
+        ignore_empty=ignore_empty,
+        concurrency=concurrency,
+    )
+
+
+def generate_sql_sample_data(datasource_content: str, row_count: int, model: str, api_key: str) -> str:
+    client = OpenAI(api_key=api_key)
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": sample_data_sql_prompt.format(row_count=row_count)},
+            {"role": "user", "content": datasource_content},
+        ],
+    )
+
+    return response.choices[0].message.content or ""
+
+
+async def generate_sample_data_from_columns(
+    tb_client: TinyB, datasource_content: str, row_count: int = 20
+) -> List[Dict[str, Any]]:
+    config = CLIConfig.get_project_config()
+    model = config.get("llms", {}).get("openai", {}).get("model", "gpt-4o-mini")
+    api_key = config.get("llms", {}).get("openai", {}).get("api_key", None)
+    sql = generate_sql_sample_data(datasource_content, row_count, model, api_key)
+    result = await tb_client.query(f"{sql} FORMAT JSON")
+    data = result.get("data", [])
+    return data
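Both `create` and `build` turn the rows returned by `generate_sample_data_from_columns` into NDJSON before posting them to the local Events API. A tiny worked example of that serialization step (the rows are invented):

import json

sample_data = [
    {"airline": "ACME Air", "extra_bags": 1},
    {"airline": "Blue Sky", "extra_bags": 0},
]
ndjson_data = "\n".join(json.dumps(row) for row in sample_data)
print(ndjson_data)
# {"airline": "ACME Air", "extra_bags": 1}
# {"airline": "Blue Sky", "extra_bags": 0}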
tinybird/tb_cli_modules/datasource.py
CHANGED
@@ -174,11 +174,11 @@ async def datasource_append(
     if incremental and not connector:
         raise CLIDatasourceException(FeedbackManager.error_incremental_not_supported())

+    client: TinyB = ctx.obj["client"]
     if incremental:
         date = None
         source_column = incremental.split(":")[0]
         dest_column = incremental.split(":")[-1]
-        client: TinyB = ctx.obj["client"]
         result = await client.query(f"SELECT max({dest_column}) as inc from {datasource_name} FORMAT JSON")
         try:
             date = result["data"][0]["inc"]
@@ -187,7 +187,15 @@ async def datasource_append(
         if date:
             sql = f"{sql} WHERE {source_column} > '{date}'"
     await push_data(
-        ctx,
+        ctx,
+        client,
+        datasource_name,
+        url,
+        connector,
+        sql,
+        mode="append",
+        ignore_empty=ignore_empty,
+        concurrency=concurrency,
     )

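The `incremental` value in `datasource_append` encodes a `source_column:dest_column` pair: the command reads the current maximum of the destination column and appends only rows where the source column is greater. A small illustration of the string handling (column names and values are invented):

incremental = "created_at:inserted_at"
source_column = incremental.split(":")[0]  # "created_at"
dest_column = incremental.split(":")[-1]   # "inserted_at"

# In the command, `date` comes from: SELECT max(inserted_at) as inc FROM <datasource> FORMAT JSON
date = "2024-01-01 00:00:00"
sql = "SELECT * FROM source_table"
sql = f"{sql} WHERE {source_column} > '{date}'"
print(sql)  # SELECT * FROM source_table WHERE created_at > '2024-01-01 00:00:00'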