tinybird 0.0.1.dev1__py3-none-any.whl → 0.0.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -671,6 +671,7 @@ Ready? """
671
671
  info_building_dependencies = info_message("** Building dependencies")
672
672
  info_processing_new_resource = info_message("** Running '{name}' {version}")
673
673
  info_dry_processing_new_resource = info_message("** [DRY RUN] Running '{name}' {version}")
674
+ info_building_resource = info_message("** Building {name}")
674
675
  info_processing_resource = info_message(
675
676
  "** Running '{name}' => v{version} (remote latest version: v{latest_version})"
676
677
  )
@@ -1020,3 +1021,8 @@ Ready? """
1020
1021
  success_tag_removed = success_message("** Tag '{tag_name}' removed!")
1021
1022
 
1022
1023
  debug_running_file = print_message("** Running {file}", bcolors.CGREY)
1024
+
1025
+ success = success_message("{message}")
1026
+ info = info_message("{message}")
1027
+ highlight = info_highlight_message("{message}")
1028
+ error = error_message("{message}")
tinybird/tb_cli.py CHANGED
@@ -6,9 +6,11 @@ if sys.platform == "win32":
6
6
 
7
7
  import tinybird.tb_cli_modules.auth
8
8
  import tinybird.tb_cli_modules.branch
9
+ import tinybird.tb_cli_modules.build
9
10
  import tinybird.tb_cli_modules.cli
10
11
  import tinybird.tb_cli_modules.common
11
12
  import tinybird.tb_cli_modules.connection
13
+ import tinybird.tb_cli_modules.create
12
14
  import tinybird.tb_cli_modules.datasource
13
15
  import tinybird.tb_cli_modules.fmt
14
16
  import tinybird.tb_cli_modules.job
@@ -0,0 +1,221 @@
1
+ import asyncio
2
+ import json
3
+ import os
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Any, Awaitable, Callable, Dict, List, Union
7
+
8
+ import click
9
+ from watchdog.events import FileSystemEventHandler
10
+ from watchdog.observers import Observer
11
+
12
+ import tinybird.context as context
13
+ from tinybird.client import TinyB
14
+ from tinybird.config import FeatureFlags
15
+ from tinybird.datafile import (
16
+ ParseException,
17
+ folder_build,
18
+ get_project_filenames,
19
+ has_internal_datafiles,
20
+ parse_datasource,
21
+ parse_pipe,
22
+ )
23
+ from tinybird.feedback_manager import FeedbackManager, info_highlight_message, success_message
24
+ from tinybird.tb_cli_modules.cli import cli
25
+ from tinybird.tb_cli_modules.common import (
26
+ coro,
27
+ echo_safe_humanfriendly_tables_format_smart_table,
28
+ )
29
+ from tinybird.tb_cli_modules.create import generate_sample_data_from_columns
30
+ from tinybird.tb_cli_modules.local import (
31
+ get_docker_client,
32
+ get_tinybird_local_client,
33
+ remove_tinybird_local,
34
+ start_tinybird_local,
35
+ stop_tinybird_local,
36
+ )
37
+
38
+
39
class FileChangeHandler(FileSystemEventHandler):
    """Watchdog handler that runs ``process`` when a datafile is modified.

    Only modifications of non-directory paths ending in ``.datasource`` or
    ``.pipe`` trigger the callback; every other event is ignored.
    """

    def __init__(self, filenames: List[str], process: Callable[[List[str]], None]):
        """Store the watched filenames and the callback invoked on each change."""
        self.filenames = filenames
        self.process = process

    def on_modified(self, event: Any) -> None:
        """Announce the modified file and call ``process`` with its path.

        Any exception raised by ``process`` is echoed through
        ``FeedbackManager.error_exception`` rather than propagated.
        """
        if event.is_directory or not event.src_path.endswith((".datasource", ".pipe")):
            return

        # os.path.basename honours the platform separator; splitting on "/"
        # printed the whole path on Windows.
        filename = os.path.basename(event.src_path)
        click.echo(info_highlight_message(f"\n⟲ Changes detected in {filename}\n")())
        try:
            self.process([event.src_path])
        except Exception as e:
            click.echo(FeedbackManager.error_exception(error=e))
52
+
53
+
54
def watch_files(
    filenames: List[str],
    process: Union[Callable[[List[str]], None], Callable[[List[str]], Awaitable[None]]],
) -> None:
    """Watch the directories of ``filenames`` and run ``process`` on every change.

    Blocks until interrupted with Ctrl-C. ``process`` may be a plain function
    or a coroutine function; it is called as ``process(files, watch=True)``.

    Args:
        filenames: Files or directories to watch. For a file, its parent
            directory is watched (recursively).
        process: Callback run with the list of changed paths.
    """

    # Handle both sync and async process functions
    async def process_wrapper(files: List[str]) -> None:
        click.echo("⚡ Rebuilding...")
        time_start = time.time()
        if asyncio.iscoroutinefunction(process):
            await process(files, watch=True)
        else:
            process(files, watch=True)
        elapsed_time = time.time() - time_start
        click.echo(success_message(f"\n✓ Rebuild completed in {elapsed_time:.1f}s")())

    event_handler = FileChangeHandler(filenames, lambda f: asyncio.run(process_wrapper(f)))
    observer = Observer()

    # Collect each distinct directory once, keyed by absolute path but keeping
    # the path as originally spelled so reported event paths do not change.
    watch_dirs: Dict[str, str] = {}
    for filename in filenames:
        if os.path.isdir(filename):
            directory = filename
        else:
            # dirname("x.pipe") is "", which is not a usable watch path.
            directory = os.path.dirname(filename) or "."
        watch_dirs.setdefault(os.path.abspath(directory), directory)

    # Watches are recursive, so a directory nested inside an already watched
    # one would deliver every event twice. Sorting puts parents first.
    scheduled: List[str] = []
    for absolute in sorted(watch_dirs):
        if any(absolute.startswith(os.path.join(root, "")) for root in scheduled):
            continue
        scheduled.append(absolute)
        observer.schedule(event_handler, path=watch_dirs[absolute], recursive=True)

    observer.start()

    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Always shut the observer thread down, not only on Ctrl-C.
        observer.stop()
        observer.join()
87
+
88
+
89
@cli.command()
@click.option(
    "--folder",
    default=".",
    help="Folder from where to execute the command. By default the current folder",
    hidden=True,
    type=click.types.STRING,
)
@click.option(
    "--watch",
    is_flag=True,
    help="Watch for changes in the files and re-check them.",
)
@click.option(
    "--restart",
    is_flag=True,
    help="Restart the Tinybird development environment before building the first time.",
)
@coro
async def build(
    folder: str,
    watch: bool,
    restart: bool,
) -> None:
    """
    Build the project in the Tinybird development environment. With --watch, rebuild when datafiles change.
    """
    docker_client = get_docker_client()
    if restart:
        remove_tinybird_local(docker_client)
    start_tinybird_local(docker_client)
    ignore_sql_errors = FeatureFlags.ignore_sql_errors()
    context.disable_template_security_validation.set(True)
    is_internal = has_internal_datafiles(folder)
    tb_client = get_tinybird_local_client()
    workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces_and_branches()).get("workspaces", [])
    datasources: List[Dict[str, Any]] = await tb_client.datasources()
    pipes: List[Dict[str, Any]] = await tb_client.pipes(dependencies=True)

    def check_filenames(filenames: List[str]) -> None:
        """Parse every datafile, raising ParseException on an unsupported suffix."""
        parser_matrix = {".pipe": parse_pipe, ".datasource": parse_datasource}
        incl_suffix = ".incl"

        for filename in filenames:
            if os.path.isdir(filename):
                # Validate the directory's own datafiles. The previous code
                # called the async ``process`` without awaiting it (a no-op)
                # and then fell through to reject the directory's empty suffix.
                check_filenames(filenames=get_project_filenames(filename))
                continue

            file_suffix = Path(filename).suffix
            if file_suffix == incl_suffix:
                continue

            parser = parser_matrix.get(file_suffix)
            if not parser:
                raise ParseException(FeedbackManager.error_unsupported_datafile(extension=file_suffix))

            parser(filename)

    async def process(filenames: List[str], watch: bool = False, only_pipes: bool = False) -> None:
        """Validate and build ``filenames``, then load sample data into each datasource."""
        check_filenames(filenames=filenames)
        await folder_build(
            tb_client,
            workspaces,
            datasources,
            pipes,
            filenames,
            ignore_sql_errors=ignore_sql_errors,
            is_internal=is_internal,
            only_pipes=only_pipes,
        )

        for filename in filenames:
            if filename.endswith(".datasource"):
                ds_path = Path(filename)
                ds_name = ds_path.stem
                datasource_content = ds_path.read_text()
                sample_data = await generate_sample_data_from_columns(tb_client, datasource_content)
                ndjson_data = "\n".join([json.dumps(row) for row in sample_data])
                await tb_client.datasource_events(ds_name, ndjson_data)

        # In watch mode, preview the changed pipe; guard against an empty list.
        if watch and filenames:
            filename = filenames[0]
            if filename.endswith(".pipe"):
                await build_and_print_pipe(tb_client, filename)

    filenames = get_project_filenames(folder)

    async def build_once(filenames: List[str]) -> None:
        """Run one full build, echoing the elapsed time or the error message."""
        try:
            click.echo("⚡ Building project...")
            time_start = time.time()
            await process(filenames=filenames, watch=False)
            elapsed_time = time.time() - time_start
            click.echo(FeedbackManager.success(message=f"\n✓ Build completed in {elapsed_time:.1f}s\n"))
        except Exception as e:
            click.echo(FeedbackManager.error(message=str(e)))

    await build_once(filenames)

    if watch:
        click.echo(FeedbackManager.highlight(message="◎ Watching for changes..."))
        watch_files(filenames, process)
191
+
192
+
193
async def build_and_print_pipe(tb_client: TinyB, filename: str):
    """Query the first five rows of the pipe in ``filename`` and print them as a table.

    Args:
        tb_client: Client used to run the query.
        filename: Path to a ``.pipe`` file; the pipe name is the file's stem.
    """
    # Path.stem keeps working for "./endpoints/x.pipe"; splitting the whole
    # path on "." produced an empty name for any path starting with "./".
    pipe_name = Path(filename).stem
    res = await tb_client.query(f"SELECT * FROM {pipe_name} LIMIT 5 FORMAT JSON", pipeline=pipe_name)
    # Materialise each row so the table formatter receives plain lists.
    data = [list(row.values()) for row in res["data"]]
    column_names = [col["name"] for col in res["meta"]]
    echo_safe_humanfriendly_tables_format_smart_table(data, column_names=column_names)
202
+
203
+
204
@cli.command()
@coro
async def stop() -> None:
    """Stop Tinybird development environment"""
    # Announce, stop the local container through the Docker client, confirm.
    shutting_down = FeedbackManager.info(message="Shutting down Tinybird development environment...")
    click.echo(shutting_down)
    stop_tinybird_local(get_docker_client())
    stopped = FeedbackManager.success(message="Tinybird development environment stopped")
    click.echo(stopped)
212
+
213
+
214
@cli.command()
@coro
async def start() -> None:
    """Start Tinybird development environment"""
    # Announce, start the local container through the Docker client, confirm.
    starting = FeedbackManager.info(message="Starting Tinybird development environment...")
    click.echo(starting)
    start_tinybird_local(get_docker_client())
    started = FeedbackManager.success(message="Tinybird development environment started")
    click.echo(started)
@@ -10,12 +10,14 @@ import pprint
10
10
  import re
11
11
  import shutil
12
12
  import sys
13
+ from datetime import datetime
13
14
  from os import environ, getcwd
14
15
  from pathlib import Path
15
16
  from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
16
17
 
17
18
  import click
18
19
  import humanfriendly
20
+ import requests
19
21
  from click import Context
20
22
  from packaging import version
21
23
 
@@ -67,6 +69,7 @@ from tinybird.tb_cli_modules.common import (
67
69
  try_update_config_with_remote,
68
70
  )
69
71
  from tinybird.tb_cli_modules.config import CLIConfig
72
+ from tinybird.tb_cli_modules.prompts import sample_data_prompt
70
73
  from tinybird.tb_cli_modules.telemetry import add_telemetry_event
71
74
 
72
75
  __old_click_echo = click.echo
@@ -1569,3 +1572,67 @@ async def deploy(
1569
1572
  raise
1570
1573
  except Exception as e:
1571
1574
  raise CLIException(str(e))
1575
+
1576
+
1577
@cli.command()
@click.argument("datasource_file", type=click.Path(exists=True))
@click.option("--count", type=int, default=10, help="Number of events to send")
@click.option("--model", type=str, default=None, help="Model to use for data generation")
@click.option("--print-data", is_flag=True, default=False, help="Print the data being sent")
@click.pass_context
def load_sample_data(ctx: Context, datasource_file: str, count: int, model: Optional[str], print_data: bool) -> None:
    """Load sample data into a datasource.

    Args:
        ctx: Click context object
        datasource_file: Path to the datasource file to load sample data into
        count: Total number of events to generate and send
        model: Name of the llm model used to generate the data
        print_data: Whether to echo each generated payload before sending it
    """
    import llm

    # Events are generated and sent in batches of at most this many rows.
    batch_size = 10

    try:
        # TODO(eclbg): allow passing a datasource name instead of a file
        datasource_path = Path(datasource_file)
        if datasource_path.suffix != ".datasource":
            raise CLIException(FeedbackManager.error_file_extension(filename=datasource_file))

        datasource_name = datasource_path.stem

        response = requests.get("http://localhost:80/tokens")
        token = response.json()["workspace_admin_token"]

        # The schema block runs from the "SCHEMA >" line up to the next blank
        # line, or to the end of the file when no blank line follows.
        lines = datasource_path.read_text().splitlines()
        schema_start = next((i for i, line in enumerate(lines) if line.strip().startswith("SCHEMA >")), None)
        if schema_start is None:
            raise CLIException(f"No 'SCHEMA >' section found in '{datasource_file}'")
        schema_end = next(
            (i for i in range(schema_start + 1, len(lines)) if not lines[i].strip()),
            len(lines),
        )
        schema = "\n".join(lines[schema_start:schema_end])

        llm_model = llm.get_model(model)
        click.echo(f"Using model: {model}")
        headers = {"Authorization": f"Bearer {token}"}

        sent_events = 0
        while sent_events < count:
            # Ask only for what is still missing so the total matches --count.
            # Previously every batch requested `count` rows but was counted as 10.
            batch = min(batch_size, count - sent_events)
            # NOTE: sample_data_with_errors_prompt can be used here instead to
            # generate data with errors.
            prompt = sample_data_prompt.format(current_datetime=datetime.now().isoformat(), row_count=batch)
            full_prompt = prompt + "\n\n" + schema

            click.echo(f"Generating data for '{datasource_name}'")
            # prompt() returns a response object; .text() gives the generated text.
            data = llm_model.prompt(full_prompt).text()

            click.echo(f"Sending data to '{datasource_name}'")
            if print_data:
                click.echo(f"Data: {data}")
            response = requests.post(
                f"http://localhost:80/v0/events?name={datasource_name}",
                # Encode explicitly: a str body is sent as latin-1 and fails on
                # characters outside that range.
                data=data.encode("utf-8"),
                headers=headers,
            )
            if response.status_code not in (200, 202):
                raise CLIException(f"Failed to send data: {response.text}")
            click.echo(f"Response: {response.text}")
            sent_events += batch
            click.echo(f"Sent {batch} events to datasource '{datasource_name}'")

    except CLIException:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        raise CLIException(FeedbackManager.error_exception(error=str(e))) from e
@@ -153,7 +153,7 @@ def generate_datafile(
153
153
  if not f.exists() or force:
154
154
  with open(f"{f}", "w") as ds_file:
155
155
  ds_file.write(datafile)
156
- click.echo(FeedbackManager.success_generated_file(file=f, stem=datasource_name, filename=filename))
156
+ click.echo(FeedbackManager.success(message=f"** Generated {f}"))
157
157
 
158
158
  if data and (base / "fixtures").exists():
159
159
  # Generating a fixture for Parquet files is not so trivial, since Parquet format
@@ -166,7 +166,6 @@ def generate_datafile(
166
166
  newline = b"\n" # TODO: guess
167
167
  with open(f, "wb") as fixture_file:
168
168
  fixture_file.write(data[: data.rfind(newline)])
169
- click.echo(FeedbackManager.success_generated_fixture(fixture=f))
170
169
  else:
171
170
  click.echo(FeedbackManager.error_file_already_exists(file=f))
172
171
 
@@ -1043,6 +1042,7 @@ def get_format_from_filename_or_url(filename_or_url: str) -> str:
1043
1042
 
1044
1043
  async def push_data(
1045
1044
  ctx: Context,
1045
+ client: TinyB,
1046
1046
  datasource_name: str,
1047
1047
  url,
1048
1048
  connector: Optional[str],
@@ -1055,7 +1055,6 @@ async def push_data(
1055
1055
  ):
1056
1056
  if url and type(url) is tuple:
1057
1057
  url = url[0]
1058
- client: TinyB = ctx.obj["client"]
1059
1058
 
1060
1059
  if connector and sql:
1061
1060
  load_connector_config(ctx, connector, False, check_uninstalled=False)
@@ -0,0 +1,226 @@
1
+ import json
2
+ import os
3
+ from os import getcwd
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ import click
8
+ from click import Context
9
+ from openai import OpenAI
10
+
11
+ from tinybird.client import TinyB
12
+ from tinybird.datafile import folder_build
13
+ from tinybird.feedback_manager import FeedbackManager
14
+ from tinybird.tb_cli_modules.cli import cli
15
+ from tinybird.tb_cli_modules.common import _generate_datafile, coro, generate_datafile, push_data
16
+ from tinybird.tb_cli_modules.config import CLIConfig
17
+ from tinybird.tb_cli_modules.exceptions import CLIDatasourceException
18
+ from tinybird.tb_cli_modules.llm import LLM
19
+ from tinybird.tb_cli_modules.local import get_docker_client, set_up_tinybird_local
20
+ from tinybird.tb_cli_modules.prompts import sample_data_sql_prompt
21
+
22
+
23
@cli.command()
@click.option(
    "--data",
    type=click.Path(exists=True),
    default=None,
    help="Initial data to be used to create the project",
)
@click.option(
    "--prompt",
    type=str,
    default=None,
    help="Prompt to be used to create the project",
)
@click.option(
    "--folder",
    default=None,
    type=click.Path(exists=True, file_okay=False),
    help="Folder where datafiles will be placed",
)
@click.pass_context
@coro
async def create(
    ctx: Context,
    data: Optional[str],
    prompt: Optional[str],
    folder: Optional[str],
) -> None:
    """Initialize a new project."""
    click.echo(FeedbackManager.highlight(message="Setting up Tinybird development environment..."))
    folder = folder or getcwd()
    try:
        docker_client = get_docker_client()
        tb_client = set_up_tinybird_local(docker_client)
        await project_create(tb_client, data, prompt, folder)
        workspaces: List[Dict[str, Any]] = (await tb_client.user_workspaces()).get("workspaces", [])
        datasources = await tb_client.datasources()
        pipes = await tb_client.pipes(dependencies=True)
        # NOTE(review): the build command also passes the project filenames to
        # folder_build; confirm that omitting them here is intended.
        await folder_build(
            tb_client,
            workspaces,
            datasources,
            pipes,
        )
        if data:
            # Path.stem keeps working for paths containing dots or directories
            # ("./data/events.csv"); splitting the whole path on "." did not.
            ds_name = Path(data).stem
            await append_datasource(ctx, tb_client, ds_name, data, None, None, False, 1)
        elif prompt:
            datasources_dir = Path(folder) / "datasources"
            datasource_files = [f for f in os.listdir(datasources_dir) if f.endswith(".datasource")]
            for datasource_file in datasource_files:
                datasource_path = datasources_dir / datasource_file
                # Send the file's content to the generator (not the Path
                # object) and ingest under the datasource name, i.e. the
                # filename without its ".datasource" suffix.
                datasource_content = datasource_path.read_text()
                sample_data = await generate_sample_data_from_columns(tb_client, datasource_content)
                ndjson_data = "\n".join([json.dumps(row) for row in sample_data])
                await tb_client.datasource_events(datasource_path.stem, ndjson_data)
        click.echo(FeedbackManager.success(message="\n✔ Tinybird development environment is ready"))
    except Exception as e:
        click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
79
+
80
+
81
async def project_create(
    client: TinyB,
    data: Optional[str],
    prompt: Optional[str],
    folder: str,
):
    """Create the project folders and generate the initial datafiles.

    Exactly one source is used for the datafiles: ``data`` (a datasource is
    inferred from the file and a ``<name>_endpoint`` pipe is added), else
    ``prompt`` (datasources and pipes come from the configured LLM), else a
    built-in ``events`` datasource with a ``top_airlines`` endpoint.

    Args:
        client: Tinybird client used to analyse ``data``.
        data: Optional path to a data file.
        prompt: Optional natural-language description of the project.
        folder: Folder in which the project directories are created.
    """
    project_paths = ["datasources", "endpoints", "copies", "sinks", "playgrounds", "materializations"]
    force = True
    for project_path in project_paths:
        try:
            (Path(folder) / project_path).mkdir()
        except FileExistsError:
            # The directory is already there: nothing to create or report.
            continue
        click.echo(FeedbackManager.info_path_created(path=project_path))

    def generate_pipe_file(name: str, content: str):
        """Write ``content`` to ``<name>.pipe`` under ``endpoints`` (or the CWD)."""
        # NOTE(review): this resolves "endpoints" against the current working
        # directory, not ``folder`` — confirm this matches generate_datafile.
        base = Path("endpoints")
        if not base.exists():
            base = Path()
        f = base / (f"{name}.pipe")
        with open(f"{f}", "w") as file:
            file.write(content)
        click.echo(FeedbackManager.success(message=f"** Generated {f}"))

    if data:
        # ``data`` was validated by click relative to the working directory, so
        # it must not be re-rooted under ``folder``.
        path = Path(data).absolute()
        data_format = path.suffix.lstrip(".")
        await _generate_datafile(str(path), client, format=data_format, force=force)
        # Use the bare file name: splitting the whole path on "." kept any
        # directory prefix and broke on "./data.csv".
        name = path.stem
        generate_pipe_file(
            f"{name}_endpoint",
            f"""
NODE endpoint
SQL >
    SELECT * from {name}
TYPE ENDPOINT
""",
        )
    elif prompt:
        try:
            config = CLIConfig.get_project_config()
            model = config.get("llms", {}).get("openai", {}).get("model", "gpt-4o-mini")
            api_key = config.get("llms", {}).get("openai", {}).get("api_key", None)
            llm = LLM(model=model, key=api_key)
            result = await llm.create_project(prompt)
            for ds in result.datasources:
                # Strip markdown code fences from the generated content.
                content = ds.content.replace("```", "")
                generate_datafile(content, filename=f"{ds.name}.datasource", data=None, _format="ndjson", force=force)

            for pipe in result.pipes:
                content = pipe.content.replace("```", "")
                generate_pipe_file(pipe.name, content)
        except Exception as e:
            click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
    else:
        events_ds = """
SCHEMA >
    `age` Int16 `json:$.age`,
    `airline` String `json:$.airline`,
    `email` String `json:$.email`,
    `extra_bags` Int16 `json:$.extra_bags`,
    `flight_from` String `json:$.flight_from`,
    `flight_to` String `json:$.flight_to`,
    `meal_choice` String `json:$.meal_choice`,
    `name` String `json:$.name`,
    `passport_number` Int32 `json:$.passport_number`,
    `priority_boarding` UInt8 `json:$.priority_boarding`,
    `timestamp` DateTime `json:$.timestamp`,
    `transaction_id` String `json:$.transaction_id`

ENGINE "MergeTree"
ENGINE_PARTITION_KEY "toYear(timestamp)"
ENGINE_SORTING_KEY "airline, timestamp"
"""
        top_airlines = """
NODE endpoint
SQL >
    SELECT airline, count() as bookings FROM events
    GROUP BY airline
    ORDER BY bookings DESC
    LIMIT 5
TYPE ENDPOINT
"""
        generate_datafile(events_ds, filename="events.datasource", data=None, _format="ndjson", force=force)
        generate_pipe_file("top_airlines", top_airlines)
167
+
168
+
169
async def append_datasource(
    ctx: Context,
    tb_client: TinyB,
    datasource_name: str,
    url: str,
    sql: Optional[str],
    incremental: Optional[str],
    ignore_empty: bool,
    concurrency: int,
):
    """Append data from ``url`` to ``datasource_name`` via ``push_data``.

    When ``incremental`` ("source_column:dest_column") is given, the current
    maximum of ``dest_column`` in the datasource is queried and, if truthy,
    appended to ``sql`` as a ``WHERE source_column > '<max>'`` filter.

    Raises:
        CLIDatasourceException: if the maximum cannot be read from the query result.
    """
    if incremental:
        # Text before the first ":" and after the last ":"; without a colon
        # both are the whole string.
        source_column = incremental.partition(":")[0]
        dest_column = incremental.rpartition(":")[2]
        max_query = f"SELECT max({dest_column}) as inc from {datasource_name} FORMAT JSON"
        result = await tb_client.query(max_query)
        try:
            date = result["data"][0]["inc"]
        except Exception as e:
            raise CLIDatasourceException(f"{str(e)}")
        if date:
            sql = f"{sql} WHERE {source_column} > '{date}'"

    push_options = {"mode": "append", "ignore_empty": ignore_empty, "concurrency": concurrency}
    # No connector is used on this path, hence the explicit None.
    await push_data(ctx, tb_client, datasource_name, url, None, sql, **push_options)
201
+
202
+
203
def generate_sql_sample_data(datasource_content: str, row_count: int, model: str, api_key: str) -> str:
    """Ask an OpenAI chat model for SQL that generates sample rows for a datasource.

    The system message is ``sample_data_sql_prompt`` formatted with
    ``row_count``; the user message is ``datasource_content``.

    Returns:
        The content of the first completion choice, or "" when it is empty.
    """
    openai_client = OpenAI(api_key=api_key)

    system_message = {"role": "system", "content": sample_data_sql_prompt.format(row_count=row_count)}
    user_message = {"role": "user", "content": datasource_content}
    completion = openai_client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
    )

    generated_sql = completion.choices[0].message.content
    return generated_sql if generated_sql else ""
215
+
216
+
217
async def generate_sample_data_from_columns(
    tb_client: TinyB, datasource_content: str, row_count: int = 20
) -> List[Dict[str, Any]]:
    """Generate sample rows for a datasource by running LLM-written SQL.

    The model and API key are read from the project config under
    ``llms.openai`` (model defaults to "gpt-4o-mini"). The generated SQL is
    run through ``tb_client.query`` with ``FORMAT JSON`` appended.

    Returns:
        The ``data`` list of the query result, or an empty list when absent.
    """
    openai_settings = CLIConfig.get_project_config().get("llms", {}).get("openai", {})
    sql = generate_sql_sample_data(
        datasource_content,
        row_count,
        openai_settings.get("model", "gpt-4o-mini"),
        openai_settings.get("api_key", None),
    )
    query_result = await tb_client.query(f"{sql} FORMAT JSON")
    return query_result.get("data", [])
@@ -174,11 +174,11 @@ async def datasource_append(
174
174
  if incremental and not connector:
175
175
  raise CLIDatasourceException(FeedbackManager.error_incremental_not_supported())
176
176
 
177
+ client: TinyB = ctx.obj["client"]
177
178
  if incremental:
178
179
  date = None
179
180
  source_column = incremental.split(":")[0]
180
181
  dest_column = incremental.split(":")[-1]
181
- client: TinyB = ctx.obj["client"]
182
182
  result = await client.query(f"SELECT max({dest_column}) as inc from {datasource_name} FORMAT JSON")
183
183
  try:
184
184
  date = result["data"][0]["inc"]
@@ -187,7 +187,15 @@ async def datasource_append(
187
187
  if date:
188
188
  sql = f"{sql} WHERE {source_column} > '{date}'"
189
189
  await push_data(
190
- ctx, datasource_name, url, connector, sql, mode="append", ignore_empty=ignore_empty, concurrency=concurrency
190
+ ctx,
191
+ client,
192
+ datasource_name,
193
+ url,
194
+ connector,
195
+ sql,
196
+ mode="append",
197
+ ignore_empty=ignore_empty,
198
+ concurrency=concurrency,
191
199
  )
192
200
 
193
201