tinybird 0.0.1.dev26__py3-none-any.whl → 0.0.1.dev28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tinybird might be problematic.

Files changed (36)
  1. tinybird/config.py +1 -1
  2. tinybird/datatypes.py +46 -57
  3. tinybird/git_settings.py +4 -4
  4. tinybird/prompts.py +647 -0
  5. tinybird/sql.py +9 -0
  6. tinybird/sql_toolset.py +17 -3
  7. tinybird/syncasync.py +1 -1
  8. tinybird/tb/__cli__.py +2 -2
  9. tinybird/tb/cli.py +2 -0
  10. tinybird/tb/modules/build.py +44 -16
  11. tinybird/tb/modules/build_server.py +75 -0
  12. tinybird/tb/modules/cli.py +22 -0
  13. tinybird/tb/modules/common.py +2 -2
  14. tinybird/tb/modules/config.py +13 -14
  15. tinybird/tb/modules/create.py +145 -134
  16. tinybird/tb/modules/datafile/build.py +28 -0
  17. tinybird/tb/modules/datafile/common.py +1 -0
  18. tinybird/tb/modules/datafile/fixture.py +10 -6
  19. tinybird/tb/modules/datafile/parse_pipe.py +2 -0
  20. tinybird/tb/modules/datasource.py +1 -1
  21. tinybird/tb/modules/deploy.py +254 -0
  22. tinybird/tb/modules/llm.py +32 -16
  23. tinybird/tb/modules/llm_utils.py +24 -0
  24. tinybird/tb/modules/local.py +2 -2
  25. tinybird/tb/modules/login.py +8 -6
  26. tinybird/tb/modules/mock.py +11 -6
  27. tinybird/tb/modules/test.py +69 -47
  28. tinybird/tb/modules/watch.py +1 -1
  29. tinybird/tb_cli_modules/common.py +2 -2
  30. tinybird/tb_cli_modules/config.py +5 -5
  31. tinybird/tornado_template.py +1 -3
  32. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/METADATA +1 -1
  33. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/RECORD +36 -33
  34. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/WHEEL +0 -0
  35. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/entry_points.txt +0 -0
  36. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/top_level.txt +0 -0
@@ -4,9 +4,9 @@ from pathlib import Path
 from typing import Optional
 
 import click
-import requests
 
 from tinybird.client import TinyB
+from tinybird.prompts import create_prompt, mock_prompt
 from tinybird.tb.modules.cicd import init_cicd
 from tinybird.tb.modules.cli import cli
 from tinybird.tb.modules.common import _generate_datafile, check_user_token_with_client, coro, generate_datafile
@@ -15,15 +15,11 @@ from tinybird.tb.modules.datafile.fixture import build_fixture_name, persist_fix
 from tinybird.tb.modules.exceptions import CLIException
 from tinybird.tb.modules.feedback_manager import FeedbackManager
 from tinybird.tb.modules.llm import LLM
+from tinybird.tb.modules.llm_utils import extract_xml, parse_xml
 from tinybird.tb.modules.local_common import get_tinybird_local_client
 
 
 @cli.command()
-@click.option(
-    "--demo",
-    is_flag=True,
-    help="Demo data and files to get started",
-)
 @click.option(
     "--data",
     type=click.Path(exists=True),
@@ -39,13 +35,12 @@ from tinybird.tb.modules.local_common import get_tinybird_local_client
 @click.option(
     "--folder",
     default=".",
-    type=click.Path(exists=True, file_okay=False),
+    type=click.Path(exists=False, file_okay=False),
     help="Folder where datafiles will be placed",
 )
 @click.option("--rows", type=int, default=10, help="Number of events to send")
 @coro
 async def create(
-    demo: bool,
     data: Optional[str],
     prompt: Optional[str],
     folder: Optional[str],
@@ -53,6 +48,10 @@ async def create(
 ) -> None:
     """Initialize a new project."""
     folder = folder or getcwd()
+    folder_path = Path(folder)
+    if not folder_path.exists():
+        folder_path.mkdir()
+
     try:
         config = CLIConfig.get_project_config(folder)
         tb_client = config.get_client()
@@ -71,132 +70,103 @@ async def create(
             )
             return
         local_client = await get_tinybird_local_client(folder)
-        click.echo(FeedbackManager.highlight(message="\n» Creating new project structure..."))
-        await project_create(local_client, tb_client, user_token, data, prompt, folder)
-        click.echo(FeedbackManager.success(message="✓ Scaffolding completed!\n"))
 
-        click.echo(FeedbackManager.highlight(message="\n» Creating CI/CD files for GitHub and GitLab..."))
-        init_git(folder)
-        await init_cicd(data_project_dir=os.path.relpath(folder))
-        click.echo(FeedbackManager.success(message="✓ Done!\n"))
+        if not validate_project_structure(folder):
+            click.echo(FeedbackManager.highlight(message="\n» Creating new project structure..."))
+            create_project_structure(folder)
+            click.echo(FeedbackManager.success(message="✓ Scaffolding completed!\n"))
 
-        click.echo(FeedbackManager.highlight(message="\n» Building fixtures..."))
+        click.echo(FeedbackManager.highlight(message="\n» Creating resources..."))
+        datasources_created = await create_resources(local_client, tb_client, user_token, data, prompt, folder)
+        click.echo(FeedbackManager.success(message="✓ Done!\n"))
 
-        if demo:
-            # Users datasource
-            ds_name = "users"
-            datasource_path = Path(folder) / "datasources" / f"{ds_name}.datasource"
-            datasource_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/b48fb9c92825ed27c04e3104b9e871e1/raw/1f33c20eefbabc4903f38e234329e028d8ef9def/users.datasource"
-            )
-            datasource_path.write_text(datasource_content)
-            click.echo(FeedbackManager.info(message=f"✓ /datasources/{ds_name}.datasource"))
+        if not already_has_cicd(folder):
+            click.echo(FeedbackManager.highlight(message="\n» Creating CI/CD files for GitHub and GitLab..."))
+            init_git(folder)
+            await init_cicd(data_project_dir=os.path.relpath(folder))
+            click.echo(FeedbackManager.success(message="✓ Done!\n"))
 
-            # Users fixtures
-            fixture_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/8e8f66a39d7576ce3a2529bf773334a8/raw/9cab636767990e97d44a141867e5f226e992de8c/users.ndjson"
-            )
-            fixture_name = build_fixture_name(
-                datasource_path.absolute().as_posix(), ds_name, datasource_path.read_text()
-            )
-            persist_fixture(fixture_name, fixture_content)
-            click.echo(FeedbackManager.info(message=f"✓ /fixtures/{ds_name}"))
-
-            # Events datasource
-            ds_name = "events"
-            datasource_path = Path(folder) / "datasources" / f"{ds_name}.datasource"
-            datasource_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/f8ca37b5b1f6707c75206b618de26bc9/raw/cd625da0dcd1ba8de29f12bc1c8600b9ff7c809c/events.datasource"
-            )
-            datasource_path.write_text(datasource_content)
-            click.echo(FeedbackManager.info(message=f"✓ /datasources/{ds_name}.datasource"))
+        if validate_fixtures(folder) and datasources_created:
+            click.echo(FeedbackManager.highlight(message="\n» Generating fixtures..."))
 
-            # Events fixtures
-            fixture_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/859ab9439c17e77241d0c14a5a532809/raw/251f2f3f00a968f8759ec4068cebde915256b054/events.ndjson"
-            )
-            fixture_name = build_fixture_name(
-                datasource_path.absolute().as_posix(), ds_name, datasource_path.read_text()
-            )
-            persist_fixture(fixture_name, fixture_content)
-            click.echo(FeedbackManager.info(message=f"✓ /fixtures/{ds_name}"))
-
-            # Create sample endpoint
-            pipe_name = "api_token_usage"
-            pipe_path = Path(folder) / "endpoints" / f"{pipe_name}.pipe"
-            pipe_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/68ecc47472c2b754b0ae0c1187022963/raw/52cc3aa3afdf939e58d43355bfe4ddc739989ddd/api_token_usage.pipe"
-            )
-            pipe_path.write_text(pipe_content)
-            click.echo(FeedbackManager.info(message=f"✓ /endpoints/{pipe_name}.pipe"))
-
-            # Create sample test
-            test_name = "api_token_usage"
-            test_path = Path(folder) / "tests" / f"{test_name}.yaml"
-            test_content = fetch_gist_content(
-                "https://gist.githubusercontent.com/gnzjgo/e58620bbb977d6f42f1d0c2a7b46ac8f/raw/a3a1cd0ce3a90bcd2f6dfce00da51e6051443612/api_token_usage.yaml"
-            )
-            test_path.write_text(test_content)
-            click.echo(FeedbackManager.info(message=f"✓ /tests/{test_name}.yaml"))
-
-        elif data:
-            ds_name = os.path.basename(data.split(".")[0])
-            data_content = Path(data).read_text()
-            datasource_path = Path(folder) / "datasources" / f"{ds_name}.datasource"
-            fixture_name = build_fixture_name(
-                datasource_path.absolute().as_posix(), ds_name, datasource_path.read_text()
-            )
-            click.echo(FeedbackManager.info(message=f"✓ /fixtures/{ds_name}"))
-            persist_fixture(fixture_name, data_content)
-        elif prompt and user_token:
-            datasource_files = [f for f in os.listdir(Path(folder) / "datasources") if f.endswith(".datasource")]
-            for datasource_file in datasource_files:
-                datasource_path = Path(folder) / "datasources" / datasource_file
-                llm = LLM(user_token=user_token, client=tb_client)
-                datasource_name = datasource_path.stem
-                datasource_content = datasource_path.read_text()
-                has_json_path = "`json:" in datasource_content
-                if has_json_path:
-                    sql = await llm.generate_sql_sample_data(schema=datasource_content, rows=rows, prompt=prompt)
-                    result = await local_client.query(f"{sql} FORMAT JSON")
-                    data = result.get("data", [])
-                    fixture_name = build_fixture_name(
-                        datasource_path.absolute().as_posix(), datasource_name, datasource_content
-                    )
-                    if data:
-                        persist_fixture(fixture_name, data)
-                        click.echo(FeedbackManager.info(message=f"✓ /fixtures/{datasource_name}"))
+            if data:
+                ds_name = os.path.basename(data.split(".")[0])
+                data_content = Path(data).read_text()
+                datasource_path = Path(folder) / "datasources" / f"{ds_name}.datasource"
+                fixture_name = build_fixture_name(
+                    datasource_path.absolute().as_posix(), ds_name, datasource_path.read_text()
+                )
+                click.echo(FeedbackManager.info(message=f"✓ /fixtures/{ds_name}"))
+                persist_fixture(fixture_name, data_content, folder)
+            elif prompt and user_token:
+                datasource_files = [f for f in os.listdir(Path(folder) / "datasources") if f.endswith(".datasource")]
+                for datasource_file in datasource_files:
+                    datasource_path = Path(folder) / "datasources" / datasource_file
+                    llm = LLM(user_token=user_token, client=tb_client)
+                    datasource_name = datasource_path.stem
+                    datasource_content = datasource_path.read_text()
+                    has_json_path = "`json:" in datasource_content
+                    if has_json_path:
+                        prompt = f"<datasource_schema>{datasource_content}</datasource_schema>\n<user_input>{prompt}</user_input>"
+                        response = await llm.ask(prompt, system_prompt=mock_prompt(rows))
+                        sql = extract_xml(response, "sql")
+                        sql = sql.split("FORMAT")[0]
+                        result = await local_client.query(f"{sql} FORMAT JSON")
+                        data = result.get("data", [])
+                        fixture_name = build_fixture_name(
+                            datasource_path.absolute().as_posix(), datasource_name, datasource_content
+                        )
+                        if data:
+                            persist_fixture(fixture_name, data, folder)
+                            click.echo(FeedbackManager.info(message=f"✓ /fixtures/{datasource_name}"))
 
-        click.echo(FeedbackManager.success(message="✓ Done!\n"))
+            click.echo(FeedbackManager.success(message="✓ Done!\n"))
     except Exception as e:
         click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
 
 
-async def project_create(
+PROJECT_PATHS = ("datasources", "endpoints", "materializations", "copies", "sinks", "fixtures", "tests")
+
+
+def validate_project_structure(folder: str) -> bool:
+    return all((Path(folder) / path).exists() for path in PROJECT_PATHS)
+
+
+def validate_fixtures(folder: str) -> bool:
+    datasource_files = [f for f in os.listdir(Path(folder) / "datasources") if f.endswith(".datasource")]
+    return len(datasource_files) > 0
+
+
+def already_has_cicd(folder: str) -> bool:
+    ci_cd_paths = (".gitlab", ".github")
+    return any((Path(folder) / path).exists() for path in ci_cd_paths)
+
+
+def create_project_structure(folder: str):
+    folder_path = Path(folder)
+    for x in PROJECT_PATHS:
+        try:
+            f = folder_path / x
+            f.mkdir()
+        except FileExistsError:
+            pass
+        click.echo(FeedbackManager.info_path_created(path=x))
+
+
+async def create_resources(
     local_client: TinyB,
-    client: TinyB,
+    tb_client: TinyB,
     user_token: Optional[str],
     data: Optional[str],
     prompt: Optional[str],
     folder: str,
 ):
-    project_paths = ["datasources", "endpoints", "materializations", "copies", "sinks", "fixtures", "tests"]
     force = True
-    for x in project_paths:
-        try:
-            f = Path(folder) / x
-            f.mkdir()
-        except FileExistsError:
-            pass
-        click.echo(FeedbackManager.info_path_created(path=x))
-
+    folder_path = Path(folder)
     if data:
-        path = Path(folder) / data
+        path = folder_path / data
         format = path.suffix.lstrip(".")
-        try:
-            await _generate_datafile(str(path), local_client, format=format, force=force)
-        except Exception as e:
-            click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
+        await _generate_datafile(str(path), local_client, format=format, force=force)
         name = data.split(".")[0]
        generate_pipe_file(
            f"{name}_endpoint",
@@ -208,21 +178,68 @@ TYPE ENDPOINT
 """,
             folder,
         )
+        return True
     elif prompt and user_token:
-        try:
-            llm = LLM(user_token=user_token, client=client)
-            result = await llm.create_project(prompt)
-            for ds in result.datasources:
-                content = ds.content.replace("```", "")
-                generate_datafile(
-                    content, filename=f"{ds.name}.datasource", data=None, _format="ndjson", force=force, folder=folder
-                )
+        datasource_paths = [
+            Path(folder) / "datasources" / f
+            for f in os.listdir(Path(folder) / "datasources")
+            if f.endswith(".datasource")
+        ]
+        pipes_paths = [
+            Path(folder) / "endpoints" / f for f in os.listdir(Path(folder) / "endpoints") if f.endswith(".pipe")
+        ]
+        resources_xml = "\n".join(
+            [
+                f"<resource><type>{resource_type}</type><name>{resource_name}</name><content>{resource_content}</content></resource>"
+                for resource_type, resource_name, resource_content in [
+                    ("datasource", ds.stem, ds.read_text()) for ds in datasource_paths
+                ]
+                + [
+                    (
+                        "pipe",
+                        pipe.stem,
+                        pipe.read_text(),
+                    )
+                    for pipe in pipes_paths
+                ]
+            ]
+        )
+        llm = LLM(user_token=user_token, client=tb_client)
+        result = await llm.ask(prompt, system_prompt=create_prompt(resources_xml))
+        result = extract_xml(result, "response")
+        resources = parse_xml(result, "resource")
+        datasources = []
+        pipes = []
+        for resource_xml in resources:
+            resource_type = extract_xml(resource_xml, "type")
+            name = extract_xml(resource_xml, "name")
+            content = extract_xml(resource_xml, "content")
+            resource = {
+                "name": name,
+                "content": content,
+            }
+            if resource_type.lower() == "datasource":
+                datasources.append(resource)
+            elif resource_type.lower() == "pipe":
+                pipes.append(resource)
+
+        for ds in datasources:
+            content = ds["content"].replace("```", "")
+            filename = f"{ds['name']}.datasource"
+            generate_datafile(
+                content,
+                filename=filename,
+                data=None,
+                _format="ndjson",
+                force=force,
+                folder=folder,
+            )
 
-            for pipe in result.pipes:
-                content = pipe.content.replace("```", "")
-                generate_pipe_file(pipe.name, content, folder)
-        except Exception as e:
-            click.echo(FeedbackManager.error(message=f"Error: {str(e)}"))
+        for pipe in pipes:
+            content = pipe["content"].replace("```", "")
+            generate_pipe_file(pipe["name"], content, folder)
+
+        return len(datasources) > 0
 
 
 def init_git(folder: str):
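For orientation only (not part of the diff): given the extract_xml and parse_xml calls above, create_resources expects the LLM reply to wrap each generated datafile in a <resource> element inside a <response> element. A hedged sketch of that shape, with illustrative names and elided datafile bodies:

    # Illustrative response shape assumed by the parsing code above; names and contents are made up.
    example_response = """<response>
    <resource><type>datasource</type><name>events</name><content>SCHEMA > ...</content></resource>
    <resource><type>pipe</type><name>events_endpoint</name><content>NODE endpoint ...</content></resource>
    </response>"""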
@@ -250,9 +267,3 @@ def generate_pipe_file(name: str, content: str, folder: str):
     with open(f"{f}", "w") as file:
         file.write(content)
     click.echo(FeedbackManager.info_file_created(file=f.relative_to(folder)))
-
-
-def fetch_gist_content(url: str) -> str: # TODO: replace this with a function that fetches the content from a repo
-    response = requests.get(url)
-    response.raise_for_status()
-    return response.text
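The new tinybird.tb.modules.llm_utils module (+24 lines in this release) is not shown in this diff. Based purely on how create.py calls it above, a minimal regex-based sketch of extract_xml and parse_xml could look like the following; this is an assumption for readability, not the actual implementation:

    import re
    from typing import List

    def extract_xml(text: str, tag: str) -> str:
        # Return the body of the first <tag>...</tag> block, or the raw text if the tag is missing.
        match = re.search(rf"<{tag}>(.*?)</{tag}>", text, re.DOTALL)
        return match.group(1).strip() if match else text

    def parse_xml(text: str, tag: str) -> List[str]:
        # Return every <tag>...</tag> block (tags included) so callers can extract_xml inner fields.
        return re.findall(rf"<{tag}>.*?</{tag}>", text, re.DOTALL)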
@@ -722,6 +722,34 @@ async def process(
     except Exception as e:
         raise click.ClickException(str(e))
 
+    # datasource
+    # {
+    # "resource": "datasources",
+    # "resource_name": name,
+    # "version": doc.version,
+    # "params": params,
+    # "filename": filename,
+    # "deps": deps,
+    # "tokens": doc.tokens,
+    # "shared_with": doc.shared_with,
+    # "filtering_tags": doc.filtering_tags,
+    # }
+    # pipe
+    # {
+    # "resource": "pipes",
+    # "resource_name": name,
+    # "version": doc.version,
+    # "filename": filename,
+    # "name": name + version,
+    # "nodes": nodes,
+    # "deps": [x for x in set(deps)],
+    # "tokens": doc.tokens,
+    # "description": description,
+    # "warnings": doc.warnings,
+    # "filtering_tags": doc.filtering_tags,
+    # }
+
+    # r is essentially a Datasource or a Pipe in dict shape, like in the comment above
     for r in res:
         resource_name = r["resource_name"]
         warnings = r.get("warnings", [])
@@ -1340,6 +1340,7 @@ def parse(
         "export_compression": assign_var("export_compression"),
         "export_write_strategy": assign_var("export_write_strategy"),
         "export_kafka_topic": assign_var("export_kafka_topic"),
+        "forward_query": sql("forward_query"),
     }
 
     engine_vars = set()
@@ -31,22 +31,26 @@ def build_fixture_name(filename: str, datasource_name: str, datasource_content:
     return f"{datasource_name}_{hash_str}"
 
 
-def get_fixture_dir() -> Path:
-    fixture_dir = Path("fixtures")
+def get_fixture_dir(folder: str) -> Path:
+    fixture_dir = Path(folder) / "fixtures"
     if not fixture_dir.exists():
         fixture_dir.mkdir()
     return fixture_dir
 
 
-def persist_fixture(fixture_name: str, data: Union[List[Dict[str, Any]], str], format="ndjson") -> Path:
-    fixture_dir = get_fixture_dir()
+def persist_fixture(fixture_name: str, data: Union[List[Dict[str, Any]], str], folder: str, format="ndjson") -> Path:
+    fixture_dir = get_fixture_dir(folder)
     fixture_file = fixture_dir / f"{fixture_name}.{format}"
     fixture_file.write_text(data if isinstance(data, str) else format_data_to_ndjson(data))
     return fixture_file
 
 
-def load_fixture(fixture_name: str, format="ndjson") -> Union[Path, None]:
-    fixture_dir = get_fixture_dir()
+def load_fixture(
+    fixture_name: str,
+    folder: str,
+    format="ndjson",
+) -> Union[Path, None]:
+    fixture_dir = get_fixture_dir(folder)
     fixture_file = fixture_dir / f"{fixture_name}.{format}"
     if not fixture_file.exists():
         return None
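As the hunk above shows, the fixture helpers now take the project folder explicitly instead of assuming a ./fixtures directory relative to the current working directory. A small usage sketch (values are illustrative, not from the diff):

    from tinybird.tb.modules.datafile.fixture import build_fixture_name, persist_fixture

    rows = [{"id": 1}, {"id": 2}]  # hypothetical sample rows
    schema = "SCHEMA > ..."        # hypothetical datasource content
    name = build_fixture_name("datasources/users.datasource", "users", schema)
    persist_fixture(name, rows, folder=".")  # writes ./fixtures/<name>.ndjson under the project folder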
@@ -45,6 +45,8 @@ def parse_pipe(
     for node in doc.nodes:
         sql = node.get("sql", "")
         if sql.strip()[0] == "%":
+            # Note(eclbg): not sure what test_mode is for. I think it does something like using placeholder values
+            # for the variables in the template.
             sql, _, variable_warnings = render_sql_template(sql[1:], test_mode=True, name=node["name"])
             doc.warnings = variable_warnings
         # it'll fail with a ModuleNotFoundError when the toolset is not available but it returns the parsed doc
@@ -453,7 +453,7 @@ async def generate_datasource(ctx: Context, connector: str, filenames, force: bo
     """Generate a data source file based on a sample CSV file from local disk or url"""
     client: TinyB = ctx.ensure_object(dict)["client"]
 
-    _connector: Optional["Connector"] = None
+    _connector: Optional[Connector] = None
     if connector:
         load_connector_config(ctx, connector, False, check_uninstalled=False)
         if connector not in ctx.ensure_object(dict):