tinybird 0.0.1.dev267__py3-none-any.whl → 0.0.1.dev269__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic. Click here for more details.
- tinybird/tb/__cli__.py +2 -2
- tinybird/tb/modules/agent/agent.py +46 -32
- tinybird/tb/modules/agent/command_agent.py +8 -1
- tinybird/tb/modules/agent/compactor.py +311 -0
- tinybird/tb/modules/agent/explore_agent.py +86 -0
- tinybird/tb/modules/agent/memory.py +11 -1
- tinybird/tb/modules/agent/prompts.py +51 -36
- tinybird/tb/modules/agent/testing_agent.py +8 -1
- tinybird/tb/modules/agent/tools/append.py +16 -6
- tinybird/tb/modules/agent/tools/create_datafile.py +17 -4
- tinybird/tb/modules/agent/tools/execute_query.py +138 -11
- tinybird/tb/modules/agent/tools/mock.py +30 -22
- tinybird/tb/modules/agent/tools/request_endpoint.py +16 -3
- tinybird/tb/modules/agent/tools/run_command.py +3 -1
- tinybird/tb/modules/agent/utils.py +42 -0
- tinybird/tb/modules/cli.py +3 -5
- {tinybird-0.0.1.dev267.dist-info → tinybird-0.0.1.dev269.dist-info}/METADATA +2 -1
- {tinybird-0.0.1.dev267.dist-info → tinybird-0.0.1.dev269.dist-info}/RECORD +21 -21
- tinybird/tb/modules/agent/tools/explore.py +0 -15
- tinybird/tb/modules/agent/tools/preview_datafile.py +0 -24
- {tinybird-0.0.1.dev267.dist-info → tinybird-0.0.1.dev269.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev267.dist-info → tinybird-0.0.1.dev269.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev267.dist-info → tinybird-0.0.1.dev269.dist-info}/top_level.txt +0 -0
|
@@ -51,6 +51,18 @@ Resource dependencies:
|
|
|
51
51
|
sql_instructions = """
|
|
52
52
|
<sql_instructions>
|
|
53
53
|
- The SQL query must be a valid ClickHouse SQL query that mixes ClickHouse syntax and Tinybird templating syntax (Tornado templating language under the hood).
|
|
54
|
+
- Do not use CTEs, only if they return a escalar value, use instead subqueries or nodes if possible.
|
|
55
|
+
- Create multiple nodes to reuse the same query logic instead of using CTEs. Example:
|
|
56
|
+
<example_cte_query_not_do_this> # This is wrong. Create a node instead of the cte first and then reuse it
|
|
57
|
+
WITH my_cte AS (
|
|
58
|
+
SELECT * FROM events WHERE session_id={{String(my_param, "default_value")}}
|
|
59
|
+
)
|
|
60
|
+
SELECT * FROM my_cte
|
|
61
|
+
</example_cte_query_not_do_this>
|
|
62
|
+
- Reusing a node means to query that node as a table in the query. Example:
|
|
63
|
+
<example_not_cte_query_do_this> # This is correct. Create a node instead of the cte first and then reuse it
|
|
64
|
+
SELECT * FROM my_node_1
|
|
65
|
+
</example_not_cte_query_do_this>
|
|
54
66
|
- SQL queries with parameters must start with "%" character and a newline on top of every query to be able to use the parameters. Examples:
|
|
55
67
|
<invalid_query_with_parameters_no_%_on_top>
|
|
56
68
|
SELECT * FROM events WHERE session_id={{String(my_param, "default_value")}}
|
|
@@ -67,21 +79,21 @@ sql_instructions = """
|
|
|
67
79
|
AND timestamp BETWEEN {{DateTime(start_date, now() - interval 30 day)}} AND {{DateTime(end_date, now())}}
|
|
68
80
|
</invalid_condition_with_now>
|
|
69
81
|
<valid_condition_without_now>
|
|
70
|
-
{
|
|
82
|
+
{%if not defined(start_date)%}
|
|
71
83
|
timestamp BETWEEN now() - interval 30 day
|
|
72
|
-
{
|
|
84
|
+
{%else%}
|
|
73
85
|
timestamp BETWEEN {{DateTime(start_date)}}
|
|
74
|
-
{
|
|
75
|
-
{
|
|
86
|
+
{%end%}
|
|
87
|
+
{%if not defined(end_date)%}
|
|
76
88
|
AND now()
|
|
77
|
-
{
|
|
89
|
+
{%else%}
|
|
78
90
|
AND {{DateTime(end_date)}}
|
|
79
|
-
{
|
|
91
|
+
{%end%}
|
|
80
92
|
</valid_condition_without_now>
|
|
81
93
|
- Parameters must not be quoted.
|
|
82
94
|
- When you use defined function with a paremeter inside, do NOT add quotes around the parameter:
|
|
83
|
-
<invalid_defined_function_with_parameter>{
|
|
84
|
-
<valid_defined_function_without_parameter>{
|
|
95
|
+
<invalid_defined_function_with_parameter>{% if defined('my_param') %}</invalid_defined_function_with_parameter>
|
|
96
|
+
<valid_defined_function_without_parameter>{% if defined(my_param) %}</valid_defined_function_without_parameter>
|
|
85
97
|
- Use datasource names as table names when doing SELECT statements.
|
|
86
98
|
- Do not use pipe names as table names.
|
|
87
99
|
- The available datasource names to use in the SQL are the ones present in the existing_resources section or the ones you will create.
|
|
@@ -194,6 +206,18 @@ def get_resource_type(path: Path) -> str:
|
|
|
194
206
|
return "unknown"
|
|
195
207
|
|
|
196
208
|
|
|
209
|
+
explore_data_instructions = """
|
|
210
|
+
# When executing a query:
|
|
211
|
+
- Avoid using the `*` wildcard to avoid returning too much data.
|
|
212
|
+
# When executing a query or calling an endpoint:
|
|
213
|
+
- You need to be sure that the selected resource is updated to the last version in the environment you are working on.
|
|
214
|
+
- Use `diff_resource` tool to compare the content of the resource to compare the differences between environments.
|
|
215
|
+
- Project local file is the source of truth.
|
|
216
|
+
- If the resource is not present or updated to the last version in Tinybird Local, it means you need to build the project.
|
|
217
|
+
- If the resource is not present or updated to the last version in Tinybird Cloud, it means you need to deploy the project.
|
|
218
|
+
- If exploring an endpoint, the response is empty. You can query the tables to understand what data is available.
|
|
219
|
+
"""
|
|
220
|
+
|
|
197
221
|
endpoint_optimization_instructions = """
|
|
198
222
|
<endpoint_optimization_instructions>
|
|
199
223
|
## Endpoint Optimization Instructions
|
|
@@ -626,34 +650,32 @@ You are an interactive CLI tool that helps users with data engineering tasks. Us
|
|
|
626
650
|
|
|
627
651
|
# Tools
|
|
628
652
|
You have access to the following tools:
|
|
629
|
-
1. `
|
|
630
|
-
2. `
|
|
631
|
-
3. `
|
|
632
|
-
4. `
|
|
633
|
-
5. `
|
|
634
|
-
6. `
|
|
635
|
-
7. `
|
|
636
|
-
8. `
|
|
637
|
-
9. `analyze_url` - Analyze the content of an external url.
|
|
653
|
+
1. `create_datafile` - Create a datafile (datasource, endpoint, materialized, sink, copy, connection) in the project folder. Confirmation will be asked by the tool before creating the file.
|
|
654
|
+
2. `plan` - Plan the creation or update of resources.
|
|
655
|
+
3. `build` - Build the project.
|
|
656
|
+
4. `deploy` - Deploy the project to Tinybird Cloud.
|
|
657
|
+
5. `deploy_check` - Check if the project can be deployed to Tinybird Cloud before deploying it.
|
|
658
|
+
6. `mock` - Create mock data for a landing datasource in Tinybird Cloud or Local.
|
|
659
|
+
7. `analyze_file` - Analyze the content of a fixture file present in the project folder.
|
|
660
|
+
8. `analyze_url` - Analyze the content of an external url.
|
|
638
661
|
9. `append_file` - Append a file present in the project to a datasource in Tinybird Cloud or Local.
|
|
639
662
|
10. `append_url` - Append an external url to a datasource in Tinybird Cloud or Local.
|
|
640
663
|
11. `get_endpoint_stats` - Get metrics of the requests to an endpoint.
|
|
641
664
|
12. `get_openapi_definition` - Get the OpenAPI definition for an endpoint in Tinybird Cloud or Local.
|
|
642
|
-
13. `
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
665
|
+
13. `explore_data` - Execute a query or request an endpoint against Tinybird Cloud or Local.
|
|
666
|
+
14. `manage_tests` - Create, update or run tests for an endpoint.
|
|
667
|
+
15. `run_command` - Run a command using the Tinybird CLI.
|
|
668
|
+
16. `diff_resource` - Diff the content of a resource in Tinybird Cloud vs Tinybird Local vs Project local file.
|
|
669
|
+
17. `rename_datafile_or_fixture` - Rename a datafile or fixture.
|
|
647
670
|
|
|
648
671
|
# When creating or updating datafiles:
|
|
649
672
|
1. Use `plan` tool to plan the creation, update or rename of resources.
|
|
650
673
|
2. If the user confirms the plan, go from 3 to 7 steps until all the resources are created, updated or skipped.
|
|
651
|
-
3.
|
|
652
|
-
4.
|
|
653
|
-
5.
|
|
654
|
-
6. If the datafile was created
|
|
655
|
-
7. If the datafile was
|
|
656
|
-
8. If the datafile was created successfully, but the built failed, try to fix the error and repeat the process.
|
|
674
|
+
3. Without asking, use the `create_datafile` tool to create the datafile, because it will ask for confirmation before creating the file.
|
|
675
|
+
4. Check the result of the `create_datafile` tool to see if the datafile was created successfully.
|
|
676
|
+
5. If the datafile was created successfully, report the result to the user.
|
|
677
|
+
6. If the datafile was not created, finish the process and just wait for a new user prompt.
|
|
678
|
+
7. If the datafile was created successfully, but the built failed, try to fix the error and repeat the process.
|
|
657
679
|
|
|
658
680
|
# When creating a landing datasource given a .ndjson file:
|
|
659
681
|
- If the user does not specify anything about the desired schema, create a schema like this (sorting key not needed in this case)
|
|
@@ -706,14 +728,7 @@ Kafka: {kafka_connection_example}
|
|
|
706
728
|
S3: {s3_connection_example}
|
|
707
729
|
GCS: {gcs_connection_example}
|
|
708
730
|
|
|
709
|
-
|
|
710
|
-
- You need to be sure that the selected resource is updated to the last version in the environment you are working on.
|
|
711
|
-
- Use `diff_resource` tool to compare the content of the resource to compare the differences between environments.
|
|
712
|
-
- Project local file is the source of truth.
|
|
713
|
-
- If the resource is not present or updated to the last version in Tinybird Local, it means you need to build the project.
|
|
714
|
-
- If the resource is not present or updated to the last version in Tinybird Cloud, it means you need to deploy the project.
|
|
715
|
-
- If exploring an endpoint, the response is empty. You can query the tables to understand what data is available.
|
|
716
|
-
|
|
731
|
+
{explore_data_instructions}
|
|
717
732
|
|
|
718
733
|
# How to use apppend tools:
|
|
719
734
|
- Use append as part of the creation of a new landing datasource if the user provided a file or an external url
|
|
@@ -26,6 +26,7 @@ class TestingAgent:
|
|
|
26
26
|
self.token = token
|
|
27
27
|
self.user_token = user_token
|
|
28
28
|
self.host = host
|
|
29
|
+
self.workspace_id = workspace_id
|
|
29
30
|
self.dangerously_skip_permissions = dangerously_skip_permissions or prompt_mode
|
|
30
31
|
self.project = project
|
|
31
32
|
self.thinking_animation = thinking_animation
|
|
@@ -56,7 +57,13 @@ You can do the following:
|
|
|
56
57
|
return tests_files_prompt(self.project)
|
|
57
58
|
|
|
58
59
|
def run(self, task: str, deps: TinybirdAgentContext, usage: Usage):
    """Run the agent synchronously on ``task`` and record the new messages.

    Args:
        task: Prompt handed to the underlying agent.
        deps: Agent context; its ``run_id`` is forwarded to ``create_model``.
        usage: Usage accumulator passed through to the agent run.

    Returns:
        The result object returned by ``self.agent.run_sync``.
    """
    # The model is built per call so it carries the run id of this invocation.
    model = create_model(self.user_token, self.host, self.workspace_id, run_id=deps.run_id)
    outcome = self.agent.run_sync(
        task,
        deps=deps,
        usage=usage,
        message_history=self.messages,
        model=model,
    )
    # Keep the conversation history growing so the next call sees this exchange.
    self.messages.extend(outcome.new_messages())
    return outcome
|
|
@@ -10,6 +10,7 @@ from tinybird.tb.modules.agent.utils import (
|
|
|
10
10
|
show_confirmation,
|
|
11
11
|
show_input,
|
|
12
12
|
)
|
|
13
|
+
from tinybird.tb.modules.common import echo_safe_humanfriendly_tables_format_pretty_table
|
|
13
14
|
from tinybird.tb.modules.feedback_manager import FeedbackManager
|
|
14
15
|
|
|
15
16
|
|
|
@@ -113,20 +114,29 @@ def append_url(
|
|
|
113
114
|
return f"Error appending URL {fixture_url} to {datasource_name} in Tinybird {cloud_or_local}: {error_message}"
|
|
114
115
|
|
|
115
116
|
|
|
116
|
-
def handle_quarantine_error(
|
|
117
|
+
def handle_quarantine_error(
    ctx: RunContext[TinybirdAgentContext], error_message: str, datasource_name: str, cloud: bool = False
) -> str:
    """Extend an append error with a sample of the datasource's quarantine table.

    When ``error_message`` mentions "in quarantine", the five most recently
    inserted rows of ``<datasource_name>_quarantine`` are queried (in Cloud or
    Local, depending on ``cloud``), printed as a table, and appended to the
    message. Any failure while doing so is appended to the message instead of
    being raised.

    Args:
        ctx: Agent run context; ``ctx.deps`` provides the query executors.
        error_message: The original error text.
        datasource_name: Datasource whose quarantine table is inspected.
        cloud: Query Tinybird Cloud when True, Tinybird Local otherwise.

    Returns:
        The original message, possibly extended with quarantine rows or with
        the error hit while reading them.
    """
    cloud_or_local = "Cloud" if cloud else "Local"

    try:
        # Nothing to enrich unless the error is about quarantined rows.
        if "in quarantine" not in error_message:
            return error_message

        click.echo(FeedbackManager.highlight(message=f"» Looking for errors in {datasource_name}_quarantine..."))
        query = f"select * from {datasource_name}_quarantine order by insertion_date desc limit 5 FORMAT JSON"
        run_query = ctx.deps.execute_query_cloud if cloud else ctx.deps.execute_query_local
        result = run_query(query=query)

        quarantine_data = result["data"] or []
        column_names = [column["name"] for column in (result["meta"] or [])]
        echo_safe_humanfriendly_tables_format_pretty_table(
            data=[row.values() for row in quarantine_data], column_names=column_names
        )

        return (
            error_message
            + f"\nThese are the first 5 rows of the quarantine table for datasource '{datasource_name}' in {cloud_or_local}:\n{quarantine_data}"
        )
    except Exception as quarantine_error:
        # Best effort: report the lookup failure alongside the original error.
        return (
            error_message + f"\nError accessing to {datasource_name}_quarantine in {cloud_or_local}: {quarantine_error}"
        )
|
|
@@ -16,17 +16,30 @@ from tinybird.tb.modules.exceptions import CLIBuildException
|
|
|
16
16
|
from tinybird.tb.modules.feedback_manager import FeedbackManager
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def create_datafile(
|
|
20
|
-
|
|
19
|
+
def create_datafile(
|
|
20
|
+
ctx: RunContext[TinybirdAgentContext], name: str, type: str, description: str, content: str, pathname: str
|
|
21
|
+
) -> str:
|
|
22
|
+
"""Given a resource representation, create or update a datafile in the project folder
|
|
21
23
|
|
|
22
24
|
Args:
|
|
23
|
-
|
|
25
|
+
name (str): The name of the datafile. Required.
|
|
26
|
+
type (str): The type of the datafile. Options: datasource, endpoint, materialized, sink, copy, connection. Required.
|
|
27
|
+
description (str): The description of the datafile. Required.
|
|
28
|
+
content (str): The content of the datafile. Required.
|
|
29
|
+
pathname (str): The pathname of the datafile where the file will be created or it is already located. If it is a new datafile, always include the parent folder depending on the type of the datafile. Required.
|
|
24
30
|
|
|
25
31
|
Returns:
|
|
26
32
|
str: If the resource was created or not.
|
|
27
33
|
"""
|
|
28
34
|
try:
|
|
29
35
|
ctx.deps.thinking_animation.stop()
|
|
36
|
+
resource = Datafile(
|
|
37
|
+
type=type.lower(),
|
|
38
|
+
name=name,
|
|
39
|
+
content=content,
|
|
40
|
+
description=description,
|
|
41
|
+
pathname=pathname,
|
|
42
|
+
)
|
|
30
43
|
resource.pathname = resource.pathname.removeprefix("/")
|
|
31
44
|
path = Path(ctx.deps.folder) / resource.pathname
|
|
32
45
|
content = resource.content
|
|
@@ -63,7 +76,7 @@ def create_datafile(ctx: RunContext[TinybirdAgentContext], resource: Datafile) -
|
|
|
63
76
|
ctx.deps.thinking_animation.stop()
|
|
64
77
|
click.echo(FeedbackManager.error(message=e))
|
|
65
78
|
ctx.deps.thinking_animation.start()
|
|
66
|
-
return f"Error building project: {e}"
|
|
79
|
+
return f"Error building project: {e}. If the error is related to another resource, fix it and try again."
|
|
67
80
|
except Exception as e:
|
|
68
81
|
ctx.deps.thinking_animation.stop()
|
|
69
82
|
click.echo(FeedbackManager.error(message=e))
|
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
1
7
|
import click
|
|
2
8
|
import humanfriendly
|
|
3
9
|
from pydantic_ai import RunContext
|
|
@@ -20,55 +26,115 @@ forbidden_commands = [
|
|
|
20
26
|
"information_schema.tables",
|
|
21
27
|
]
|
|
22
28
|
|
|
29
|
+
forbidden_commands_start_with = [
|
|
30
|
+
"describe",
|
|
31
|
+
]
|
|
32
|
+
|
|
23
33
|
|
|
24
|
-
def execute_query(
|
|
25
|
-
|
|
34
|
+
def execute_query(
|
|
35
|
+
ctx: RunContext[TinybirdAgentContext],
|
|
36
|
+
query: str,
|
|
37
|
+
task: str,
|
|
38
|
+
cloud: bool = False,
|
|
39
|
+
script: Optional[str] = None,
|
|
40
|
+
export_format: Optional[str] = None,
|
|
41
|
+
):
|
|
42
|
+
"""Execute a query and return the result as a table, chart or exported file.
|
|
26
43
|
|
|
27
44
|
Args:
|
|
28
45
|
query (str): The query to execute. Required.
|
|
29
46
|
task (str): The purpose of the query. Required.
|
|
30
47
|
cloud (bool): Whether to execute the query on cloud or local. Optional.
|
|
48
|
+
script (str): Python script using plotext to render the query results as a chart. The script will have access to 'data' (list of dicts), 'meta' (list of column info dicts), 'terminal_width' and 'terminal_height' variables. Always use plt.theme("clear") for transparent background and plt.plot_size(terminal_width, terminal_height) for proper sizing. For bar charts, use the simple versions: plt.simple_bar(), plt.simple_multiple_bar(), and plt.simple_stacked_bar(). Optional.
|
|
49
|
+
export_format (str): The format to export the query results to. Options: csv, json, ndjson. Optional.
|
|
31
50
|
|
|
32
51
|
Returns:
|
|
33
52
|
str: The result of the query.
|
|
34
53
|
"""
|
|
54
|
+
|
|
35
55
|
try:
|
|
36
56
|
for forbidden_command in forbidden_commands:
|
|
37
57
|
if forbidden_command in query.lower():
|
|
38
58
|
return f"Error executing query: {forbidden_command} is not allowed."
|
|
39
59
|
|
|
60
|
+
for forbidden_command in forbidden_commands_start_with:
|
|
61
|
+
if query.lower().startswith(forbidden_command):
|
|
62
|
+
return f"Error executing query: {forbidden_command} is not allowed."
|
|
63
|
+
|
|
40
64
|
cloud_or_local = "cloud" if cloud else "local"
|
|
41
65
|
ctx.deps.thinking_animation.stop()
|
|
42
66
|
|
|
43
67
|
click.echo(FeedbackManager.highlight(message=f"» Executing query to {cloud_or_local}:\n{query}\n"))
|
|
44
68
|
|
|
45
69
|
is_templating = query.strip().startswith("%")
|
|
46
|
-
query_format = "
|
|
70
|
+
query_format = "JSON"
|
|
71
|
+
if export_format == "csv":
|
|
72
|
+
query_format = "CSVWithNames"
|
|
73
|
+
elif export_format == "ndjson":
|
|
74
|
+
query_format = "JSONEachRow"
|
|
75
|
+
elif export_format == "json":
|
|
76
|
+
query_format = "JSON"
|
|
77
|
+
|
|
47
78
|
if is_templating:
|
|
48
79
|
query = query.strip()
|
|
49
|
-
query = f"%\nSELECT * FROM ({query}) {query_format}"
|
|
80
|
+
query = f"%\nSELECT * FROM ({query}) FORMAT {query_format}"
|
|
50
81
|
else:
|
|
51
|
-
query = f"SELECT * FROM ({query}) {query_format}"
|
|
82
|
+
query = f"SELECT * FROM ({query}) FORMAT {query_format}"
|
|
52
83
|
|
|
53
84
|
execute_query = ctx.deps.execute_query_cloud if cloud else ctx.deps.execute_query_local
|
|
54
85
|
result = execute_query(query=query)
|
|
86
|
+
if export_format:
|
|
87
|
+
file_extension = f".{export_format}"
|
|
88
|
+
filename = f"export_{export_format}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
89
|
+
if not filename.endswith(file_extension):
|
|
90
|
+
filename = f"{filename}{file_extension}"
|
|
91
|
+
|
|
92
|
+
file_path = Path(ctx.deps.folder) / filename
|
|
93
|
+
|
|
94
|
+
# Ensure directory exists
|
|
95
|
+
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
96
|
+
|
|
97
|
+
# Write raw ClickHouse formatted data directly to file
|
|
98
|
+
with open(file_path, "w", encoding="utf-8") as f:
|
|
99
|
+
if export_format == "json":
|
|
100
|
+
content = json.dumps(result)
|
|
101
|
+
else:
|
|
102
|
+
content = str(result)
|
|
103
|
+
f.write(content)
|
|
104
|
+
ctx.deps.thinking_animation.start()
|
|
105
|
+
return f"Successfully exported data to {file_path} ({export_format.upper()} format)"
|
|
106
|
+
|
|
55
107
|
stats = result["statistics"]
|
|
56
108
|
seconds = stats["elapsed"]
|
|
57
109
|
rows_read = humanfriendly.format_number(stats["rows_read"])
|
|
58
110
|
bytes_read = humanfriendly.format_size(stats["bytes_read"])
|
|
59
111
|
|
|
60
112
|
click.echo(FeedbackManager.info_query_stats(seconds=seconds, rows=rows_read, bytes=bytes_read))
|
|
113
|
+
click.echo()
|
|
61
114
|
|
|
62
115
|
if not result["data"]:
|
|
63
116
|
click.echo(FeedbackManager.info_no_rows())
|
|
64
117
|
else:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
118
|
+
if script:
|
|
119
|
+
try:
|
|
120
|
+
# Execute the LLM-generated plotext script
|
|
121
|
+
chart_output = _execute_plotext_script(script, result["data"], result["meta"])
|
|
122
|
+
click.echo(chart_output)
|
|
123
|
+
except Exception as script_error:
|
|
124
|
+
click.echo(
|
|
125
|
+
FeedbackManager.error(message=f"There was an error rendering the chart.\n{script_error}")
|
|
126
|
+
)
|
|
127
|
+
ctx.deps.thinking_animation.start()
|
|
128
|
+
return f"After executing the query: {query}, there was an error rendering the chart: {script_error}. Fix the script and render the chart again."
|
|
129
|
+
else:
|
|
130
|
+
echo_safe_humanfriendly_tables_format_pretty_table(
|
|
131
|
+
data=[d.values() for d in result["data"]], column_names=result["data"][0].keys()
|
|
132
|
+
)
|
|
133
|
+
click.echo("Showing first 10 results\n")
|
|
134
|
+
|
|
69
135
|
ctx.deps.thinking_animation.start()
|
|
70
|
-
|
|
71
|
-
return f"Result for task '{task}' in {cloud_or_local} environment: {result}. The user is being shown the
|
|
136
|
+
display_format = "chart" if script else "table"
|
|
137
|
+
return f"Result for task '{task}' in {cloud_or_local} environment: {result}. The user is being shown the result as a {display_format} in the console, so do not render that again."
|
|
72
138
|
except Exception as e:
|
|
73
139
|
error = str(e)
|
|
74
140
|
ctx.deps.thinking_animation.stop()
|
|
@@ -78,3 +144,64 @@ def execute_query(ctx: RunContext[TinybirdAgentContext], query: str, task: str,
|
|
|
78
144
|
return f"Error executing query: {error}. Please run the query against Tinybird local instead of cloud."
|
|
79
145
|
else:
|
|
80
146
|
return f"Error executing query: {error}. Please try again."
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _execute_plotext_script(script: str, data: List[Dict[str, Any]], meta: List[Dict[str, str]]) -> str:
    """Run a plotext chart script against query results and return what it printed.

    The script is executed with ``exec()`` in a fresh namespace exposing
    ``data``, ``meta``, ``terminal_width``, ``terminal_height`` and the names
    ``plt`` (plotext), ``json``, ``datetime``, ``re`` and ``os``.

    SECURITY NOTE(review): ``script`` is LLM-generated and runs with the full
    builtins and the ``os`` module, i.e. with the privileges of the CLI process.
    This is not a sandbox; confirm that is acceptable for every caller.

    Args:
        script: Python script using plotext.
        data: Query result data (one dict per row).
        meta: Query result metadata (one dict per column).

    Returns:
        Everything the script wrote to stdout (the rendered chart).

    Raises:
        Exception: ``"Script execution error: <reason>"`` when preparing the
            namespace or running the script fails; the original exception is
            attached as ``__cause__``.
    """
    import io
    from contextlib import redirect_stdout

    output = io.StringIO()
    script_globals: Dict[str, Any] = {
        "data": data,
        "meta": meta,
        "__builtins__": __builtins__,
    }
    plt = None

    try:
        # Make the helper modules available to the script under their usual names.
        for statement in (
            "import plotext as plt",
            "import json",
            "from datetime import datetime",
            "import re",
            "import os",
        ):
            exec(statement, script_globals)
        plt = script_globals["plt"]

        # Clear any previous plot data to prevent chart reuse.
        plt.clear_data()

        # Terminal dimensions; fall back to 80x20 when stdout is not a terminal.
        try:
            terminal_size = os.get_terminal_size()
        except (OSError, ValueError):
            terminal_width, terminal_height = 80, 20
        else:
            terminal_width = terminal_size.columns
            # Use 1/3 of the terminal height, but never fewer than 20 rows.
            terminal_height = max(20, terminal_size.lines // 3)

        script_globals["terminal_width"] = terminal_width
        script_globals["terminal_height"] = terminal_height

        with redirect_stdout(output):
            exec(script, script_globals)

        return output.getvalue()
    except Exception as e:
        raise Exception(f"Script execution error: {str(e)}") from e
    finally:
        # Reset plotext even when the script failed, so a broken chart cannot
        # leak state into the next render.
        if plt is not None:
            plt.clear_data()
            plt.clear_figure()
|
|
@@ -1,36 +1,34 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
1
|
import click
|
|
4
2
|
from pydantic_ai import RunContext
|
|
5
3
|
|
|
6
|
-
from tinybird.tb.modules.agent.utils import
|
|
4
|
+
from tinybird.tb.modules.agent.utils import (
|
|
5
|
+
AgentRunCancelled,
|
|
6
|
+
TinybirdAgentContext,
|
|
7
|
+
show_confirmation,
|
|
8
|
+
show_input,
|
|
9
|
+
)
|
|
10
|
+
from tinybird.tb.modules.common import echo_safe_humanfriendly_tables_format_pretty_table
|
|
7
11
|
from tinybird.tb.modules.datafile.fixture import persist_fixture
|
|
8
12
|
from tinybird.tb.modules.feedback_manager import FeedbackManager
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
def mock(
|
|
12
|
-
ctx: RunContext[TinybirdAgentContext],
|
|
13
|
-
datasource_name: str,
|
|
14
|
-
data_format: str,
|
|
15
|
-
rows: int,
|
|
16
|
-
context: Optional[str] = None,
|
|
17
|
-
cloud: bool = False,
|
|
16
|
+
ctx: RunContext[TinybirdAgentContext], datasource_name: str, data_format: str, rows: int, description: str
|
|
18
17
|
) -> str:
|
|
19
18
|
"""Create mock data for a datasource
|
|
20
19
|
|
|
21
20
|
Args:
|
|
22
|
-
datasource_name: Name of the datasource to create mock data for
|
|
23
|
-
data_format: Format of the mock data to create. Options: ndjson, csv
|
|
24
|
-
rows: Number of rows to create. If not provided, the default is 10
|
|
25
|
-
|
|
26
|
-
cloud: Whether to generate the mock data in the cloud or local environment. Optional.
|
|
21
|
+
datasource_name (str): Name of the datasource to create mock data for. Required.
|
|
22
|
+
data_format (str): Format of the mock data to create. Options: ndjson, csv. Required.
|
|
23
|
+
rows (int): Number of rows to create. If not provided, the default is 10. Required.
|
|
24
|
+
description (str): Extra details about how to generate the mock data (nested json if any, sample row to help with the generation, etc). You can use this to fix issues with the mock data generation. Required.
|
|
27
25
|
|
|
28
26
|
Returns:
|
|
29
27
|
str: Message indicating the success or failure of the mock data generation
|
|
30
28
|
"""
|
|
31
29
|
try:
|
|
32
30
|
ctx.deps.thinking_animation.stop()
|
|
33
|
-
cloud_or_local = "
|
|
31
|
+
cloud_or_local = "Local"
|
|
34
32
|
confirmation = show_confirmation(
|
|
35
33
|
title=f"Generate mock data for datasource '{datasource_name}' in Tinybird {cloud_or_local}?",
|
|
36
34
|
skip_confirmation=ctx.deps.dangerously_skip_permissions,
|
|
@@ -42,12 +40,11 @@ def mock(
|
|
|
42
40
|
return f"User did not confirm mock data for datasource '{datasource_name}' in Tinybird {cloud_or_local} and gave the following feedback: {feedback}"
|
|
43
41
|
|
|
44
42
|
click.echo(FeedbackManager.highlight(message=f"» Generating mock data for {datasource_name}..."))
|
|
45
|
-
data = ctx.deps.mock_data(
|
|
43
|
+
data = ctx.deps.mock_data(
|
|
44
|
+
datasource_name=datasource_name, data_format=data_format, rows=rows, context=description
|
|
45
|
+
)
|
|
46
46
|
fixture_path = persist_fixture(datasource_name, data, ctx.deps.folder, format=data_format)
|
|
47
|
-
|
|
48
|
-
ctx.deps.append_data_cloud(datasource_name=datasource_name, path=str(fixture_path))
|
|
49
|
-
else:
|
|
50
|
-
ctx.deps.append_data_local(datasource_name=datasource_name, path=str(fixture_path))
|
|
47
|
+
ctx.deps.append_data_local(datasource_name=datasource_name, path=str(fixture_path))
|
|
51
48
|
click.echo(FeedbackManager.success(message=f"✓ Data generated for {datasource_name}"))
|
|
52
49
|
ctx.deps.thinking_animation.start()
|
|
53
50
|
return f"Mock data generated successfully for datasource '{datasource_name}' in Tinybird {cloud_or_local}"
|
|
@@ -62,8 +59,19 @@ def mock(
|
|
|
62
59
|
click.echo(
|
|
63
60
|
FeedbackManager.highlight(message=f"» Looking for errors in {datasource_name}_quarantine...")
|
|
64
61
|
)
|
|
65
|
-
query = f"select * from {datasource_name}_quarantine order by insertion_date desc limit 5 FORMAT
|
|
66
|
-
|
|
62
|
+
query = f"select * from {datasource_name}_quarantine order by insertion_date desc limit 5 FORMAT JSON"
|
|
63
|
+
quarantine_result = ctx.deps.execute_query_local(query=query)
|
|
64
|
+
quarantine_data = quarantine_result["data"] or []
|
|
65
|
+
quarantine_meta = quarantine_result["meta"] or []
|
|
66
|
+
column_names = [c["name"] for c in quarantine_meta]
|
|
67
|
+
echo_safe_humanfriendly_tables_format_pretty_table(
|
|
68
|
+
data=[d.values() for d in quarantine_data], column_names=column_names
|
|
69
|
+
)
|
|
70
|
+
click.echo(
|
|
71
|
+
FeedbackManager.info(
|
|
72
|
+
message=f"These are the first 5 rows of the quarantine table for datasource '{datasource_name}':"
|
|
73
|
+
)
|
|
74
|
+
)
|
|
67
75
|
error_message = (
|
|
68
76
|
error_message
|
|
69
77
|
+ f"\nThese are the first 5 rows of the quarantine table for datasource '{datasource_name}':\n{quarantine_data}. Use again `mock` tool but add this issue to the context."
|
|
@@ -4,7 +4,7 @@ import click
|
|
|
4
4
|
import humanfriendly
|
|
5
5
|
from pydantic_ai import RunContext
|
|
6
6
|
|
|
7
|
-
from tinybird.tb.modules.agent.utils import TinybirdAgentContext
|
|
7
|
+
from tinybird.tb.modules.agent.utils import TinybirdAgentContext, limit_result_output
|
|
8
8
|
from tinybird.tb.modules.common import echo_safe_humanfriendly_tables_format_pretty_table
|
|
9
9
|
from tinybird.tb.modules.feedback_manager import FeedbackManager
|
|
10
10
|
|
|
@@ -37,6 +37,10 @@ def request_endpoint(
|
|
|
37
37
|
|
|
38
38
|
request_endpoint = ctx.deps.request_endpoint_cloud if cloud else ctx.deps.request_endpoint_local
|
|
39
39
|
result = request_endpoint(endpoint_name=endpoint_name, params=params)
|
|
40
|
+
|
|
41
|
+
# Apply output limiting using the utility function
|
|
42
|
+
result, truncated_columns = limit_result_output(result)
|
|
43
|
+
|
|
40
44
|
stats = result["statistics"]
|
|
41
45
|
seconds = stats["elapsed"]
|
|
42
46
|
rows_read = humanfriendly.format_number(stats["rows_read"])
|
|
@@ -49,11 +53,20 @@ def request_endpoint(
|
|
|
49
53
|
click.echo(FeedbackManager.info_no_rows())
|
|
50
54
|
else:
|
|
51
55
|
echo_safe_humanfriendly_tables_format_pretty_table(
|
|
52
|
-
data=[d.values() for d in result["data"]
|
|
56
|
+
data=[d.values() for d in result["data"]], column_names=result["data"][0].keys()
|
|
53
57
|
)
|
|
54
58
|
click.echo("Showing first 10 results\n")
|
|
59
|
+
|
|
60
|
+
# Prepare return message with truncation info
|
|
61
|
+
truncation_info = ""
|
|
62
|
+
if truncated_columns:
|
|
63
|
+
truncated_list = ", ".join(sorted(truncated_columns))
|
|
64
|
+
truncation_info = (
|
|
65
|
+
f" Note: The following columns had values truncated due to length > 200 characters: {truncated_list}."
|
|
66
|
+
)
|
|
67
|
+
|
|
55
68
|
ctx.deps.thinking_animation.start()
|
|
56
|
-
return f"Result for endpoint {endpoint_name} with params {params} in {cloud_or_local} environment: {result}. Do not show result is already shown in the console."
|
|
69
|
+
return f"Result for endpoint {endpoint_name} with params {params} in {cloud_or_local} environment: {result}. Do not show result is already shown in the console.{truncation_info}"
|
|
57
70
|
except Exception as e:
|
|
58
71
|
error = str(e)
|
|
59
72
|
ctx.deps.thinking_animation.stop()
|
|
@@ -15,8 +15,10 @@ def run_command(ctx: RunContext[TinybirdAgentContext], command: str):
|
|
|
15
15
|
"""
|
|
16
16
|
try:
|
|
17
17
|
ctx.deps.thinking_animation.stop()
|
|
18
|
+
is_deploy = " deploy " in command.lower()
|
|
18
19
|
confirmation = show_confirmation(
|
|
19
|
-
title=f"Run command: {command}?",
|
|
20
|
+
title=f"Run command: {command}?",
|
|
21
|
+
skip_confirmation=ctx.deps.dangerously_skip_permissions and not is_deploy,
|
|
20
22
|
)
|
|
21
23
|
|
|
22
24
|
if confirmation == "review":
|
|
@@ -36,6 +36,7 @@ except ImportError:
|
|
|
36
36
|
|
|
37
37
|
class TinybirdAgentContext(BaseModel):
|
|
38
38
|
folder: str
|
|
39
|
+
workspace_id: str
|
|
39
40
|
workspace_name: str
|
|
40
41
|
thinking_animation: Any
|
|
41
42
|
get_project_files: Callable[[], List[str]]
|
|
@@ -66,6 +67,7 @@ class TinybirdAgentContext(BaseModel):
|
|
|
66
67
|
host: str
|
|
67
68
|
local_host: str
|
|
68
69
|
local_token: str
|
|
70
|
+
run_id: Optional[str] = None
|
|
69
71
|
|
|
70
72
|
|
|
71
73
|
default_style = PromptStyle.from_dict(
|
|
@@ -792,3 +794,43 @@ def _is_path_inside_project(file_path: Path, project_path: Path) -> bool:
|
|
|
792
794
|
return True
|
|
793
795
|
except ValueError:
|
|
794
796
|
return False
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
def limit_result_output(
    result: dict[str, Any], max_rows: int = 10, max_column_length: int = 200
) -> tuple[dict[str, Any], set[str]]:
    """Cap a result to a few rows and shorten oversized cell values.

    The ``result`` dict and the row dicts it keeps are modified in place.

    Args:
        result: Result dictionary; rows are read from its ``"data"`` key.
        max_rows: Number of leading rows to keep.
        max_column_length: Longest allowed string form of a cell; longer cells
            are replaced by their first ``max_column_length`` characters plus
            ``"..."``.

    Returns:
        A tuple of the same ``result`` object and the set of column names that
        had at least one value shortened.
    """
    clipped: set[str] = set()

    rows = result.get("data")
    if not rows:
        # Missing or empty data: nothing to limit.
        return result, clipped

    kept = rows[:max_rows]
    for record in kept:
        for name in record:
            text = str(record[name])
            if len(text) <= max_column_length:
                continue
            # Only existing keys are reassigned, so iterating the dict stays safe.
            record[name] = text[:max_column_length] + "..."
            clipped.add(name)

    result["data"] = kept
    return result, clipped
|