tinybird 0.0.1.dev29__py3-none-any.whl → 0.0.1.dev30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic. Click here for more details.
- tinybird/client.py +1 -1
- tinybird/prompts.py +218 -325
- tinybird/tb/__cli__.py +2 -2
- tinybird/tb/cli.py +2 -1
- tinybird/tb/modules/build.py +61 -201
- tinybird/tb/modules/build_client.py +219 -0
- tinybird/tb/modules/cli.py +52 -16
- tinybird/tb/modules/common.py +1 -26
- tinybird/tb/modules/config.py +0 -8
- tinybird/tb/modules/create.py +20 -1
- tinybird/tb/modules/datafile/build.py +2 -2
- tinybird/tb/modules/datafile/build_pipe.py +13 -1
- tinybird/tb/modules/datasource.py +1 -1
- tinybird/tb/modules/llm.py +19 -4
- tinybird/tb/modules/local.py +1 -1
- tinybird/tb/modules/login.py +7 -2
- tinybird/tb/modules/shell.py +2 -2
- tinybird/tb/modules/test.py +41 -22
- tinybird/tb/modules/update.py +182 -0
- {tinybird-0.0.1.dev29.dist-info → tinybird-0.0.1.dev30.dist-info}/METADATA +2 -1
- {tinybird-0.0.1.dev29.dist-info → tinybird-0.0.1.dev30.dist-info}/RECORD +24 -23
- tinybird/tb/modules/build_server.py +0 -75
- {tinybird-0.0.1.dev29.dist-info → tinybird-0.0.1.dev30.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev29.dist-info → tinybird-0.0.1.dev30.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev29.dist-info → tinybird-0.0.1.dev30.dist-info}/top_level.txt +0 -0
tinybird/client.py
CHANGED
tinybird/prompts.py
CHANGED
|
@@ -358,91 +358,75 @@ aggregate_functions = [
|
|
|
358
358
|
]
|
|
359
359
|
|
|
360
360
|
|
|
361
|
-
|
|
362
|
-
You are a Tinybird expert. You will be given a
|
|
363
|
-
<
|
|
364
|
-
name
|
|
365
|
-
content
|
|
361
|
+
test_create_prompt = """
|
|
362
|
+
You are a Tinybird expert. You will be given a pipe containing different nodes with SQL and Tinybird templating syntax. You will generate URLs to test it with different parameters combinations.
|
|
363
|
+
<pipe>
|
|
364
|
+
<name>{name}</name>
|
|
365
|
+
<content>{content}</content>
|
|
366
|
+
<parameters>{parameters}</parameters>
|
|
367
|
+
</pipe>
|
|
366
368
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
369
|
+
<instructions>
|
|
370
|
+
- Every test name must be unique.
|
|
371
|
+
- The test command must be a valid Tinybird command that can be run in the terminal.
|
|
372
|
+
- The test command can have as many parameters as are needed to test the pipe.
|
|
373
|
+
- The parameter within Tinybird templating syntax looks like this one {{String(my_param_name, default_value)}}.
|
|
374
|
+
- If there are no parameters, you can omit parameters and generate a single test command.
|
|
375
|
+
- The format of the parameters is the following: ?param1=value1&param2=value2&param3=value3
|
|
376
|
+
</instructions>
|
|
370
377
|
|
|
371
|
-
|
|
372
|
-
`<column_name_1>` <clickhouse_tinybird_compatible_data_type> `json:$.<column_name_1>`,
|
|
373
|
-
`<column_name_2>` <clickhouse_tinybird_compatible_data_type> `json:$.<column_name_2>`,
|
|
374
|
-
...
|
|
375
|
-
`<column_name_n>` <clickhouse_tinybird_compatible_data_type> `json:$.<column_name_n>`
|
|
378
|
+
Follow the instructions and generate the following response with no additional text:
|
|
376
379
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
</
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
```
|
|
386
|
-
DESCRIPTION >
|
|
387
|
-
Some meaningful description of the pipe
|
|
380
|
+
<response>
|
|
381
|
+
<test>
|
|
382
|
+
<name>[test name here]</name>
|
|
383
|
+
<description>[test description here]</description>
|
|
384
|
+
<parameters>[parameters here]</parameters>
|
|
385
|
+
</test>
|
|
386
|
+
</response>
|
|
387
|
+
"""
|
|
388
388
|
|
|
389
|
-
NODE node_1
|
|
390
|
-
SQL >
|
|
391
|
-
<sql_query_using_clickhouse_syntax_and_tinybird_templating_syntax>
|
|
392
389
|
|
|
393
|
-
|
|
390
|
+
def create_prompt(existing_resources: str) -> str:
|
|
391
|
+
return """
|
|
392
|
+
You are a Tinybird expert. You will be given a prompt to generate Tinybird resources: datasources and/or pipes.
|
|
393
|
+
<existing_resources>{existing_resources}</existing_resources>
|
|
394
|
+
{datasource_instructions}
|
|
395
|
+
{pipe_instructions}
|
|
396
|
+
{sql_instructions}
|
|
397
|
+
{datasource_example}
|
|
398
|
+
{pipe_example}
|
|
399
|
+
{copy_pipe_instructions}
|
|
400
|
+
{materialized_pipe_instructions}
|
|
401
|
+
Use the following format to generate the response and do not wrap it in any other text, including the <response> tag.
|
|
402
|
+
<response>
|
|
403
|
+
<resource>
|
|
404
|
+
<type>[datasource or pipe]</type>
|
|
405
|
+
<name>[resource name here]</name>
|
|
406
|
+
<content>[resource content here]</content>
|
|
407
|
+
</resource>
|
|
408
|
+
</response>
|
|
394
409
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
- The SQL query must be a valid ClickHouse SQL query that mixes ClickHouse syntax and Tinybird templating syntax.
|
|
406
|
-
- If you use dynamic parameters you MUST start ALWAYS the whole sql query with "%" symbol on top. e.g: SQL >\n %\n SELECT * FROM <table> WHERE <condition> LIMIT 10
|
|
407
|
-
- The Parameter functions like this one {{String(my_param_name,default_value)}} can be one of the following: String, DateTime, Date, Float32, Float64, Int, Integer, UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
|
|
408
|
-
- Parameter names must be different from column names. Pass always the param name and a default value to the function.
|
|
409
|
-
- Code inside the template {{code}} is python code but no module is allowed to be imported. So for example you can't use now() as default value for a DateTime parameter. You need an if else block like this:
|
|
410
|
-
```
|
|
411
|
-
(...)
|
|
412
|
-
AND timestamp BETWEEN {{DateTime(start_date, now() - interval 30 day)}} AND {{DateTime(end_date, now())}} --this is not valid
|
|
410
|
+
""".format(
|
|
411
|
+
existing_resources=existing_resources,
|
|
412
|
+
datasource_instructions=datasource_instructions,
|
|
413
|
+
pipe_instructions=pipe_instructions,
|
|
414
|
+
sql_instructions=sql_instructions,
|
|
415
|
+
datasource_example=datasource_example,
|
|
416
|
+
pipe_example=pipe_example,
|
|
417
|
+
copy_pipe_instructions=copy_pipe_instructions,
|
|
418
|
+
materialized_pipe_instructions=materialized_pipe_instructions,
|
|
419
|
+
)
|
|
413
420
|
|
|
414
|
-
{%if not defined(start_date)%}
|
|
415
|
-
timestamp BETWEEN now() - interval 30 day
|
|
416
|
-
{%else%}
|
|
417
|
-
timestamp BETWEEN {{DateTime(start_date)}}
|
|
418
|
-
{%end%}
|
|
419
|
-
{%if not defined(end_date)%}
|
|
420
|
-
AND now()
|
|
421
|
-
{%else%}
|
|
422
|
-
AND {{DateTime(end_date)}}
|
|
423
|
-
{%end%} --this is valid
|
|
424
|
-
```
|
|
425
|
-
- Nodes can't have the same exact name as the Pipe they belong to.
|
|
426
|
-
- Endpoints can export Prometehus format, Node sql must have name two columns:
|
|
427
|
-
name (String): The name of the metric
|
|
428
|
-
value (Number): The numeric value for the metric.
|
|
429
|
-
and then some optional columns:
|
|
430
|
-
help (String): A description of the metric.
|
|
431
|
-
timestamp (Number): A Unix timestamp for the metric.
|
|
432
|
-
type (String): Defines the metric type (counter, gauge, histogram, summary, untyped, or empty).
|
|
433
|
-
labels (Map(String, String)): A set of key-value pairs providing metric dimensions.
|
|
434
|
-
- Use prometheus format when you are asked to monitor something
|
|
435
|
-
- Nodes do NOT use the same name as the Pipe they belong to. So if the pipe name is "my_pipe", the nodes must be named "my_pipe_node_1", "my_pipe_node_2", etc.
|
|
436
|
-
</instructions>
|
|
437
|
-
"""
|
|
438
421
|
|
|
439
|
-
|
|
422
|
+
def mock_prompt(rows: int) -> str:
|
|
423
|
+
return f"""
|
|
440
424
|
Given the schema for a Tinybird datasource, return a can you create a clickhouse sql query to generate some random data that matches that schema.
|
|
441
425
|
|
|
442
426
|
Response format MUST be just a valid clickhouse sql query.
|
|
443
427
|
|
|
444
|
-
|
|
445
|
-
|
|
428
|
+
<example>
|
|
429
|
+
<example_datasource_schema>
|
|
446
430
|
SCHEMA >
|
|
447
431
|
experience_gained Int16 `json:$.experience_gained`,
|
|
448
432
|
level Int16 `json:$.level`,
|
|
@@ -451,9 +435,8 @@ SCHEMA >
|
|
|
451
435
|
pvp_kills Int16 `json:$.pvp_kills`,
|
|
452
436
|
quest_completions Int16 `json:$.quest_completions`,
|
|
453
437
|
timestamp DateTime `json:$.timestamp`
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
# Example output:
|
|
438
|
+
</example_datasource_schema>
|
|
439
|
+
<example_output>
|
|
457
440
|
|
|
458
441
|
SELECT
|
|
459
442
|
rand() % 1000 AS experience_gained, -- Random experience gained between 0 and 999
|
|
@@ -464,19 +447,27 @@ SELECT
|
|
|
464
447
|
rand() % 200 AS quest_completions, -- Random quest completions between 0 and 199
|
|
465
448
|
now() - rand() % 86400 AS timestamp -- Random timestamp within the last day
|
|
466
449
|
FROM numbers({rows})
|
|
450
|
+
</example_output>
|
|
451
|
+
</example>
|
|
467
452
|
|
|
468
|
-
|
|
469
|
-
|
|
453
|
+
<instructions>
|
|
470
454
|
- The query MUST return a random sample of data that matches the schema.
|
|
471
455
|
- The query MUST return a valid clickhouse sql query.
|
|
472
456
|
- The query MUST return a sample of EXACTLY {rows} rows.
|
|
473
457
|
- The query MUST be valid for clickhouse and Tinybird.
|
|
474
|
-
-
|
|
475
|
-
- Do NOT include ```clickhouse or ```sql or any other wrapping text.
|
|
458
|
+
- FROM numbers({rows}) part is mandatory.
|
|
459
|
+
- Do NOT include ```clickhouse or ```sql or any other wrapping text to the sql query.
|
|
476
460
|
- Do NOT use any of these functions: elementAt
|
|
477
461
|
- Do NOT add a semicolon at the end of the query
|
|
478
462
|
- Do NOT add any FORMAT at the end of the query, because it will be added later by Tinybird.
|
|
463
|
+
- General functions supported are: {general_functions}
|
|
464
|
+
- Character insensitive functions supported are: {general_functions_insensitive}
|
|
465
|
+
- Aggregate functions supported are: {aggregate_functions}
|
|
466
|
+
- Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
|
|
467
|
+
- If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
|
|
468
|
+
</instructions>
|
|
479
469
|
|
|
470
|
+
<more_examples>
|
|
480
471
|
# Examples with different schemas, like an array field or a nested JSON field:
|
|
481
472
|
|
|
482
473
|
## Example schema with an array field:
|
|
@@ -491,13 +482,13 @@ SCHEMA >
|
|
|
491
482
|
`items` Array(String) `json:$.items[:]` // This is an array field
|
|
492
483
|
|
|
493
484
|
### Desired final output of the query:
|
|
494
|
-
{
|
|
485
|
+
{{
|
|
495
486
|
"order_id": 123456,
|
|
496
487
|
"customer_id": 7890,
|
|
497
488
|
"order_date": "2024-11-30T10:30:00.000Z",
|
|
498
489
|
"total_amount": 150.0,
|
|
499
490
|
"items": ["item1", "item2", "item3"]
|
|
500
|
-
}
|
|
491
|
+
}}
|
|
501
492
|
|
|
502
493
|
### Example SQL output with an array field:
|
|
503
494
|
|
|
@@ -526,32 +517,32 @@ SCHEMA >
|
|
|
526
517
|
Note that the important part is generating the nested fields:
|
|
527
518
|
json:$.request.options.max_tokens > this means that the max_tokens field is nested inside the options field inside the request field.
|
|
528
519
|
|
|
529
|
-
{
|
|
520
|
+
{{
|
|
530
521
|
"request_id": "req_abc123",
|
|
531
522
|
"timestamp": "2024-11-30T10:30:00.000Z",
|
|
532
|
-
"request": {
|
|
523
|
+
"request": {{
|
|
533
524
|
"model": "gpt-4",
|
|
534
|
-
"options": {
|
|
525
|
+
"options": {{
|
|
535
526
|
"temperature": 0.7,
|
|
536
527
|
"max_tokens": 1000,
|
|
537
528
|
"stream": false
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
}
|
|
529
|
+
}}
|
|
530
|
+
}}
|
|
531
|
+
}}
|
|
541
532
|
|
|
542
533
|
### Example SQL output with nested fields:
|
|
543
534
|
|
|
544
535
|
SELECT
|
|
545
536
|
request_id,
|
|
546
537
|
timestamp,
|
|
547
|
-
CAST(concat('{
|
|
538
|
+
CAST(concat('{{
|
|
548
539
|
"model": "', model, '",
|
|
549
|
-
"options": {
|
|
540
|
+
"options": {{
|
|
550
541
|
"temperature": ', temperature, ',
|
|
551
542
|
"max_tokens": ', max_tokens, ',
|
|
552
543
|
"stream": ', IF(stream = 1, 'true', 'false'), '
|
|
553
|
-
}
|
|
554
|
-
}'), 'JSON') AS request
|
|
544
|
+
}}
|
|
545
|
+
}}'), 'JSON') AS request
|
|
555
546
|
FROM
|
|
556
547
|
(
|
|
557
548
|
SELECT
|
|
@@ -563,90 +554,139 @@ FROM
|
|
|
563
554
|
rand() % 2 AS stream
|
|
564
555
|
FROM numbers(ROWS)
|
|
565
556
|
)
|
|
557
|
+
</more_examples>
|
|
566
558
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
559
|
+
Follow the instructions and generate the following response with no additional text in the following format:
|
|
560
|
+
<response>
|
|
561
|
+
<sql>[raw sql query here]</sql>
|
|
562
|
+
</response>
|
|
570
563
|
"""
|
|
571
564
|
|
|
572
|
-
create_test_prompt = """
|
|
573
|
-
You are a Tinybird expert. You will be given a pipe endpoint containing different nodes with SQL and Tinybird templating syntax. You will generate URLs to test it with different parameters combinations.
|
|
574
|
-
|
|
575
|
-
<test>
|
|
576
|
-
<test_1>:
|
|
577
|
-
name: <test_name_1>
|
|
578
|
-
description: <description_1>
|
|
579
|
-
parameters: <url_encoded_parameters_1>
|
|
580
|
-
<test_2>:
|
|
581
|
-
name: <test_name_2>
|
|
582
|
-
description: <description_2>
|
|
583
|
-
parameters: <url_encoded_parameters_2>
|
|
584
|
-
</test>
|
|
585
|
-
<instructions>
|
|
586
|
-
- The test name must be unique.
|
|
587
|
-
- The test command must be a valid Tinybird command that can be run in the terminal.
|
|
588
|
-
- The test command can have as many parameters as are needed to test the pipe.
|
|
589
|
-
- The parameter within Tinybird templating syntax looks like this one {{String(my_param_name, default_value)}}.
|
|
590
|
-
- If there are no parameters in the , you can omit parametrs and generate a single test command.
|
|
591
|
-
- Extra context: {prompt}
|
|
592
|
-
</instructions>
|
|
593
|
-
"""
|
|
594
565
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
566
|
+
copy_pipe_instructions = """
|
|
567
|
+
<copy_pipe_instructions>
|
|
568
|
+
- Do not create copy pipes by default, unless the user asks for it.
|
|
569
|
+
- In a .pipe file you can define how to export the result of a Pipe to a Data Source, optionally with a schedule.
|
|
570
|
+
- Do not include COPY_SCHEDULE in the .pipe file if it is not requested by the user.
|
|
571
|
+
- COPY_SCHEDULE is a cron expression that defines the schedule of the copy pipe.
|
|
572
|
+
- COPY_SCHEDULE is optional and if not provided, the copy pipe will be executed only once.
|
|
573
|
+
- TARGET_DATASOURCE is the name of the Data Source to export the result to.
|
|
574
|
+
- TYPE COPY is the type of the pipe and it is mandatory for copy pipes.
|
|
575
|
+
- If the copy pipe uses parameters, you must include the % character and a newline on top of every query to be able to use the parameters.
|
|
576
|
+
- The content of the .pipe file must follow this format:
|
|
577
|
+
DESCRIPTION Copy Pipe to export sales hour every hour to the sales_hour_copy Data Source
|
|
578
|
+
|
|
579
|
+
NODE daily_sales
|
|
580
|
+
SQL >
|
|
581
|
+
%
|
|
582
|
+
SELECT toStartOfDay(starting_date) day, country, sum(sales) as total_sales
|
|
583
|
+
FROM teams
|
|
584
|
+
WHERE
|
|
585
|
+
day BETWEEN toStartOfDay(now()) - interval 1 day AND toStartOfDay(now())
|
|
586
|
+
and country = {{ String(country, 'US')}}
|
|
587
|
+
GROUP BY day, country
|
|
588
|
+
|
|
589
|
+
TYPE COPY
|
|
590
|
+
TARGET_DATASOURCE sales_hour_copy
|
|
591
|
+
COPY_SCHEDULE 0 * * * *
|
|
592
|
+
</copy_pipe_instructions>
|
|
593
|
+
"""
|
|
602
594
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
595
|
+
materialized_pipe_instructions = """
|
|
596
|
+
<materialized_pipe_instructions>
|
|
597
|
+
- Do not create materialized pipes by default, unless the user asks for it.
|
|
598
|
+
- In a .pipe file you can define how to materialize each row ingested in the earliest Data Source in the Pipe query to a materialized Data Source. Materialization happens at ingest.
|
|
599
|
+
- DATASOURCE: Required when TYPE is MATERIALIZED. Sets the destination Data Source for materialized nodes.
|
|
600
|
+
- TYPE MATERIALIZED is the type of the pipe and it is mandatory for materialized pipes.
|
|
601
|
+
- The content of the .pipe file must follow this format:
|
|
602
|
+
DESCRIPTION Materialized Pipe to aggregate sales per hour in the sales_by_hour Data Source
|
|
611
603
|
|
|
612
|
-
|
|
604
|
+
NODE daily_sales
|
|
605
|
+
SQL >
|
|
606
|
+
SELECT toStartOfDay(starting_date) day, country, sum(sales) as total_sales
|
|
607
|
+
FROM teams
|
|
608
|
+
GROUP BY day, country
|
|
613
609
|
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
<description>[test description here]</description>
|
|
618
|
-
<parameters>[parameters here]</parameters>
|
|
619
|
-
</test>
|
|
620
|
-
</response>
|
|
610
|
+
TYPE MATERIALIZED
|
|
611
|
+
DATASOURCE sales_by_hour
|
|
612
|
+
</materialized_pipe_instructions>
|
|
621
613
|
"""
|
|
622
614
|
|
|
623
615
|
|
|
624
|
-
def
|
|
616
|
+
def ask_prompt(existing_resources: str) -> str:
|
|
625
617
|
return """
|
|
626
|
-
You are a Tinybird expert. You will be given a prompt to
|
|
627
|
-
<existing_resources>
|
|
628
|
-
{
|
|
629
|
-
|
|
618
|
+
You are a Tinybird expert. You will be given a prompt to ask questions about Tinybird resources.
|
|
619
|
+
<existing_resources>{existing_resources}</existing_resources>
|
|
620
|
+
{datasource_instructions}
|
|
621
|
+
{pipe_instructions}
|
|
622
|
+
{sql_instructions}
|
|
623
|
+
{datasource_example}
|
|
624
|
+
{pipe_example}
|
|
625
|
+
{copy_pipe_instructions}
|
|
626
|
+
{materialized_pipe_instructions}
|
|
627
|
+
|
|
628
|
+
The previous instructions are explanations of how things work in Tinybird. Answer in natural language.
|
|
629
|
+
|
|
630
|
+
""".format(
|
|
631
|
+
existing_resources=existing_resources,
|
|
632
|
+
datasource_instructions=datasource_instructions,
|
|
633
|
+
datasource_example=datasource_example,
|
|
634
|
+
pipe_instructions=pipe_instructions,
|
|
635
|
+
pipe_example=pipe_example,
|
|
636
|
+
sql_instructions=sql_instructions,
|
|
637
|
+
copy_pipe_instructions=copy_pipe_instructions,
|
|
638
|
+
materialized_pipe_instructions=materialized_pipe_instructions,
|
|
639
|
+
)
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
datasource_instructions = """
|
|
630
643
|
<datasource_file_instructions>
|
|
631
644
|
- The datasource names must be unique.
|
|
632
645
|
- No indentation is allowed for property names: DESCRIPTION, SCHEMA, ENGINE, ENGINE_PARTITION_KEY, ENGINE_SORTING_KEY, etc.
|
|
633
646
|
</datasource_file_instructions>
|
|
647
|
+
"""
|
|
648
|
+
|
|
649
|
+
datasource_example = """
|
|
650
|
+
<datasource_content>
|
|
651
|
+
DESCRIPTION >
|
|
652
|
+
Some meaningful description of the datasource
|
|
653
|
+
|
|
654
|
+
SCHEMA >
|
|
655
|
+
`column_name_1` clickhouse_tinybird_compatible_data_type `json:$.column_name_1`,
|
|
656
|
+
`column_name_2` clickhouse_tinybird_compatible_data_type `json:$.column_name_2`,
|
|
657
|
+
...
|
|
658
|
+
`column_name_n` clickhouse_tinybird_compatible_data_type `json:$.column_name_n`
|
|
659
|
+
|
|
660
|
+
ENGINE "MergeTree"
|
|
661
|
+
ENGINE_PARTITION_KEY "partition_key"
|
|
662
|
+
ENGINE_SORTING_KEY "sorting_key_1, sorting_key_2, ..."
|
|
663
|
+
</datasource_content>
|
|
664
|
+
"""
|
|
665
|
+
|
|
666
|
+
pipe_example = """
|
|
667
|
+
<pipe_content>
|
|
668
|
+
DESCRIPTION >
|
|
669
|
+
Some meaningful description of the pipe
|
|
670
|
+
|
|
671
|
+
NODE node_1
|
|
672
|
+
SQL >
|
|
673
|
+
[sql query using clickhouse syntax and tinybird templating syntax and starting always with SELECT or %\nSELECT]
|
|
674
|
+
|
|
675
|
+
</pipe_content>
|
|
676
|
+
"""
|
|
677
|
+
|
|
678
|
+
pipe_instructions = """
|
|
634
679
|
<pipe_file_instructions>
|
|
635
680
|
- The pipe names must be unique.
|
|
636
681
|
- Nodes do NOT use the same name as the Pipe they belong to. So if the pipe name is "my_pipe", the nodes must be named different like "my_pipe_node_1", "my_pipe_node_2", etc.
|
|
637
682
|
- Nodes can't have the same exact name as the Pipe they belong to.
|
|
638
683
|
- Avoid more than one node per pipe unless it is really necessary or requested by the user.
|
|
639
684
|
- No indentation is allowed for property names: DESCRIPTION, NODE, SQL, TYPE, etc.
|
|
640
|
-
-
|
|
641
|
-
- name (String): The name of the metric
|
|
642
|
-
- value (Number): The numeric value for the metric.
|
|
643
|
-
- and then some optional columns:
|
|
644
|
-
- help (String): A description of the metric.
|
|
645
|
-
- timestamp (Number): A Unix timestamp for the metric.
|
|
646
|
-
- type (String): Defines the metric type (counter, gauge, histogram, summary, untyped, or empty).
|
|
647
|
-
- labels (Map(String, String)): A set of key-value pairs providing metric dimensions.
|
|
648
|
-
- Use prometheus format when you are asked to monitor something
|
|
685
|
+
- Allowed TYPE values are: endpoint, copy, materialized, sink
|
|
649
686
|
</pipe_file_instructions>
|
|
687
|
+
"""
|
|
688
|
+
|
|
689
|
+
sql_instructions = """
|
|
650
690
|
<sql_instructions>
|
|
651
691
|
- The SQL query must be a valid ClickHouse SQL query that mixes ClickHouse syntax and Tinybird templating syntax (Tornado templating language under the hood).
|
|
652
692
|
- SQL queries with parameters must start with "%" character and a newline on top of every query to be able to use the parameters. Examples:
|
|
@@ -693,33 +733,26 @@ You are a Tinybird expert. You will be given a prompt to generate Tinybird resou
|
|
|
693
733
|
- General functions and aggregate functions are case sensitive.
|
|
694
734
|
- Character insensitive functions are case insensitive.
|
|
695
735
|
</sql_instructions>
|
|
736
|
+
""".format(
|
|
737
|
+
general_functions=general_functions,
|
|
738
|
+
general_functions_insensitive=general_functions_insensitive,
|
|
739
|
+
aggregate_functions=aggregate_functions,
|
|
740
|
+
)
|
|
696
741
|
|
|
697
|
-
<datasource_content>
|
|
698
|
-
DESCRIPTION >
|
|
699
|
-
Some meaningful description of the datasource
|
|
700
|
-
|
|
701
|
-
SCHEMA >
|
|
702
|
-
`column_name_1` clickhouse_tinybird_compatible_data_type `json:$.column_name_1`,
|
|
703
|
-
`column_name_2` clickhouse_tinybird_compatible_data_type `json:$.column_name_2`,
|
|
704
|
-
...
|
|
705
|
-
`column_name_n` clickhouse_tinybird_compatible_data_type `json:$.column_name_n`
|
|
706
|
-
|
|
707
|
-
ENGINE "MergeTree"
|
|
708
|
-
ENGINE_PARTITION_KEY "partition_key"
|
|
709
|
-
ENGINE_SORTING_KEY "sorting_key_1, sorting_key_2, ..."
|
|
710
|
-
</datasource_content>
|
|
711
|
-
<pipe_content>
|
|
712
|
-
DESCRIPTION >
|
|
713
|
-
Some meaningful description of the pipe
|
|
714
|
-
|
|
715
|
-
NODE node_1
|
|
716
|
-
SQL >
|
|
717
|
-
[sql query using clickhouse syntax and tinybird templating syntax and starting always with SELECT or %\nSELECT]
|
|
718
|
-
|
|
719
|
-
</pipe_content>
|
|
720
742
|
|
|
743
|
+
def update_prompt(existing_resources: str) -> str:
|
|
744
|
+
return """
|
|
745
|
+
You are a Tinybird expert. You will be given a prompt to update the existing Tinybird resources: datasources and/or pipes.
|
|
746
|
+
You will return the resources that need to be updated.
|
|
747
|
+
<existing_resources>{existing_resources}</existing_resources>
|
|
748
|
+
{datasource_instructions}
|
|
749
|
+
{pipe_instructions}
|
|
750
|
+
{sql_instructions}
|
|
751
|
+
{datasource_example}
|
|
752
|
+
{pipe_example}
|
|
753
|
+
{copy_pipe_instructions}
|
|
754
|
+
{materialized_pipe_instructions}
|
|
721
755
|
Use the following format to generate the response and do not wrap it in any other text, including the <response> tag.
|
|
722
|
-
|
|
723
756
|
<response>
|
|
724
757
|
<resource>
|
|
725
758
|
<type>[datasource or pipe]</type>
|
|
@@ -730,151 +763,11 @@ Use the following format to generate the response and do not wrap it in any othe
|
|
|
730
763
|
|
|
731
764
|
""".format(
|
|
732
765
|
existing_resources=existing_resources,
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
766
|
+
datasource_instructions=datasource_instructions,
|
|
767
|
+
pipe_instructions=pipe_instructions,
|
|
768
|
+
sql_instructions=sql_instructions,
|
|
769
|
+
datasource_example=datasource_example,
|
|
770
|
+
pipe_example=pipe_example,
|
|
771
|
+
copy_pipe_instructions=copy_pipe_instructions,
|
|
772
|
+
materialized_pipe_instructions=materialized_pipe_instructions,
|
|
736
773
|
)
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
def mock_prompt(rows: int) -> str:
|
|
740
|
-
return f"""
|
|
741
|
-
Given the schema for a Tinybird datasource, return a can you create a clickhouse sql query to generate some random data that matches that schema.
|
|
742
|
-
|
|
743
|
-
Response format MUST be just a valid clickhouse sql query.
|
|
744
|
-
|
|
745
|
-
<example>
|
|
746
|
-
<example_datasource_schema>
|
|
747
|
-
SCHEMA >
|
|
748
|
-
experience_gained Int16 `json:$.experience_gained`,
|
|
749
|
-
level Int16 `json:$.level`,
|
|
750
|
-
monster_kills Int16 `json:$.monster_kills`,
|
|
751
|
-
player_id String `json:$.player_id`,
|
|
752
|
-
pvp_kills Int16 `json:$.pvp_kills`,
|
|
753
|
-
quest_completions Int16 `json:$.quest_completions`,
|
|
754
|
-
timestamp DateTime `json:$.timestamp`
|
|
755
|
-
</example_datasource_schema>
|
|
756
|
-
<example_output>
|
|
757
|
-
|
|
758
|
-
SELECT
|
|
759
|
-
rand() % 1000 AS experience_gained, -- Random experience gained between 0 and 999
|
|
760
|
-
1 + rand() % 100 AS level, -- Random level between 1 and 100
|
|
761
|
-
rand() % 500 AS monster_kills, -- Random monster kills between 0 and 499
|
|
762
|
-
concat('player_', toString(rand() % 10000)) AS player_id, -- Random player IDs like "player_1234"
|
|
763
|
-
rand() % 50 AS pvp_kills, -- Random PvP kills between 0 and 49
|
|
764
|
-
rand() % 200 AS quest_completions, -- Random quest completions between 0 and 199
|
|
765
|
-
now() - rand() % 86400 AS timestamp -- Random timestamp within the last day
|
|
766
|
-
FROM numbers({rows})
|
|
767
|
-
</example_output>
|
|
768
|
-
</example>
|
|
769
|
-
|
|
770
|
-
<instructions>
|
|
771
|
-
- The query MUST return a random sample of data that matches the schema.
|
|
772
|
-
- The query MUST return a valid clickhouse sql query.
|
|
773
|
-
- The query MUST return a sample of EXACTLY {rows} rows.
|
|
774
|
-
- The query MUST be valid for clickhouse and Tinybird.
|
|
775
|
-
- FROM numbers({rows}) part is mandatory.
|
|
776
|
-
- Do NOT include ```clickhouse or ```sql or any other wrapping text to the sql query.
|
|
777
|
-
- Do NOT use any of these functions: elementAt
|
|
778
|
-
- Do NOT add a semicolon at the end of the query
|
|
779
|
-
- Do NOT add any FORMAT at the end of the query, because it will be added later by Tinybird.
|
|
780
|
-
- General functions supported are: {general_functions}
|
|
781
|
-
- Character insensitive functions supported are: {general_functions_insensitive}
|
|
782
|
-
- Aggregate functions supported are: {aggregate_functions}
|
|
783
|
-
- Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
|
|
784
|
-
- If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
|
|
785
|
-
</instructions>
|
|
786
|
-
|
|
787
|
-
<more_examples>
|
|
788
|
-
# Examples with different schemas, like an array field or a nested JSON field:
|
|
789
|
-
|
|
790
|
-
## Example schema with an array field:
|
|
791
|
-
|
|
792
|
-
### Schema:
|
|
793
|
-
|
|
794
|
-
SCHEMA >
|
|
795
|
-
`order_id` UInt64 `json:$.order_id`,
|
|
796
|
-
`customer_id` UInt64 `json:$.customer_id`,
|
|
797
|
-
`order_date` DateTime `json:$.order_date`,
|
|
798
|
-
`total_amount` Float64 `json:$.total_amount`,
|
|
799
|
-
`items` Array(String) `json:$.items[:]` // This is an array field
|
|
800
|
-
|
|
801
|
-
### Desired final output of the query:
|
|
802
|
-
{{
|
|
803
|
-
"order_id": 123456,
|
|
804
|
-
"customer_id": 7890,
|
|
805
|
-
"order_date": "2024-11-30T10:30:00.000Z",
|
|
806
|
-
"total_amount": 150.0,
|
|
807
|
-
"items": ["item1", "item2", "item3"]
|
|
808
|
-
}}
|
|
809
|
-
|
|
810
|
-
### Example SQL output with an array field:
|
|
811
|
-
|
|
812
|
-
SELECT
|
|
813
|
-
concat('ord_', toString(rand() % 10000)) AS order_id,
|
|
814
|
-
concat('cust_', toString(rand() % 10000)) AS customer_id,
|
|
815
|
-
now() - rand() % 86400 AS order_date,
|
|
816
|
-
rand() % 1000 AS total_amount,
|
|
817
|
-
arrayMap(x -> concat('item_', toString(x)), range(1, rand() % 5 + 1)) AS items
|
|
818
|
-
FROM numbers(ROWS)
|
|
819
|
-
|
|
820
|
-
## Example schema with a nested JSON field:
|
|
821
|
-
|
|
822
|
-
### Schema:
|
|
823
|
-
|
|
824
|
-
SCHEMA >
|
|
825
|
-
`request_id` String `json:$.request_id`,
|
|
826
|
-
`timestamp` DateTime `json:$.timestamp`,
|
|
827
|
-
`model` String `json:$.request.model`,
|
|
828
|
-
`temperature` Float32 `json:$.request.options.temperature`,
|
|
829
|
-
`max_tokens` UInt32 `json:$.request.options.max_tokens`,
|
|
830
|
-
`stream` UInt8 `json:$.request.options.stream`
|
|
831
|
-
|
|
832
|
-
### Desired final output of the query:
|
|
833
|
-
|
|
834
|
-
Note that the important part is generating the nested fields:
|
|
835
|
-
json:$.request.options.max_tokens > this means that the max_tokens field is nested inside the options field inside the request field.
|
|
836
|
-
|
|
837
|
-
{{
|
|
838
|
-
"request_id": "req_abc123",
|
|
839
|
-
"timestamp": "2024-11-30T10:30:00.000Z",
|
|
840
|
-
"request": {{
|
|
841
|
-
"model": "gpt-4",
|
|
842
|
-
"options": {{
|
|
843
|
-
"temperature": 0.7,
|
|
844
|
-
"max_tokens": 1000,
|
|
845
|
-
"stream": false
|
|
846
|
-
}}
|
|
847
|
-
}}
|
|
848
|
-
}}
|
|
849
|
-
|
|
850
|
-
### Example SQL output with nested fields:
|
|
851
|
-
|
|
852
|
-
SELECT
|
|
853
|
-
request_id,
|
|
854
|
-
timestamp,
|
|
855
|
-
CAST(concat('{{
|
|
856
|
-
"model": "', model, '",
|
|
857
|
-
"options": {{
|
|
858
|
-
"temperature": ', temperature, ',
|
|
859
|
-
"max_tokens": ', max_tokens, ',
|
|
860
|
-
"stream": ', IF(stream = 1, 'true', 'false'), '
|
|
861
|
-
}}
|
|
862
|
-
}}'), 'JSON') AS request
|
|
863
|
-
FROM
|
|
864
|
-
(
|
|
865
|
-
SELECT
|
|
866
|
-
concat('req_', lower(hex(randomString(6)))) AS request_id,
|
|
867
|
-
(now() - toIntervalDay(rand() % 30)) + toIntervalSecond(rand() % 86400) AS timestamp,
|
|
868
|
-
['gpt-4', 'gpt-3.5-turbo', 'gpt-4-turbo'][(rand() % 3) + 1] AS model,
|
|
869
|
-
round(rand() / 10, 2) AS temperature,
|
|
870
|
-
500 + (rand() % 2500) AS max_tokens,
|
|
871
|
-
rand() % 2 AS stream
|
|
872
|
-
FROM numbers(ROWS)
|
|
873
|
-
)
|
|
874
|
-
</more_examples>
|
|
875
|
-
|
|
876
|
-
Follow the instructions and generate the following response with no additional text in the following format:
|
|
877
|
-
<response>
|
|
878
|
-
<sql>[raw sql query here]</sql>
|
|
879
|
-
</response>
|
|
880
|
-
"""
|
tinybird/tb/__cli__.py
CHANGED
|
@@ -4,5 +4,5 @@ __description__ = 'Tinybird Command Line Tool'
|
|
|
4
4
|
__url__ = 'https://www.tinybird.co/docs/cli/introduction.html'
|
|
5
5
|
__author__ = 'Tinybird'
|
|
6
6
|
__author_email__ = 'support@tinybird.co'
|
|
7
|
-
__version__ = '0.0.1.dev29'
|
|
8
|
-
__revision__ = '
|
|
7
|
+
__version__ = '0.0.1.dev30'
|
|
8
|
+
__revision__ = 'f7bc1dc'
|