tinybird 0.0.1.dev26__py3-none-any.whl → 0.0.1.dev28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tinybird might be problematic. Click here for more details.

Files changed (36) hide show
  1. tinybird/config.py +1 -1
  2. tinybird/datatypes.py +46 -57
  3. tinybird/git_settings.py +4 -4
  4. tinybird/prompts.py +647 -0
  5. tinybird/sql.py +9 -0
  6. tinybird/sql_toolset.py +17 -3
  7. tinybird/syncasync.py +1 -1
  8. tinybird/tb/__cli__.py +2 -2
  9. tinybird/tb/cli.py +2 -0
  10. tinybird/tb/modules/build.py +44 -16
  11. tinybird/tb/modules/build_server.py +75 -0
  12. tinybird/tb/modules/cli.py +22 -0
  13. tinybird/tb/modules/common.py +2 -2
  14. tinybird/tb/modules/config.py +13 -14
  15. tinybird/tb/modules/create.py +145 -134
  16. tinybird/tb/modules/datafile/build.py +28 -0
  17. tinybird/tb/modules/datafile/common.py +1 -0
  18. tinybird/tb/modules/datafile/fixture.py +10 -6
  19. tinybird/tb/modules/datafile/parse_pipe.py +2 -0
  20. tinybird/tb/modules/datasource.py +1 -1
  21. tinybird/tb/modules/deploy.py +254 -0
  22. tinybird/tb/modules/llm.py +32 -16
  23. tinybird/tb/modules/llm_utils.py +24 -0
  24. tinybird/tb/modules/local.py +2 -2
  25. tinybird/tb/modules/login.py +8 -6
  26. tinybird/tb/modules/mock.py +11 -6
  27. tinybird/tb/modules/test.py +69 -47
  28. tinybird/tb/modules/watch.py +1 -1
  29. tinybird/tb_cli_modules/common.py +2 -2
  30. tinybird/tb_cli_modules/config.py +5 -5
  31. tinybird/tornado_template.py +1 -3
  32. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/METADATA +1 -1
  33. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/RECORD +36 -33
  34. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/WHEEL +0 -0
  35. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/entry_points.txt +0 -0
  36. {tinybird-0.0.1.dev26.dist-info → tinybird-0.0.1.dev28.dist-info}/top_level.txt +0 -0
tinybird/prompts.py CHANGED
@@ -1,3 +1,363 @@
1
+ general_functions = [
2
+ "BLAKE3",
3
+ "CAST",
4
+ "CHARACTER_LENGTH",
5
+ "CHAR_LENGTH",
6
+ "CRC32",
7
+ "CRC32IEEE",
8
+ "CRC64",
9
+ "DATABASE",
10
+ "DATE",
11
+ "DATE_DIFF",
12
+ "DATE_FORMAT",
13
+ "DATE_TRUNC",
14
+ "DAY",
15
+ "DAYOFMONTH",
16
+ "DAYOFWEEK",
17
+ "DAYOFYEAR",
18
+ "FORMAT_BYTES",
19
+ "FQDN",
20
+ "FROM_BASE64",
21
+ "FROM_DAYS",
22
+ "FROM_UNIXTIME",
23
+ "HOUR",
24
+ "INET6_ATON",
25
+ "INET6_NTOA",
26
+ "INET_ATON",
27
+ "INET_NTOA",
28
+ "IPv4CIDRToRange",
29
+ "IPv4NumToString",
30
+ "IPv4NumToStringClassC",
31
+ "IPv4StringToNum",
32
+ "IPv4StringToNumOrDefault",
33
+ "IPv4StringToNumOrNull",
34
+ "IPv4ToIPv6",
35
+ "IPv6CIDRToRange",
36
+ "IPv6NumToString",
37
+ "IPv6StringToNum",
38
+ "IPv6StringToNumOrDefault",
39
+ "IPv6StringToNumOrNull",
40
+ "JSONArrayLength",
41
+ "JSONExtract",
42
+ "JSONExtractArrayRaw",
43
+ "JSONExtractBool",
44
+ "JSONExtractFloat",
45
+ "JSONExtractInt",
46
+ "JSONExtractKeys",
47
+ "JSONExtractKeysAndValues",
48
+ "JSONExtractKeysAndValuesRaw",
49
+ "JSONExtractRaw",
50
+ "JSONExtractString",
51
+ "JSONExtractUInt",
52
+ "JSONHas",
53
+ "JSONKey",
54
+ "JSONLength",
55
+ "JSONRemoveDynamoDBAnnotations",
56
+ "JSONType",
57
+ "JSON_ARRAY_LENGTH",
58
+ "JSON_EXISTS",
59
+ "JSON_QUERY",
60
+ "JSON_VALUE",
61
+ "L1Distance",
62
+ "L1Norm",
63
+ "L1Normalize",
64
+ "L2Distance",
65
+ "L2Norm",
66
+ "L2Normalize",
67
+ "L2SquaredDistance",
68
+ "L2SquaredNorm",
69
+ "LAST_DAY",
70
+ "LinfDistance",
71
+ "LinfNorm",
72
+ "LinfNormalize",
73
+ "LpDistance",
74
+ "LpNorm",
75
+ "LpNormalize",
76
+ "MACNumToString",
77
+ "MACStringToNum",
78
+ "MACStringToOUI",
79
+ "MAP_FROM_ARRAYS",
80
+ "MD4",
81
+ "MD5",
82
+ "MILLISECOND",
83
+ "MINUTE",
84
+ "MONTH",
85
+ "OCTET_LENGTH",
86
+ "QUARTER",
87
+ "REGEXP_EXTRACT",
88
+ "REGEXP_MATCHES",
89
+ "REGEXP_REPLACE",
90
+ "SCHEMA",
91
+ "SECOND",
92
+ "SHA1",
93
+ "SHA224",
94
+ "SHA256",
95
+ "SHA384",
96
+ "SHA512",
97
+ "SHA512_256",
98
+ "SUBSTRING_INDEX",
99
+ "SVG",
100
+ "TIMESTAMP_DIFF",
101
+ "TO_BASE64",
102
+ "TO_DAYS",
103
+ "TO_UNIXTIME",
104
+ "ULIDStringToDateTime",
105
+ "URLHash",
106
+ "URLHierarchy",
107
+ "URLPathHierarchy",
108
+ "UTCTimestamp",
109
+ "UTC_timestamp",
110
+ "UUIDNumToString",
111
+ "UUIDStringToNum",
112
+ "UUIDToNum",
113
+ "UUIDv7ToDateTime",
114
+ "YEAR",
115
+ "YYYYMMDDToDate",
116
+ "YYYYMMDDToDate32",
117
+ "YYYYMMDDhhmmssToDateTime",
118
+ "YYYYMMDDhhmmssToDateTime64",
119
+ ]
120
+
121
+ general_functions_insensitive = [
122
+ "cast",
123
+ "character_length",
124
+ "char_length",
125
+ "crc32",
126
+ "crc32ieee",
127
+ "crc64",
128
+ "database",
129
+ "date",
130
+ "date_format",
131
+ "date_trunc",
132
+ "day",
133
+ "dayofmonth",
134
+ "dayofweek",
135
+ "dayofyear",
136
+ "format_bytes",
137
+ "fqdn",
138
+ "from_base64",
139
+ "from_days",
140
+ "from_unixtime",
141
+ "hour",
142
+ "inet6_aton",
143
+ "inet6_ntoa",
144
+ "inet_aton",
145
+ "inet_ntoa",
146
+ "json_array_length",
147
+ "last_day",
148
+ "millisecond",
149
+ "minute",
150
+ "month",
151
+ "octet_length",
152
+ "quarter",
153
+ "regexp_extract",
154
+ "regexp_matches",
155
+ "regexp_replace",
156
+ "schema",
157
+ "second",
158
+ "substring_index",
159
+ "to_base64",
160
+ "to_days",
161
+ "to_unixtime",
162
+ "utctimestamp",
163
+ "utc_timestamp",
164
+ "year",
165
+ ]
166
+
167
+ aggregate_functions = [
168
+ "BIT_AND",
169
+ "BIT_OR",
170
+ "BIT_XOR",
171
+ "COVAR_POP",
172
+ "COVAR_SAMP",
173
+ "STD",
174
+ "STDDEV_POP",
175
+ "STDDEV_SAMP",
176
+ "VAR_POP",
177
+ "VAR_SAMP",
178
+ "aggThrow",
179
+ "analysisOfVariance",
180
+ "anova",
181
+ "any",
182
+ "anyHeavy",
183
+ "anyLast",
184
+ "anyLast_respect_nulls",
185
+ "any_respect_nulls",
186
+ "any_value",
187
+ "any_value_respect_nulls",
188
+ "approx_top_count",
189
+ "approx_top_k",
190
+ "approx_top_sum",
191
+ "argMax",
192
+ "argMin",
193
+ "array_agg",
194
+ "array_concat_agg",
195
+ "avg",
196
+ "avgWeighted",
197
+ "boundingRatio",
198
+ "categoricalInformationValue",
199
+ "contingency",
200
+ "corr",
201
+ "corrMatrix",
202
+ "corrStable",
203
+ "count",
204
+ "covarPop",
205
+ "covarPopMatrix",
206
+ "covarPopStable",
207
+ "covarSamp",
208
+ "covarSampMatrix",
209
+ "covarSampStable",
210
+ "cramersV",
211
+ "cramersVBiasCorrected",
212
+ "deltaSum",
213
+ "deltaSumTimestamp",
214
+ "dense_rank",
215
+ "entropy",
216
+ "exponentialMovingAverage",
217
+ "exponentialTimeDecayedAvg",
218
+ "exponentialTimeDecayedCount",
219
+ "exponentialTimeDecayedMax",
220
+ "exponentialTimeDecayedSum",
221
+ "first_value",
222
+ "first_value_respect_nulls",
223
+ "flameGraph",
224
+ "groupArray",
225
+ "groupArrayInsertAt",
226
+ "groupArrayIntersect",
227
+ "groupArrayLast",
228
+ "groupArrayMovingAvg",
229
+ "groupArrayMovingSum",
230
+ "groupArraySample",
231
+ "groupArraySorted",
232
+ "groupBitAnd",
233
+ "groupBitOr",
234
+ "groupBitXor",
235
+ "groupBitmap",
236
+ "groupBitmapAnd",
237
+ "groupBitmapOr",
238
+ "groupBitmapXor",
239
+ "groupUniqArray",
240
+ "histogram",
241
+ "intervalLengthSum",
242
+ "kolmogorovSmirnovTest",
243
+ "kurtPop",
244
+ "kurtSamp",
245
+ "lagInFrame",
246
+ "largestTriangleThreeBuckets",
247
+ "last_value",
248
+ "last_value_respect_nulls",
249
+ "leadInFrame",
250
+ "lttb",
251
+ "mannWhitneyUTest",
252
+ "max",
253
+ "maxIntersections",
254
+ "maxIntersectionsPosition",
255
+ "maxMappedArrays",
256
+ "meanZTest",
257
+ "median",
258
+ "medianBFloat16",
259
+ "medianBFloat16Weighted",
260
+ "medianDD",
261
+ "medianDeterministic",
262
+ "medianExact",
263
+ "medianExactHigh",
264
+ "medianExactLow",
265
+ "medianExactWeighted",
266
+ "medianGK",
267
+ "medianInterpolatedWeighted",
268
+ "medianTDigest",
269
+ "medianTDigestWeighted",
270
+ "medianTiming",
271
+ "medianTimingWeighted",
272
+ "min",
273
+ "minMappedArrays",
274
+ "nonNegativeDerivative",
275
+ "nothing",
276
+ "nothingNull",
277
+ "nothingUInt64",
278
+ "nth_value",
279
+ "ntile",
280
+ "quantile",
281
+ "quantileBFloat16",
282
+ "quantileBFloat16Weighted",
283
+ "quantileDD",
284
+ "quantileDeterministic",
285
+ "quantileExact",
286
+ "quantileExactExclusive",
287
+ "quantileExactHigh",
288
+ "quantileExactInclusive",
289
+ "quantileExactLow",
290
+ "quantileExactWeighted",
291
+ "quantileGK",
292
+ "quantileInterpolatedWeighted",
293
+ "quantileTDigest",
294
+ "quantileTDigestWeighted",
295
+ "quantileTiming",
296
+ "quantileTimingWeighted",
297
+ "quantiles",
298
+ "quantilesBFloat16",
299
+ "quantilesBFloat16Weighted",
300
+ "quantilesDD",
301
+ "quantilesDeterministic",
302
+ "quantilesExact",
303
+ "quantilesExactExclusive",
304
+ "quantilesExactHigh",
305
+ "quantilesExactInclusive",
306
+ "quantilesExactLow",
307
+ "quantilesExactWeighted",
308
+ "quantilesGK",
309
+ "quantilesInterpolatedWeighted",
310
+ "quantilesTDigest",
311
+ "quantilesTDigestWeighted",
312
+ "quantilesTiming",
313
+ "quantilesTimingWeighted",
314
+ "rank",
315
+ "rankCorr",
316
+ "retention",
317
+ "row_number",
318
+ "sequenceCount",
319
+ "sequenceMatch",
320
+ "sequenceNextNode",
321
+ "simpleLinearRegression",
322
+ "singleValueOrNull",
323
+ "skewPop",
324
+ "skewSamp",
325
+ "sparkBar",
326
+ "sparkbar",
327
+ "stddevPop",
328
+ "stddevPopStable",
329
+ "stddevSamp",
330
+ "stddevSampStable",
331
+ "stochasticLinearRegression",
332
+ "stochasticLogisticRegression",
333
+ "studentTTest",
334
+ "sum",
335
+ "sumCount",
336
+ "sumKahan",
337
+ "sumMapFiltered",
338
+ "sumMapFilteredWithOverflow",
339
+ "sumMapWithOverflow",
340
+ "sumMappedArrays",
341
+ "sumWithOverflow",
342
+ "theilsU",
343
+ "topK",
344
+ "topKWeighted",
345
+ "uniq",
346
+ "uniqCombined",
347
+ "uniqCombined64",
348
+ "uniqExact",
349
+ "uniqHLL12",
350
+ "uniqTheta",
351
+ "uniqUpTo",
352
+ "varPop",
353
+ "varPopStable",
354
+ "varSamp",
355
+ "varSampStable",
356
+ "welchTTest",
357
+ "windowFunnel",
358
+ ]
359
+
360
+
1
361
  create_project_prompt = """
2
362
  You are a Tinybird expert. You will be given a prompt describing a data project and you will generate all the associated datasources and pipes.
3
363
  <datasource>
@@ -231,3 +591,290 @@ You are a Tinybird expert. You will be given a pipe endpoint containing differen
231
591
  - Extra context: {prompt}
232
592
  </instructions>
233
593
  """
594
+
595
+ test_create_prompt = """
596
+ You are a Tinybird expert. You will be given a pipe containing different nodes with SQL and Tinybird templating syntax. You will generate URLs to test it with different parameters combinations.
597
+ <pipe>
598
+ <name>{name}</name>
599
+ <content>{content}</content>
600
+ <parameters>{parameters}</parameters>
601
+ </pipe>
602
+
603
+ <instructions>
604
+ - Every test name must be unique.
605
+ - The test command must be a valid Tinybird command that can be run in the terminal.
606
+ - The test command can have as many parameters as are needed to test the pipe.
607
+ - The parameter within Tinybird templating syntax looks like this one {{String(my_param_name, default_value)}}.
608
+ - If there are no parameters, you can omit parameters and generate a single test command.
609
+ - The format of the parameters is the following: ?param1=value1&param2=value2&param3=value3
610
+ </instructions>
611
+
612
+ Follow the instructions and generate the following response with no additional text:
613
+
614
+ <response>
615
+ <test>
616
+ <name>[test name here]</name>
617
+ <description>[test description here]</description>
618
+ <parameters>[parameters here]</parameters>
619
+ </test>
620
+ </response>
621
+ """
622
+
623
+
624
+ def create_prompt(existing_resources: str) -> str:
625
+ return """
626
+ You are a Tinybird expert. You will be given a prompt to generate Tinybird resources: datasources and/or pipes.
627
+ <existing_resources>
628
+ {existing_resources}
629
+ </existing_resources>
630
+ <datasource_file_instructions>
631
+ - The datasource names must be unique.
632
+ - No indentation is allowed for property names: DESCRIPTION, SCHEMA, ENGINE, ENGINE_PARTITION_KEY, ENGINE_SORTING_KEY, etc.
633
+ </datasource_file_instructions>
634
+ <pipe_file_instructions>
635
+ - The pipe names must be unique.
636
+ - Nodes do NOT use the same name as the Pipe they belong to. So if the pipe name is "my_pipe", the nodes must be named differently, like "my_pipe_node_1", "my_pipe_node_2", etc.
637
+ - Nodes can't have the same exact name as the Pipe they belong to.
638
+ - Avoid more than one node per pipe unless it is really necessary or requested by the user.
639
+ - No indentation is allowed for property names: DESCRIPTION, NODE, SQL, TYPE, etc.
640
+ - Endpoints can export Prometheus format. The node SQL must have two named columns:
641
+ - name (String): The name of the metric
642
+ - value (Number): The numeric value for the metric.
643
+ - and then some optional columns:
644
+ - help (String): A description of the metric.
645
+ - timestamp (Number): A Unix timestamp for the metric.
646
+ - type (String): Defines the metric type (counter, gauge, histogram, summary, untyped, or empty).
647
+ - labels (Map(String, String)): A set of key-value pairs providing metric dimensions.
648
+ - Use prometheus format when you are asked to monitor something
649
+ </pipe_file_instructions>
650
+ <sql_instructions>
651
+ - The SQL query must be a valid ClickHouse SQL query that mixes ClickHouse syntax and Tinybird templating syntax (Tornado templating language under the hood).
652
+ - SQL queries with parameters must start with "%" character and a newline on top of every query to be able to use the parameters. Examples:
653
+ <invalid_query_with_parameters_no_%_on_top>
654
+ SELECT * FROM events WHERE session_id={{{{String(my_param, "default_value")}}}}
655
+ </invalid_query_with_parameters_no_%_on_top>
656
+ <valid_query_with_parameters_with_%_on_top>
657
+ %
658
+ SELECT * FROM events WHERE session_id={{{{String(my_param, "default_value")}}}}
659
+ </valid_query_with_parameters_with_%_on_top>
660
+ - The Parameter functions like this one {{{{String(my_param_name,default_value)}}}} can be one of the following: String, DateTime, Date, Float32, Float64, Int, Integer, UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
661
+ - Parameter names must be different from column names. Pass always the param name and a default value to the function.
662
+ - Code inside the template {{{{template_expression}}}} follows the rules of Tornado templating language so no module is allowed to be imported. So for example you can't use now() as default value for a DateTime parameter. You need an if else block like this:
663
+ <invalid_condition_with_now>
664
+ AND timestamp BETWEEN {{DateTime(start_date, now() - interval 30 day)}} AND {{DateTime(end_date, now())}}
665
+ </invalid_condition_with_now>
666
+ <valid_condition_without_now>
667
+ {{%if not defined(start_date)%}}
668
+ timestamp BETWEEN now() - interval 30 day
669
+ {{%else%}}
670
+ timestamp BETWEEN {{{{DateTime(start_date)}}}}
671
+ {{%end%}}
672
+ {{%if not defined(end_date)%}}
673
+ AND now()
674
+ {{%else%}}
675
+ AND {{{{DateTime(end_date)}}}}
676
+ {{%end%}}
677
+ </valid_condition_without_now>
678
+ - Use datasource names as table names when doing SELECT statements.
679
+ - Do not use pipe names as table names.
680
+ - The available datasource names to use in the SQL are the ones present in the existing_resources section or the ones you will create.
681
+ - Use node names as table names only when nodes are present in the same file.
682
+ - Do not reference the current node name in the SQL.
683
+ - SQL queries only accept SELECT statements with conditions, aggregations, joins, etc.
684
+ - Do NOT use CREATE TABLE, INSERT INTO, CREATE DATABASE, etc.
685
+ - Use ONLY SELECT statements in the SQL section.
686
+ - INSERT INTO is not supported in SQL section.
687
+ - General functions supported are: {general_functions}
688
+ - Character insensitive functions supported are: {general_functions_insensitive}
689
+ - Aggregate functions supported are: {aggregate_functions}
690
+ - Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
691
+ - If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
692
+ - When aliasing a column, use first the column name and then the alias.
693
+ - General functions and aggregate functions are case sensitive.
694
+ - Character insensitive functions are case insensitive.
695
+ </sql_instructions>
696
+
697
+ <datasource_content>
698
+ DESCRIPTION >
699
+ Some meaningful description of the datasource
700
+
701
+ SCHEMA >
702
+ `column_name_1` clickhouse_tinybird_compatible_data_type `json:$.column_name_1`,
703
+ `column_name_2` clickhouse_tinybird_compatible_data_type `json:$.column_name_2`,
704
+ ...
705
+ `column_name_n` clickhouse_tinybird_compatible_data_type `json:$.column_name_n`
706
+
707
+ ENGINE "MergeTree"
708
+ ENGINE_PARTITION_KEY "partition_key"
709
+ ENGINE_SORTING_KEY "sorting_key_1, sorting_key_2, ..."
710
+ </datasource_content>
711
+ <pipe_content>
712
+ DESCRIPTION >
713
+ Some meaningful description of the pipe
714
+
715
+ NODE node_1
716
+ SQL >
717
+ [sql query using clickhouse syntax and tinybird templating syntax and starting always with SELECT or %\nSELECT]
718
+
719
+ </pipe_content>
720
+
721
+ Use the following format to generate the response and do not wrap it in any other text, including the <response> tag.
722
+
723
+ <response>
724
+ <resource>
725
+ <type>[datasource or pipe]</type>
726
+ <name>[resource name here]</name>
727
+ <content>[resource content here]</content>
728
+ </resource>
729
+ </response>
730
+
731
+ """.format(
732
+ existing_resources=existing_resources,
733
+ general_functions=general_functions,
734
+ general_functions_insensitive=general_functions_insensitive,
735
+ aggregate_functions=aggregate_functions,
736
+ )
737
+
738
+
739
+ def mock_prompt(rows: int) -> str:
740
+ return f"""
741
+ Given the schema for a Tinybird datasource, create a ClickHouse SQL query to generate some random data that matches that schema.
742
+
743
+ Response format MUST be just a valid clickhouse sql query.
744
+
745
+ <example>
746
+ <example_datasource_schema>
747
+ SCHEMA >
748
+ experience_gained Int16 `json:$.experience_gained`,
749
+ level Int16 `json:$.level`,
750
+ monster_kills Int16 `json:$.monster_kills`,
751
+ player_id String `json:$.player_id`,
752
+ pvp_kills Int16 `json:$.pvp_kills`,
753
+ quest_completions Int16 `json:$.quest_completions`,
754
+ timestamp DateTime `json:$.timestamp`
755
+ </example_datasource_schema>
756
+ <example_output>
757
+
758
+ SELECT
759
+ rand() % 1000 AS experience_gained, -- Random experience gained between 0 and 999
760
+ 1 + rand() % 100 AS level, -- Random level between 1 and 100
761
+ rand() % 500 AS monster_kills, -- Random monster kills between 0 and 499
762
+ concat('player_', toString(rand() % 10000)) AS player_id, -- Random player IDs like "player_1234"
763
+ rand() % 50 AS pvp_kills, -- Random PvP kills between 0 and 49
764
+ rand() % 200 AS quest_completions, -- Random quest completions between 0 and 199
765
+ now() - rand() % 86400 AS timestamp -- Random timestamp within the last day
766
+ FROM numbers({rows})
767
+ </example_output>
768
+ </example>
769
+
770
+ <instructions>
771
+ - The query MUST return a random sample of data that matches the schema.
772
+ - The query MUST return a valid clickhouse sql query.
773
+ - The query MUST return a sample of EXACTLY {rows} rows.
774
+ - The query MUST be valid for clickhouse and Tinybird.
775
+ - FROM numbers({rows}) part is mandatory.
776
+ - Do NOT include ```clickhouse or ```sql or any other wrapping text to the sql query.
777
+ - Do NOT use any of these functions: elementAt
778
+ - Do NOT add a semicolon at the end of the query
779
+ - Do NOT add any FORMAT at the end of the query, because it will be added later by Tinybird.
780
+ - General functions supported are: {general_functions}
781
+ - Character insensitive functions supported are: {general_functions_insensitive}
782
+ - Aggregate functions supported are: {aggregate_functions}
783
+ - Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
784
+ - If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
785
+ </instructions>
786
+
787
+ <more_examples>
788
+ # Examples with different schemas, like an array field or a nested JSON field:
789
+
790
+ ## Example schema with an array field:
791
+
792
+ ### Schema:
793
+
794
+ SCHEMA >
795
+ `order_id` UInt64 `json:$.order_id`,
796
+ `customer_id` UInt64 `json:$.customer_id`,
797
+ `order_date` DateTime `json:$.order_date`,
798
+ `total_amount` Float64 `json:$.total_amount`,
799
+ `items` Array(String) `json:$.items[:]` // This is an array field
800
+
801
+ ### Desired final output of the query:
802
+ {{
803
+ "order_id": 123456,
804
+ "customer_id": 7890,
805
+ "order_date": "2024-11-30T10:30:00.000Z",
806
+ "total_amount": 150.0,
807
+ "items": ["item1", "item2", "item3"]
808
+ }}
809
+
810
+ ### Example SQL output with an array field:
811
+
812
+ SELECT
813
+ concat('ord_', toString(rand() % 10000)) AS order_id,
814
+ concat('cust_', toString(rand() % 10000)) AS customer_id,
815
+ now() - rand() % 86400 AS order_date,
816
+ rand() % 1000 AS total_amount,
817
+ arrayMap(x -> concat('item_', toString(x)), range(1, rand() % 5 + 1)) AS items
818
+ FROM numbers(ROWS)
819
+
820
+ ## Example schema with a nested JSON field:
821
+
822
+ ### Schema:
823
+
824
+ SCHEMA >
825
+ `request_id` String `json:$.request_id`,
826
+ `timestamp` DateTime `json:$.timestamp`,
827
+ `model` String `json:$.request.model`,
828
+ `temperature` Float32 `json:$.request.options.temperature`,
829
+ `max_tokens` UInt32 `json:$.request.options.max_tokens`,
830
+ `stream` UInt8 `json:$.request.options.stream`
831
+
832
+ ### Desired final output of the query:
833
+
834
+ Note that the important part is generating the nested fields:
835
+ json:$.request.options.max_tokens > this means that the max_tokens field is nested inside the options field inside the request field.
836
+
837
+ {{
838
+ "request_id": "req_abc123",
839
+ "timestamp": "2024-11-30T10:30:00.000Z",
840
+ "request": {{
841
+ "model": "gpt-4",
842
+ "options": {{
843
+ "temperature": 0.7,
844
+ "max_tokens": 1000,
845
+ "stream": false
846
+ }}
847
+ }}
848
+ }}
849
+
850
+ ### Example SQL output with nested fields:
851
+
852
+ SELECT
853
+ request_id,
854
+ timestamp,
855
+ CAST(concat('{{
856
+ "model": "', model, '",
857
+ "options": {{
858
+ "temperature": ', temperature, ',
859
+ "max_tokens": ', max_tokens, ',
860
+ "stream": ', IF(stream = 1, 'true', 'false'), '
861
+ }}
862
+ }}'), 'JSON') AS request
863
+ FROM
864
+ (
865
+ SELECT
866
+ concat('req_', lower(hex(randomString(6)))) AS request_id,
867
+ (now() - toIntervalDay(rand() % 30)) + toIntervalSecond(rand() % 86400) AS timestamp,
868
+ ['gpt-4', 'gpt-3.5-turbo', 'gpt-4-turbo'][(rand() % 3) + 1] AS model,
869
+ round(rand() / 10, 2) AS temperature,
870
+ 500 + (rand() % 2500) AS max_tokens,
871
+ rand() % 2 AS stream
872
+ FROM numbers(ROWS)
873
+ )
874
+ </more_examples>
875
+
876
+ Follow the instructions and generate the following response with no additional text in the following format:
877
+ <response>
878
+ <sql>[raw sql query here]</sql>
879
+ </response>
880
+ """
tinybird/sql.py CHANGED
@@ -8,6 +8,8 @@ from typing import Any, Dict, Iterable, List, Optional
8
8
  valid_chars_name: str = string.ascii_letters + string.digits + "._`*<>+-'"
9
9
  valid_chars_fn: str = valid_chars_name + "[](),=!?:/ \n\t\r"
10
10
 
11
+ INDEX_WHITELIST = ["minmax", "set", "bloom_filter", "ngrambf_v1", "tokenbf_v1"]
12
+
11
13
 
12
14
  @dataclass
13
15
  class TableIndex:
@@ -37,6 +39,13 @@ class TableIndex:
37
39
  def clear_index_sql(self):
38
40
  return f"CLEAR INDEX IF EXISTS {self.name}"
39
41
 
42
+ def validate_allowed(self):
43
+ """
44
+ Validate at API level not to depend on CLI version
45
+ """
46
+ if not any(index in self.type_full for index in INDEX_WHITELIST):
47
+ raise ValueError(f"Not allowed index '{self.type_full}'")
48
+
40
49
 
41
50
  @dataclass
42
51
  class TableProjection: