tinybird 0.0.1.dev25__py3-none-any.whl → 0.0.1.dev27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tinybird might be problematic. Click here for more details.
- tinybird/config.py +1 -1
- tinybird/datatypes.py +46 -57
- tinybird/git_settings.py +4 -4
- tinybird/prompts.py +644 -0
- tinybird/sql.py +9 -0
- tinybird/sql_toolset.py +17 -3
- tinybird/syncasync.py +1 -1
- tinybird/tb/__cli__.py +2 -2
- tinybird/tb/cli.py +2 -0
- tinybird/tb/modules/build.py +47 -19
- tinybird/tb/modules/build_server.py +75 -0
- tinybird/tb/modules/cli.py +22 -0
- tinybird/tb/modules/common.py +2 -2
- tinybird/tb/modules/config.py +13 -14
- tinybird/tb/modules/create.py +125 -120
- tinybird/tb/modules/datafile/build.py +28 -0
- tinybird/tb/modules/datafile/common.py +1 -0
- tinybird/tb/modules/datafile/fixture.py +10 -6
- tinybird/tb/modules/datafile/parse_pipe.py +2 -0
- tinybird/tb/modules/datasource.py +1 -1
- tinybird/tb/modules/deploy.py +160 -0
- tinybird/tb/modules/llm.py +32 -16
- tinybird/tb/modules/llm_utils.py +24 -0
- tinybird/tb/modules/local.py +2 -2
- tinybird/tb/modules/login.py +8 -6
- tinybird/tb/modules/mock.py +13 -9
- tinybird/tb/modules/test.py +69 -47
- tinybird/tb/modules/watch.py +2 -2
- tinybird/tb_cli_modules/common.py +2 -2
- tinybird/tb_cli_modules/config.py +5 -5
- tinybird/tornado_template.py +1 -3
- {tinybird-0.0.1.dev25.dist-info → tinybird-0.0.1.dev27.dist-info}/METADATA +1 -1
- {tinybird-0.0.1.dev25.dist-info → tinybird-0.0.1.dev27.dist-info}/RECORD +36 -33
- {tinybird-0.0.1.dev25.dist-info → tinybird-0.0.1.dev27.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev25.dist-info → tinybird-0.0.1.dev27.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev25.dist-info → tinybird-0.0.1.dev27.dist-info}/top_level.txt +0 -0
tinybird/prompts.py
CHANGED
|
@@ -1,3 +1,363 @@
|
|
|
1
|
+
# Case-sensitive, non-aggregate SQL function names. The prompt builders in this
# module interpolate this list verbatim into the "General functions supported"
# instruction, so both the spelling and the order of the entries matter.
general_functions = [
    # B - D
    "BLAKE3", "CAST", "CHARACTER_LENGTH", "CHAR_LENGTH",
    "CRC32", "CRC32IEEE", "CRC64",
    "DATABASE", "DATE", "DATE_DIFF", "DATE_FORMAT", "DATE_TRUNC",
    "DAY", "DAYOFMONTH", "DAYOFWEEK", "DAYOFYEAR",
    # F - H
    "FORMAT_BYTES", "FQDN", "FROM_BASE64", "FROM_DAYS", "FROM_UNIXTIME",
    "HOUR",
    # I
    "INET6_ATON", "INET6_NTOA", "INET_ATON", "INET_NTOA",
    "IPv4CIDRToRange", "IPv4NumToString", "IPv4NumToStringClassC",
    "IPv4StringToNum", "IPv4StringToNumOrDefault", "IPv4StringToNumOrNull",
    "IPv4ToIPv6",
    "IPv6CIDRToRange", "IPv6NumToString",
    "IPv6StringToNum", "IPv6StringToNumOrDefault", "IPv6StringToNumOrNull",
    # J
    "JSONArrayLength", "JSONExtract", "JSONExtractArrayRaw", "JSONExtractBool",
    "JSONExtractFloat", "JSONExtractInt", "JSONExtractKeys",
    "JSONExtractKeysAndValues", "JSONExtractKeysAndValuesRaw",
    "JSONExtractRaw", "JSONExtractString", "JSONExtractUInt",
    "JSONHas", "JSONKey", "JSONLength", "JSONRemoveDynamoDBAnnotations",
    "JSONType",
    "JSON_ARRAY_LENGTH", "JSON_EXISTS", "JSON_QUERY", "JSON_VALUE",
    # L
    "L1Distance", "L1Norm", "L1Normalize",
    "L2Distance", "L2Norm", "L2Normalize",
    "L2SquaredDistance", "L2SquaredNorm",
    "LAST_DAY",
    "LinfDistance", "LinfNorm", "LinfNormalize",
    "LpDistance", "LpNorm", "LpNormalize",
    # M - R
    "MACNumToString", "MACStringToNum", "MACStringToOUI",
    "MAP_FROM_ARRAYS", "MD4", "MD5",
    "MILLISECOND", "MINUTE", "MONTH",
    "OCTET_LENGTH", "QUARTER",
    "REGEXP_EXTRACT", "REGEXP_MATCHES", "REGEXP_REPLACE",
    # S - T
    "SCHEMA", "SECOND",
    "SHA1", "SHA224", "SHA256", "SHA384", "SHA512", "SHA512_256",
    "SUBSTRING_INDEX", "SVG",
    "TIMESTAMP_DIFF", "TO_BASE64", "TO_DAYS", "TO_UNIXTIME",
    # U - Y
    "ULIDStringToDateTime",
    "URLHash", "URLHierarchy", "URLPathHierarchy",
    "UTCTimestamp", "UTC_timestamp",
    "UUIDNumToString", "UUIDStringToNum", "UUIDToNum", "UUIDv7ToDateTime",
    "YEAR",
    "YYYYMMDDToDate", "YYYYMMDDToDate32",
    "YYYYMMDDhhmmssToDateTime", "YYYYMMDDhhmmssToDateTime64",
]
|
|
120
|
+
|
|
121
|
+
# Lower-case function names, interpolated verbatim into the
# "Character insensitive functions supported" instruction of the prompts in
# this module. Order is preserved because the list is rendered with str().
general_functions_insensitive = [
    "cast", "character_length", "char_length",
    "crc32", "crc32ieee", "crc64",
    "database", "date", "date_format", "date_trunc",
    "day", "dayofmonth", "dayofweek", "dayofyear",
    "format_bytes", "fqdn", "from_base64", "from_days", "from_unixtime",
    "hour",
    "inet6_aton", "inet6_ntoa", "inet_aton", "inet_ntoa",
    "json_array_length", "last_day",
    "millisecond", "minute", "month",
    "octet_length", "quarter",
    "regexp_extract", "regexp_matches", "regexp_replace",
    "schema", "second", "substring_index",
    "to_base64", "to_days", "to_unixtime",
    "utctimestamp", "utc_timestamp",
    "year",
]
|
|
166
|
+
|
|
167
|
+
# Function names interpolated verbatim into the "Aggregate functions supported"
# instruction of the prompts in this module. Order is preserved because the
# list is rendered with str().
aggregate_functions = [
    # Upper-case spellings
    "BIT_AND", "BIT_OR", "BIT_XOR",
    "COVAR_POP", "COVAR_SAMP",
    "STD", "STDDEV_POP", "STDDEV_SAMP",
    "VAR_POP", "VAR_SAMP",
    # a
    "aggThrow", "analysisOfVariance", "anova",
    "any", "anyHeavy", "anyLast", "anyLast_respect_nulls",
    "any_respect_nulls", "any_value", "any_value_respect_nulls",
    "approx_top_count", "approx_top_k", "approx_top_sum",
    "argMax", "argMin",
    "array_agg", "array_concat_agg",
    "avg", "avgWeighted",
    # b - f
    "boundingRatio", "categoricalInformationValue", "contingency",
    "corr", "corrMatrix", "corrStable",
    "count",
    "covarPop", "covarPopMatrix", "covarPopStable",
    "covarSamp", "covarSampMatrix", "covarSampStable",
    "cramersV", "cramersVBiasCorrected",
    "deltaSum", "deltaSumTimestamp", "dense_rank",
    "entropy",
    "exponentialMovingAverage",
    "exponentialTimeDecayedAvg", "exponentialTimeDecayedCount",
    "exponentialTimeDecayedMax", "exponentialTimeDecayedSum",
    "first_value", "first_value_respect_nulls",
    "flameGraph",
    # g
    "groupArray", "groupArrayInsertAt", "groupArrayIntersect",
    "groupArrayLast", "groupArrayMovingAvg", "groupArrayMovingSum",
    "groupArraySample", "groupArraySorted",
    "groupBitAnd", "groupBitOr", "groupBitXor",
    "groupBitmap", "groupBitmapAnd", "groupBitmapOr", "groupBitmapXor",
    "groupUniqArray",
    # h - l
    "histogram", "intervalLengthSum", "kolmogorovSmirnovTest",
    "kurtPop", "kurtSamp",
    "lagInFrame", "largestTriangleThreeBuckets",
    "last_value", "last_value_respect_nulls",
    "leadInFrame", "lttb",
    # m
    "mannWhitneyUTest",
    "max", "maxIntersections", "maxIntersectionsPosition", "maxMappedArrays",
    "meanZTest",
    "median", "medianBFloat16", "medianBFloat16Weighted", "medianDD",
    "medianDeterministic", "medianExact", "medianExactHigh", "medianExactLow",
    "medianExactWeighted", "medianGK", "medianInterpolatedWeighted",
    "medianTDigest", "medianTDigestWeighted",
    "medianTiming", "medianTimingWeighted",
    "min", "minMappedArrays",
    # n
    "nonNegativeDerivative",
    "nothing", "nothingNull", "nothingUInt64",
    "nth_value", "ntile",
    # q
    "quantile", "quantileBFloat16", "quantileBFloat16Weighted", "quantileDD",
    "quantileDeterministic", "quantileExact", "quantileExactExclusive",
    "quantileExactHigh", "quantileExactInclusive", "quantileExactLow",
    "quantileExactWeighted", "quantileGK", "quantileInterpolatedWeighted",
    "quantileTDigest", "quantileTDigestWeighted",
    "quantileTiming", "quantileTimingWeighted",
    "quantiles", "quantilesBFloat16", "quantilesBFloat16Weighted",
    "quantilesDD", "quantilesDeterministic", "quantilesExact",
    "quantilesExactExclusive", "quantilesExactHigh", "quantilesExactInclusive",
    "quantilesExactLow", "quantilesExactWeighted", "quantilesGK",
    "quantilesInterpolatedWeighted",
    "quantilesTDigest", "quantilesTDigestWeighted",
    "quantilesTiming", "quantilesTimingWeighted",
    # r - s
    "rank", "rankCorr", "retention", "row_number",
    "sequenceCount", "sequenceMatch", "sequenceNextNode",
    "simpleLinearRegression", "singleValueOrNull",
    "skewPop", "skewSamp",
    "sparkBar", "sparkbar",
    "stddevPop", "stddevPopStable", "stddevSamp", "stddevSampStable",
    "stochasticLinearRegression", "stochasticLogisticRegression",
    "studentTTest",
    "sum", "sumCount", "sumKahan",
    "sumMapFiltered", "sumMapFilteredWithOverflow", "sumMapWithOverflow",
    "sumMappedArrays", "sumWithOverflow",
    # t - w
    "theilsU", "topK", "topKWeighted",
    "uniq", "uniqCombined", "uniqCombined64", "uniqExact",
    "uniqHLL12", "uniqTheta", "uniqUpTo",
    "varPop", "varPopStable", "varSamp", "varSampStable",
    "welchTTest", "windowFunnel",
]
|
|
359
|
+
|
|
360
|
+
|
|
1
361
|
create_project_prompt = """
|
|
2
362
|
You are a Tinybird expert. You will be given a prompt describing a data project and you will generate all the associated datasources and pipes.
|
|
3
363
|
<datasource>
|
|
@@ -231,3 +591,287 @@ You are a Tinybird expert. You will be given a pipe endpoint containing differen
|
|
|
231
591
|
- Extra context: {prompt}
|
|
232
592
|
</instructions>
|
|
233
593
|
"""
|
|
594
|
+
|
|
595
|
+
test_create_prompt = """
|
|
596
|
+
You are a Tinybird expert. You will be given a pipe containing different nodes with SQL and Tinybird templating syntax. You will generate URLs to test it with different parameters combinations.
|
|
597
|
+
<pipe>
|
|
598
|
+
<name>{name}</name>
|
|
599
|
+
<content>{content}</content>
|
|
600
|
+
<parameters>{parameters}</parameters>
|
|
601
|
+
</pipe>
|
|
602
|
+
|
|
603
|
+
<instructions>
|
|
604
|
+
- Every test name must be unique.
|
|
605
|
+
- The test command must be a valid Tinybird command that can be run in the terminal.
|
|
606
|
+
- The test command can have as many parameters as are needed to test the pipe.
|
|
607
|
+
- The parameter within Tinybird templating syntax looks like this one {{String(my_param_name, default_value)}}.
|
|
608
|
+
- If there are no parameters, you can omit parameters and generate a single test command.
|
|
609
|
+
- The format of the parameters is the following: ?param1=value1¶m2=value2¶m3=value3
|
|
610
|
+
</instructions>
|
|
611
|
+
|
|
612
|
+
Follow the instructions and generate the following response with no additional text:
|
|
613
|
+
|
|
614
|
+
<response>
|
|
615
|
+
<test>
|
|
616
|
+
<name>[test name here]</name>
|
|
617
|
+
<description>[test description here]</description>
|
|
618
|
+
<parameters>[parameters here]</parameters>
|
|
619
|
+
</test>
|
|
620
|
+
</response>
|
|
621
|
+
"""
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def create_prompt(existing_resources: str) -> str:
    """Build the prompt used to generate Tinybird datasources and pipes.

    The template is rendered with ``str.format``. Literal braces are therefore
    doubled: ``{{{{expr}}}}`` in the template reaches the model as
    ``{{expr}}`` and ``{{% ... %}}`` reaches it as ``{% ... %}``.

    Args:
        existing_resources: Text inserted verbatim between the
            ``<existing_resources>`` tags.

    Returns:
        The rendered prompt, including the module-level
        ``general_functions``, ``general_functions_insensitive`` and
        ``aggregate_functions`` lists.
    """
    return """
You are a Tinybird expert. You will be given a prompt to generate Tinybird resources: datasources and/or pipes.
<existing_resources>
{existing_resources}
</existing_resources>
<instructions>
- The datasource names must be unique.
- The pipe names must be unique.
- The datasource will be the landing table for the data.
- The SQL query must be a valid ClickHouse SQL query that mixes ClickHouse syntax and Tinybird templating syntax (Tornado templating language under the hood).
- SQL queries with parameters must start with "%" character and a newline on top of every query to be able to use the parameters. Examples:
<invalid_query_with_parameters_no_%_on_top>
SELECT * FROM events WHERE session_id={{{{String(my_param, "default_value")}}}}
</invalid_query_with_parameters_no_%_on_top>
<valid_query_with_parameters_with_%_on_top>
%
SELECT * FROM events WHERE session_id={{{{String(my_param, "default_value")}}}}
</valid_query_with_parameters_with_%_on_top>
- The Parameter functions like this one {{{{String(my_param_name,default_value)}}}} can be one of the following: String, DateTime, Date, Float32, Float64, Int, Integer, UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
- Parameter names must be different from column names. Pass always the param name and a default value to the function.
- Code inside the template {{{{template_expression}}}} follows the rules of Tornado templating language so no module is allowed to be imported. So for example you can't use now() as default value for a DateTime parameter. You need an if else block like this:
<invalid_condition_with_now>
AND timestamp BETWEEN {{{{DateTime(start_date, now() - interval 30 day)}}}} AND {{{{DateTime(end_date, now())}}}}
</invalid_condition_with_now>
<valid_condition_without_now>
{{%if not defined(start_date)%}}
timestamp BETWEEN now() - interval 30 day
{{%else%}}
timestamp BETWEEN {{{{DateTime(start_date)}}}}
{{%end%}}
{{%if not defined(end_date)%}}
AND now()
{{%else%}}
AND {{{{DateTime(end_date)}}}}
{{%end%}}
</valid_condition_without_now>
- Nodes can't have the same exact name as the Pipe they belong to.
- Endpoints can export Prometheus format. The node SQL must have these two columns:
    name (String): The name of the metric
    value (Number): The numeric value for the metric.
    and then some optional columns:
    help (String): A description of the metric.
    timestamp (Number): A Unix timestamp for the metric.
    type (String): Defines the metric type (counter, gauge, histogram, summary, untyped, or empty).
    labels (Map(String, String)): A set of key-value pairs providing metric dimensions.
- Use prometheus format when you are asked to monitor something
- Nodes do NOT use the same name as the Pipe they belong to. So if the pipe name is "my_pipe", the nodes must be named "my_pipe_node_1", "my_pipe_node_2", etc.
- If you use some sql function, use just Clickhouse and Tinybird compatible functions.
- Definition settings does not have indentation.
</instructions>
<sql_instructions>
- Use datasource names as table names when doing SELECT statements.
- Use node names as table names only when nodes are present in the same file.
- Do not reference the current node name in the SQL.
- SQL queries only accept SELECT statements with conditions, aggregations, joins, etc.
- General functions supported are: {general_functions}
- Character insensitive functions supported are: {general_functions_insensitive}
- Aggregate functions supported are: {aggregate_functions}
- Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
- If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
- When aliasing a column, use first the column name and then the alias.
</sql_instructions>
<datasource_file_instructions>
- No indentation is allowed for property names: DESCRIPTION, SCHEMA, ENGINE, ENGINE_PARTITION_KEY, ENGINE_SORTING_KEY, etc.
</datasource_file_instructions>
<pipe_file_instructions>
- No indentation is allowed for property names: DESCRIPTION, NODE, SQL, TYPE, etc.
</pipe_file_instructions>
<response>
<resource>
<type>[datasource or pipe]</type>
<name>[resource name here]</name>
<content>[resource content here]</content>
</resource>
</response>

<datasource_content>
DESCRIPTION >
    Some meaningful description of the datasource

SCHEMA >
    `column_name_1` clickhouse_tinybird_compatible_data_type `json:$.column_name_1`,
    `column_name_2` clickhouse_tinybird_compatible_data_type `json:$.column_name_2`,
    ...
    `column_name_n` clickhouse_tinybird_compatible_data_type `json:$.column_name_n`

ENGINE "MergeTree"
ENGINE_PARTITION_KEY "partition_key"
ENGINE_SORTING_KEY "sorting_key_1, sorting_key_2, ..."
</datasource_content>
<pipe_content>
DESCRIPTION >
    Some meaningful description of the pipe

NODE node_1
SQL >
    sql_query_using_clickhouse_syntax_and_tinybird_templating_syntax

...

NODE node_n
SQL >
    sql_query_using_clickhouse_syntax_and_tinybird_templating_syntax
</pipe_content>
""".format(
        existing_resources=existing_resources,
        general_functions=general_functions,
        general_functions_insensitive=general_functions_insensitive,
        aggregate_functions=aggregate_functions,
    )
|
|
735
|
+
|
|
736
|
+
|
|
737
|
+
def mock_prompt(rows: int) -> str:
    """Build the prompt asking for a ClickHouse query that generates mock rows.

    The template is an f-string, so literal braces in the JSON and SQL
    examples are doubled.

    Args:
        rows: Number of rows the generated query must return. It is
            interpolated into the "EXACTLY {rows} rows" instruction and into
            the ``numbers(...)`` call of every example query.

    Returns:
        The rendered prompt, including the module-level
        ``general_functions``, ``general_functions_insensitive`` and
        ``aggregate_functions`` lists.
    """
    return f"""
Given the schema for a Tinybird datasource, return a ClickHouse SQL query that generates random data matching that schema.

Response format MUST be just a valid clickhouse sql query.

<example>
<example_input>
SCHEMA >
    experience_gained Int16 `json:$.experience_gained`,
    level Int16 `json:$.level`,
    monster_kills Int16 `json:$.monster_kills`,
    player_id String `json:$.player_id`,
    pvp_kills Int16 `json:$.pvp_kills`,
    quest_completions Int16 `json:$.quest_completions`,
    timestamp DateTime `json:$.timestamp`
</example_input>
<example_output>

SELECT
    rand() % 1000 AS experience_gained, -- Random experience gained between 0 and 999
    1 + rand() % 100 AS level, -- Random level between 1 and 100
    rand() % 500 AS monster_kills, -- Random monster kills between 0 and 499
    concat('player_', toString(rand() % 10000)) AS player_id, -- Random player IDs like "player_1234"
    rand() % 50 AS pvp_kills, -- Random PvP kills between 0 and 49
    rand() % 200 AS quest_completions, -- Random quest completions between 0 and 199
    now() - rand() % 86400 AS timestamp -- Random timestamp within the last day
FROM numbers({rows})
</example_output>
</example>

<instructions>
- The query MUST return a random sample of data that matches the schema.
- The query MUST return a valid clickhouse sql query.
- The query MUST return a sample of EXACTLY {rows} rows.
- The query MUST be valid for clickhouse and Tinybird.
- Return JUST the sql query, without any other text or symbols.
- Do NOT include ```clickhouse or ```sql or any other wrapping text.
- Do NOT use any of these functions: elementAt
- Do NOT add a semicolon at the end of the query
- Do NOT add any FORMAT at the end of the query, because it will be added later by Tinybird.
- General functions supported are: {general_functions}
- Character insensitive functions supported are: {general_functions_insensitive}
- Aggregate functions supported are: {aggregate_functions}
- Do not use any function that is not present in the list of general functions, character insensitive functions and aggregate functions.
- If the function is not present in the list, the sql query will fail, so avoid at all costs to use any function that is not present in the list.
</instructions>

<more_examples>
# Examples with different schemas, like an array field or a nested JSON field:

## Example schema with an array field:

### Schema:

SCHEMA >
    `order_id` UInt64 `json:$.order_id`,
    `customer_id` UInt64 `json:$.customer_id`,
    `order_date` DateTime `json:$.order_date`,
    `total_amount` Float64 `json:$.total_amount`,
    `items` Array(String) `json:$.items[:]` // This is an array field

### Desired final output of the query:
{{
    "order_id": 123456,
    "customer_id": 7890,
    "order_date": "2024-11-30T10:30:00.000Z",
    "total_amount": 150.0,
    "items": ["item1", "item2", "item3"]
}}

### Example SQL output with an array field:

SELECT
    rand() % 1000000 AS order_id,
    rand() % 10000 AS customer_id,
    now() - rand() % 86400 AS order_date,
    rand() % 1000 AS total_amount,
    arrayMap(x -> concat('item_', toString(x)), range(1, rand() % 5 + 1)) AS items
FROM numbers({rows})

## Example schema with a nested JSON field:

### Schema:

SCHEMA >
    `request_id` String `json:$.request_id`,
    `timestamp` DateTime `json:$.timestamp`,
    `model` String `json:$.request.model`,
    `temperature` Float32 `json:$.request.options.temperature`,
    `max_tokens` UInt32 `json:$.request.options.max_tokens`,
    `stream` UInt8 `json:$.request.options.stream`

### Desired final output of the query:

Note that the important part is generating the nested fields:
json:$.request.options.max_tokens > this means that the max_tokens field is nested inside the options field inside the request field.

{{
    "request_id": "req_abc123",
    "timestamp": "2024-11-30T10:30:00.000Z",
    "request": {{
        "model": "gpt-4",
        "options": {{
            "temperature": 0.7,
            "max_tokens": 1000,
            "stream": false
        }}
    }}
}}

### Example SQL output with nested fields:

SELECT
    request_id,
    timestamp,
    CAST(concat('{{
        "model": "', model, '",
        "options": {{
            "temperature": ', temperature, ',
            "max_tokens": ', max_tokens, ',
            "stream": ', IF(stream = 1, 'true', 'false'), '
        }}
    }}'), 'JSON') AS request
FROM
(
    SELECT
        concat('req_', lower(hex(randomString(6)))) AS request_id,
        (now() - toIntervalDay(rand() % 30)) + toIntervalSecond(rand() % 86400) AS timestamp,
        ['gpt-4', 'gpt-3.5-turbo', 'gpt-4-turbo'][(rand() % 3) + 1] AS model,
        round(rand() / 10, 2) AS temperature,
        500 + (rand() % 2500) AS max_tokens,
        rand() % 2 AS stream
    FROM numbers({rows})
)
</more_examples>
Follow the instructions and generate the following response with no additional text:
<response>
<sql>[sql query here]</sql>
</response>
"""
|
tinybird/sql.py
CHANGED
|
@@ -8,6 +8,8 @@ from typing import Any, Dict, Iterable, List, Optional
|
|
|
8
8
|
# Character sets built from ASCII letters and digits plus a fixed punctuation
# string. NOTE(review): presumably the characters accepted in SQL names and in
# function expressions respectively - confirm against the code that reads them.
valid_chars_name: str = "".join((string.ascii_letters, string.digits, "._`*<>+-'"))
valid_chars_fn: str = "".join((valid_chars_name, "[](),=!?:/ \n\t\r"))

# Index type names accepted by TableIndex.validate_allowed().
INDEX_WHITELIST = ["minmax", "set", "bloom_filter", "ngrambf_v1", "tokenbf_v1"]
|
|
12
|
+
|
|
11
13
|
|
|
12
14
|
@dataclass
|
|
13
15
|
class TableIndex:
|
|
@@ -37,6 +39,13 @@ class TableIndex:
|
|
|
37
39
|
def clear_index_sql(self):
    """Return the ``CLEAR INDEX IF EXISTS`` clause for this index's name."""
    return "CLEAR INDEX IF EXISTS {}".format(self.name)
|
|
39
41
|
|
|
42
|
+
def validate_allowed(self):
    """
    Raise ValueError unless the index type mentions a whitelisted index name.

    Validate at API level not to depend on CLI version
    """
    # Containment check, not equality: any whitelisted name appearing anywhere
    # inside type_full is accepted.
    # NOTE(review): presumably type_full carries arguments such as "set(100)",
    # which is why equality is not used - confirm where the field is populated.
    for allowed_name in INDEX_WHITELIST:
        if allowed_name in self.type_full:
            return
    raise ValueError(f"Not allowed index '{self.type_full}'")
|
|
48
|
+
|
|
40
49
|
|
|
41
50
|
@dataclass
|
|
42
51
|
class TableProjection:
|