tinybird 0.0.1.dev285__py3-none-any.whl → 0.0.1.dev287__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tinybird might be problematic. Click here for more details.

@@ -0,0 +1,911 @@
1
+ """
2
+ Service datasources definitions and utilities.
3
+
4
+ This module provides access to predefined service datasources and their schemas
5
+ for both Tinybird and Organization scopes.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional
9
+
10
+
11
def get_tinybird_service_datasources() -> List[Dict[str, Any]]:
    """
    Get all Tinybird-specific service datasources.

    Each entry is a plain dictionary with the keys ``name``, ``description``,
    ``dateColumn``, ``engine`` (engine settings such as ``sorting_key``,
    ``partition_key`` and ``ttl``; may be empty) and ``columns`` (a list of
    ``{"name": ..., "type": ...}`` dictionaries).

    The list literal is evaluated on every call, so callers receive a fresh
    structure they can mutate without affecting later calls.

    Returns:
        List[Dict[str, Any]]: A list of Tinybird service datasources.
    """
    return [
        {
            "name": "tinybird.pipe_stats_rt",
            "description": "Contains information about all requests made to your API endpoints in real time. This data source has a TTL of 7 days. If you need to query data older than 7 days you must use the aggregated by day data available at tinybird.pipe_stats.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "pipe_id, start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "token", "type": "String"},
                {"name": "token_name", "type": "String"},
                {"name": "duration", "type": "Float32"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "error", "type": "UInt8"},
                {"name": "status_code", "type": "Int32"},
                {"name": "request_id", "type": "String"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "method", "type": "String"},
                {"name": "release", "type": "String"},
                {"name": "user_agent", "type": "Nullable(String)"},
                {"name": "resource_tags", "type": "Array(String)"},
                {"name": "memory_usage", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.pipe_stats",
            "description": "Aggregates the request stats in tinybird.pipe_stats_rt by day.",
            "dateColumn": "date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "pipe_id, date",
                "partition_key": "toYYYYMM(date)",
            },
            "columns": [
                # NOTE(review): organization.pipe_stats declares "date" as Date
                # while this entry uses DateTime — confirm which one is right.
                {"name": "date", "type": "DateTime"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "view_count", "type": "UInt64"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "cpu_time_sum", "type": "Float64"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "tinybird.block_log",
            "description": "The data source contains details about how Tinybird ingests data into your data sources. You can use this Service data source to spot problematic parts of your data.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "timestamp, cityHash64(datasource_name)",
                "partition_key": "toYear(timestamp)",
                "sampling_key": "cityHash64(datasource_name)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "String"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "source", "type": "String"},
                {"name": "token_id", "type": "String"},
                {"name": "block_id", "type": "String"},
                {"name": "status", "type": "String"},
                {"name": "user_id", "type": "String"},
                {"name": "user_mail", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "start_offset", "type": "Nullable(Int64)"},
                {"name": "end_offset", "type": "Nullable(Int64)"},
                {"name": "rows", "type": "Nullable(Int32)"},
                {"name": "parser", "type": "Nullable(String)"},
                {"name": "quarantine_lines", "type": "Nullable(UInt32)"},
                {"name": "empty_lines", "type": "Nullable(UInt32)"},
                {"name": "bytes", "type": "Nullable(UInt32)"},
                {"name": "processing_time", "type": "Nullable(Float32)"},
                {"name": "processing_error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.datasources_ops_log",
            # The operation list previously named "append" twice.
            "description": "Contains all operations performed to your data sources. Tinybird tracks the following operations: create, append, append-hfi, append-kafka, replace, delete, truncate, rename, populateview-queued, populateview, copy, alter",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "datasource_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "result", "type": "String"},
                {"name": "elapsed_time", "type": "Float32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "Nullable(String)"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "rows", "type": "Nullable(UInt64)"},
                {"name": "rows_quarantine", "type": "Nullable(UInt64)"},
                {"name": "blocks_ids", "type": "Array(String)"},
                {"name": "operation_id", "type": "String"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                {"name": "memory_usage", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "release", "type": "String"},
            ],
        },
        {
            "name": "tinybird.datasources_ops_stats",
            "description": "Data from tinybird.datasources_ops_log, aggregated by day.",
            "dateColumn": "event_date",
            "engine": {
                "engine": "SummingMergeTree",
                "sorting_key": "event_date, event_type, pipe_id",
                "partition_key": "toYYYYMM(event_date)",
            },
            "columns": [
                {"name": "event_date", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "executions", "type": "UInt64"},
                {"name": "avg_elapsed_time_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "quantiles_state", "type": "AggregateFunction(quantiles(0.9, 0.95, 0.99), Float64)"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "written_rows_quarantine", "type": "UInt64"},
                {"name": "written_bytes_quarantine", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float64"},
                {"name": "resource_tags", "type": "Array(String)"},
            ],
        },
        {
            "name": "tinybird.endpoint_errors",
            "description": "It provides the last 30 days errors of your published endpoints.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "start_datetime",
                "partition_key": "toYYYYMM(toDate(start_datetime))",
                "ttl": "start_datetime + toIntervalDay(30)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "pipe_id", "type": "String"},
                {"name": "pipe_name", "type": "String"},
                {"name": "params", "type": "Nullable(String)"},
                {"name": "url", "type": "Nullable(String)"},
                {"name": "status_code", "type": "Nullable(Int32)"},
                {"name": "error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.kafka_ops_log",
            "description": "Contains all operations performed to your Kafka Data Sources during the last 30 days.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "datasource_id, topic, timestamp",
                "partition_key": "toYYYYMMDD(timestamp)",
                "ttl": "timestamp + toIntervalDay(30)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "datasource_id", "type": "String"},
                {"name": "topic", "type": "String"},
                {"name": "partition", "type": "Int16"},
                {"name": "msg_type", "type": "String"},
                {"name": "lag", "type": "Int64"},
                {"name": "processed_messages", "type": "Int32"},
                {"name": "processed_bytes", "type": "Int32"},
                {"name": "committed_messages", "type": "Int32"},
                {"name": "msg", "type": "String"},
            ],
        },
        {
            "name": "tinybird.datasources_storage",
            "description": "Contains stats about your Data Sources storage.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "AggregatingMergeTree",
                "sorting_key": "datasource_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "bytes", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "bytes_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
                {"name": "rows_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
            ],
        },
        {
            "name": "tinybird.bi_stats_rt",
            "description": "Contains information about all requests to your BI Connector interface in real time.",
            "dateColumn": "start_datetime",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "cityHash64(query_normalized), start_datetime",
                "ttl": "start_datetime + toIntervalDay(7)",
            },
            "columns": [
                {"name": "start_datetime", "type": "DateTime"},
                {"name": "query", "type": "String"},
                {"name": "query_normalized", "type": "String"},
                {"name": "error_code", "type": "Int32"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "url", "type": "String"},
                {"name": "duration", "type": "UInt64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "result_rows", "type": "UInt64"},
                {"name": "result_bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.bi_stats",
            "description": "Aggregates the stats in tinybird.bi_stats_rt by day.",
            "dateColumn": "date",
            # The daily aggregate has no "start_datetime" column; sort by its
            # own "date" column, as organization.bi_stats does.
            "engine": {"engine": "MergeTree", "sorting_key": "cityHash64(query_normalized), date"},
            "columns": [
                {"name": "date", "type": "Date"},
                {"name": "query_normalized", "type": "String"},
                {"name": "view_count", "type": "UInt64"},
                {"name": "error_count", "type": "UInt64"},
                {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
                {
                    "name": "quantile_timing_state",
                    "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
                },
                {"name": "read_bytes_sum", "type": "UInt64"},
                {"name": "read_rows_sum", "type": "UInt64"},
                {"name": "avg_result_rows_state", "type": "AggregateFunction(avg, Float32)"},
                {"name": "avg_result_bytes_state", "type": "AggregateFunction(avg, Float32)"},
            ],
        },
        {
            "name": "tinybird.sinks_ops_log",
            "description": "Contains information about your Sink pipes.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "pipe_id, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "service", "type": "LowCardinality(String)"},
                {"name": "pipe_name", "type": "String"},
                {"name": "pipe_id", "type": "String"},
                {"name": "result", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "elapsed_time", "type": "Float64"},
                {"name": "read_rows", "type": "UInt64"},
                {"name": "written_rows", "type": "UInt64"},
                {"name": "read_bytes", "type": "UInt64"},
                {"name": "written_bytes", "type": "UInt64"},
                {"name": "cpu_time", "type": "Float32"},
                # Same type as the "output" column of organization.sinks_ops_log.
                {"name": "output", "type": "Array(String)"},
                {"name": "parameters", "type": "Map(String, String)"},
                {"name": "options", "type": "Map(String, String)"},
                {"name": "token_name", "type": "String"},
            ],
        },
        {
            "name": "tinybird.releases_log",
            "description": "Contains operations performed to your releases.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "MergeTree",
                "sorting_key": "semver, timestamp",
                "partition_key": "toYYYYMM(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime64(3)"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "commit", "type": "String"},
                {"name": "semver", "type": "String"},
                {"name": "token", "type": "String"},
                {"name": "token_name", "type": "String"},
                {"name": "result", "type": "LowCardinality(String)"},
                {"name": "error", "type": "String"},
            ],
        },
        {
            "name": "tinybird.data_transfer",
            "description": "Stats of data transferred per hour by a workspace.",
            "dateColumn": "timestamp",
            # No engine settings are declared for this data source.
            "engine": {},
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "event_type", "type": "LowCardinality(String)"},
                {"name": "origin_provider", "type": "LowCardinality(String)"},
                {"name": "origin_region", "type": "LowCardinality(String)"},
                {"name": "destination_provider", "type": "LowCardinality(String)"},
                {"name": "destination_region", "type": "LowCardinality(String)"},
                {"name": "kind", "type": "LowCardinality(String)"},
                {"name": "bytes", "type": "UInt64"},
            ],
        },
        {
            "name": "tinybird.jobs_log",
            "description": "Contains all job executions performed in your workspace.",
            "dateColumn": "created_at",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "created_at, job_id",
                "partition_key": "toYYYYMM(created_at)",
            },
            "columns": [
                {"name": "job_id", "type": "String"},
                {"name": "job_type", "type": "LowCardinality(String)"},
                {"name": "status", "type": "LowCardinality(String)"},
                {"name": "error", "type": "Nullable(String)"},
                {"name": "workspace_id", "type": "String"},
                {"name": "request_id", "type": "Nullable(String)"},
                {"name": "pipe_id", "type": "Nullable(String)"},
                {"name": "datasource_id", "type": "Nullable(String)"},
                {"name": "created_at", "type": "DateTime64(3)"},
                {"name": "started_at", "type": "Nullable(DateTime64(3))"},
                {"name": "updated_at", "type": "DateTime64(3)"},
                {"name": "job_metadata", "type": "String"},
            ],
        },
        {
            "name": "tinybird.hook_log",
            "description": "Log of hook executions and their results.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "timestamp, cityHash64(datasource_name)",
                "partition_key": "toYear(timestamp)",
            },
            "columns": [
                {"name": "timestamp", "type": "DateTime"},
                {"name": "request_id", "type": "String"},
                {"name": "import_id", "type": "Nullable(String)"},
                {"name": "job_id", "type": "Nullable(String)"},
                {"name": "source", "type": "String"},
                {"name": "hook_id", "type": "String"},
                {"name": "name", "type": "String"},
                {"name": "operation", "type": "String"},
                {"name": "status", "type": "String"},
                {"name": "datasource_id", "type": "String"},
                {"name": "datasource_name", "type": "String"},
                {"name": "processing_time", "type": "Nullable(Float32)"},
                {"name": "processing_error", "type": "Nullable(String)"},
            ],
        },
        {
            "name": "tinybird.data_guess",
            "description": "Guesses the type of data in a datasource for a user.",
            "dateColumn": "timestamp",
            "engine": {
                "engine": "ReplacingMergeTree",
                "sorting_key": "user_id, datasource_id, timestamp",
                "partition_key": "toYYYYMMDD(timestamp)",
            },
            "columns": [
                {"name": "user_id", "type": "LowCardinality(String)"},
                {"name": "datasource_id", "type": "LowCardinality(String)"},
                {"name": "timestamp", "type": "DateTime"},
                {"name": "path", "type": "LowCardinality(String)"},
                {"name": "type", "type": "LowCardinality(String)"},
                {"name": "num", "type": "Float64"},
                {"name": "str", "type": "String"},
            ],
        },
        {
            "name": "tinybird.estimated_shared_infra_cpu_time",
            "description": "Contains CPU time in seconds for all your operations in the workspace during a natural minute.",
            "dateColumn": "minute",
            "engine": {
                "engine": "AggregatingMergeTree",
                # NOTE(review): "minute_slot" is not among the columns listed
                # below (the date column is "minute") — confirm against the
                # upstream schema before relying on this sorting key.
                "sorting_key": "minute_slot, workspace_id",
                "partition_key": "toYYYYMM(minute)",
            },
            "columns": [
                {"name": "minute", "type": "DateTime"},
                {"name": "workspace_id", "type": "String"},
                {"name": "total_cpu_time_seconds", "type": "Float64"},
            ],
        },
    ]
432
+
433
+
434
+ def get_organization_service_datasources() -> List[Dict[str, Any]]:
435
+ """
436
+ Get all Organization-specific service datasources.
437
+
438
+ Returns:
439
+ List[Dict[str, Any]]: A list of Organization service datasources.
440
+ """
441
+ return [
442
+ {
443
+ "name": "organization.workspaces",
444
+ "description": "Lists all Organization Workspaces and related information (name, IDs, databases, plan, when it was created, and whether it was soft-deleted).",
445
+ "dateColumn": "timestamp",
446
+ "engine": {"engine": "ReplacingMergeTree", "sorting_key": "workspace_id", "partition_key": "tuple()"},
447
+ "columns": [
448
+ {"name": "workspace_id", "type": "String"},
449
+ {"name": "name", "type": "String"},
450
+ {"name": "database", "type": "String"},
451
+ {"name": "plan", "type": "String"},
452
+ {"name": "created_at", "type": "DateTime"},
453
+ {"name": "deleted_at", "type": "Nullable(DateTime)"},
454
+ ],
455
+ },
456
+ {
457
+ "name": "organization.processed_data",
458
+ "description": "Information related to all processed data per day per workspace.",
459
+ "dateColumn": "date",
460
+ "engine": {
461
+ "engine": "SummingMergeTree",
462
+ "sorting_key": "database, date",
463
+ "partition_key": "toYYYYMM(date)",
464
+ },
465
+ "columns": [
466
+ {"name": "date", "type": "Date"},
467
+ {"name": "database", "type": "String"},
468
+ {"name": "read_bytes", "type": "UInt64"},
469
+ {"name": "written_bytes", "type": "UInt64"},
470
+ ],
471
+ },
472
+ {
473
+ "name": "organization.datasources_storage",
474
+ "description": "Similar to tinybird.datasources_storage but with data for all Organization Workspaces.",
475
+ "dateColumn": "timestamp",
476
+ "engine": {
477
+ "engine": "AggregatingMergeTree",
478
+ "sorting_key": "workspace_id, datasource_id, timestamp",
479
+ "partition_key": "toYYYYMM(timestamp)",
480
+ },
481
+ "columns": [
482
+ {"name": "workspace_id", "type": "String"},
483
+ {"name": "datasource_id", "type": "String"},
484
+ {"name": "datasource_name", "type": "String"},
485
+ {"name": "timestamp", "type": "DateTime"},
486
+ {"name": "bytes", "type": "SimpleAggregateFunction(max, UInt64)"},
487
+ {"name": "rows", "type": "SimpleAggregateFunction(max, UInt64)"},
488
+ {"name": "bytes_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
489
+ {"name": "rows_quarantine", "type": "SimpleAggregateFunction(max, UInt64)"},
490
+ ],
491
+ },
492
+ {
493
+ "name": "organization.datasources_ops_log",
494
+ "description": "Similar to tinybird.datasources_ops_log but with data for all Organization Workspaces.",
495
+ "dateColumn": "timestamp",
496
+ "engine": {
497
+ "engine": "ReplacingMergeTree",
498
+ "sorting_key": "workspace_id, datasource_id, timestamp",
499
+ "partition_key": "tuple()",
500
+ },
501
+ "columns": [
502
+ {"name": "workspace_id", "type": "String"},
503
+ {"name": "timestamp", "type": "DateTime"},
504
+ {"name": "event_type", "type": "String"},
505
+ {"name": "datasource_id", "type": "String"},
506
+ {"name": "datasource_name", "type": "String"},
507
+ {"name": "result", "type": "String"},
508
+ {"name": "elapsed_time", "type": "Float32"},
509
+ {"name": "error", "type": "Nullable(String)"},
510
+ {"name": "request_id", "type": "String"},
511
+ {"name": "import_id", "type": "Nullable(String)"},
512
+ {"name": "job_id", "type": "Nullable(String)"},
513
+ {"name": "rows", "type": "Nullable(UInt64)"},
514
+ {"name": "rows_quarantine", "type": "Nullable(UInt64)"},
515
+ {"name": "blocks_ids", "type": "Array(String)"},
516
+ {"name": "operation_id", "type": "String"},
517
+ {"name": "read_rows", "type": "UInt64"},
518
+ {"name": "cpu_time", "type": "Float32"},
519
+ {"name": "memory_usage", "type": "UInt64"},
520
+ {"name": "read_bytes", "type": "UInt64"},
521
+ {"name": "written_rows", "type": "UInt64"},
522
+ {"name": "written_bytes", "type": "UInt64"},
523
+ {"name": "written_rows_quarantine", "type": "UInt64"},
524
+ {"name": "written_bytes_quarantine", "type": "UInt64"},
525
+ {"name": "pipe_id", "type": "String"},
526
+ {"name": "pipe_name", "type": "String"},
527
+ {"name": "release", "type": "String"},
528
+ ],
529
+ },
530
+ {
531
+ "name": "organization.datasources_ops_stats",
532
+ "description": "Similar to tinybird.datasources_ops_stats but with data for all Organization Workspaces.",
533
+ "dateColumn": "event_date",
534
+ "engine": {
535
+ "engine": "SummingMergeTree",
536
+ "sorting_key": "event_date, workspace_id, event_type, pipe_id",
537
+ "partition_key": "toYYYYMM(event_date)",
538
+ },
539
+ "columns": [
540
+ {"name": "event_date", "type": "DateTime"},
541
+ {"name": "workspace_id", "type": "String"},
542
+ {"name": "event_type", "type": "LowCardinality(String)"},
543
+ {"name": "pipe_id", "type": "String"},
544
+ {"name": "pipe_name", "type": "String"},
545
+ {"name": "error_count", "type": "UInt64"},
546
+ {"name": "executions", "type": "UInt64"},
547
+ {"name": "avg_elapsed_time_state", "type": "AggregateFunction(avg, Float32)"},
548
+ {"name": "quantiles_state", "type": "AggregateFunction(quantiles(0.9, 0.95, 0.99), Float64)"},
549
+ {"name": "read_rows", "type": "UInt64"},
550
+ {"name": "read_bytes", "type": "UInt64"},
551
+ {"name": "written_rows", "type": "UInt64"},
552
+ {"name": "written_bytes", "type": "UInt64"},
553
+ {"name": "written_rows_quarantine", "type": "UInt64"},
554
+ {"name": "written_bytes_quarantine", "type": "UInt64"},
555
+ ],
556
+ },
557
+ {
558
+ "name": "organization.pipe_stats",
559
+ "description": "Similar to tinybird.pipe_stats but with data for all Organization Workspaces.",
560
+ "dateColumn": "date",
561
+ "engine": {
562
+ "engine": "SummingMergeTree",
563
+ "sorting_key": "workspace_id, pipe_id, date",
564
+ "partition_key": "toYYYYMM(date)",
565
+ },
566
+ "columns": [
567
+ {"name": "workspace_id", "type": "String"},
568
+ {"name": "date", "type": "Date"},
569
+ {"name": "pipe_id", "type": "String"},
570
+ {"name": "pipe_name", "type": "String"},
571
+ {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
572
+ {"name": "error_count", "type": "UInt64"},
573
+ {"name": "view_count", "type": "UInt64"},
574
+ {
575
+ "name": "quantile_timing_state",
576
+ "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
577
+ },
578
+ {"name": "read_bytes_sum", "type": "UInt64"},
579
+ {"name": "read_rows_sum", "type": "UInt64"},
580
+ {"name": "cpu_time_sum", "type": "Float64"},
581
+ {"name": "resource_tags", "type": "Array(String)"},
582
+ ],
583
+ },
584
+ {
585
+ "name": "organization.pipe_stats_rt",
586
+ "description": "Similar to tinybird.pipe_stats_rt but with data for all Organization Workspaces.",
587
+ "dateColumn": "start_datetime",
588
+ "engine": {
589
+ "engine": "MergeTree",
590
+ "sorting_key": "workspace_id, pipe_id, start_datetime",
591
+ "ttl": "start_datetime + toIntervalDay(7)",
592
+ },
593
+ "columns": [
594
+ {"name": "workspace_id", "type": "String"},
595
+ {"name": "start_datetime", "type": "DateTime"},
596
+ {"name": "pipe_id", "type": "String"},
597
+ {"name": "pipe_name", "type": "String"},
598
+ {"name": "token", "type": "String"},
599
+ {"name": "token_name", "type": "String"},
600
+ {"name": "duration", "type": "Float32"},
601
+ {"name": "read_bytes", "type": "UInt64"},
602
+ {"name": "read_rows", "type": "UInt64"},
603
+ {"name": "cpu_time", "type": "Float32"},
604
+ {"name": "url", "type": "Nullable(String)"},
605
+ {"name": "error", "type": "UInt8"},
606
+ {"name": "status_code", "type": "Int32"},
607
+ {"name": "request_id", "type": "String"},
608
+ {"name": "parameters", "type": "Map(String, String)"},
609
+ {"name": "method", "type": "String"},
610
+ {"name": "release", "type": "String"},
611
+ {"name": "user_agent", "type": "Nullable(String)"},
612
+ {"name": "resource_tags", "type": "Array(String)"},
613
+ ],
614
+ },
615
+ {
616
+ "name": "organization.data_transfer",
617
+ "description": "Similar to tinybird.data_transfer but with data for all Organization Workspaces.",
618
+ "dateColumn": "timestamp",
619
+ "engine": {},
620
+ "columns": [
621
+ {"name": "workspace_id", "type": "String"},
622
+ {"name": "timestamp", "type": "DateTime"},
623
+ {"name": "event_type", "type": "LowCardinality(String)"},
624
+ {"name": "origin_provider", "type": "LowCardinality(String)"},
625
+ {"name": "origin_region", "type": "LowCardinality(String)"},
626
+ {"name": "destination_provider", "type": "LowCardinality(String)"},
627
+ {"name": "destination_region", "type": "LowCardinality(String)"},
628
+ {"name": "kind", "type": "LowCardinality(String)"},
629
+ {"name": "bytes", "type": "UInt64"},
630
+ ],
631
+ },
632
+ {
633
+ "name": "organization.jobs_log",
634
+ "description": "Historic Logs for all kinds of job executions across the organization",
635
+ "dateColumn": "created_at",
636
+ "engine": {
637
+ "engine": "ReplacingMergeTree",
638
+ "sorting_key": "workspace_id, created_at, job_id",
639
+ "partition_key": "toYYYYMM(created_at)",
640
+ },
641
+ "columns": [
642
+ {"name": "job_id", "type": "String"},
643
+ {"name": "job_type", "type": "LowCardinality(String)"},
644
+ {"name": "status", "type": "LowCardinality(String)"},
645
+ {"name": "error", "type": "Nullable(String)"},
646
+ {"name": "workspace_id", "type": "String"},
647
+ {"name": "request_id", "type": "Nullable(String)"},
648
+ {"name": "pipe_id", "type": "Nullable(String)"},
649
+ {"name": "datasource_id", "type": "Nullable(String)"},
650
+ {"name": "created_at", "type": "DateTime64(3)"},
651
+ {"name": "started_at", "type": "Nullable(DateTime64(3))"},
652
+ {"name": "updated_at", "type": "DateTime64(3)"},
653
+ {"name": "job_metadata", "type": "String"},
654
+ ],
655
+ },
656
+ {
657
+ "name": "organization.sinks_ops_log",
658
+ "description": "Historic Logs for all Sink job executions across the organization",
659
+ "dateColumn": "timestamp",
660
+ "engine": {
661
+ "engine": "ReplacingMergeTree",
662
+ "sorting_key": "workspace_id, pipe_id, timestamp",
663
+ "partition_key": "toYYYYMM(timestamp)",
664
+ },
665
+ "columns": [
666
+ {"name": "timestamp", "type": "DateTime"},
667
+ {"name": "workspace_id", "type": "String"},
668
+ {"name": "service", "type": "LowCardinality(String)"},
669
+ {"name": "pipe_id", "type": "String"},
670
+ {"name": "pipe_name", "type": "String"},
671
+ {"name": "result", "type": "LowCardinality(String)"},
672
+ {"name": "error", "type": "Nullable(String)"},
673
+ {"name": "elapsed_time", "type": "Float64"},
674
+ {"name": "job_id", "type": "Nullable(String)"},
675
+ {"name": "read_rows", "type": "UInt64"},
676
+ {"name": "written_rows", "type": "UInt64"},
677
+ {"name": "read_bytes", "type": "UInt64"},
678
+ {"name": "written_bytes", "type": "UInt64"},
679
+ {"name": "cpu_time", "type": "Float32"},
680
+ {"name": "output", "type": "Array(String)"},
681
+ {"name": "parameters", "type": "Map(String, String)"},
682
+ {"name": "options", "type": "Map(String, String)"},
683
+ {"name": "token_name", "type": "String"},
684
+ ],
685
+ },
686
+ {
687
+ "name": "organization.bi_stats_rt",
688
+ "description": "Contains information about all requests to the BI Connector interface for the whole Organization in real time.",
689
+ "dateColumn": "start_datetime",
690
+ "engine": {
691
+ "engine": "MergeTree",
692
+ "sorting_key": "cityHash64(query_normalized), start_datetime",
693
+ "ttl": "start_datetime + toIntervalDay(7)",
694
+ },
695
+ "columns": [
696
+ {"name": "database", "type": "String"},
697
+ {"name": "start_datetime", "type": "DateTime"},
698
+ {"name": "query", "type": "String"},
699
+ {"name": "query_normalized", "type": "String"},
700
+ {"name": "error_code", "type": "Int32"},
701
+ {"name": "error", "type": "Nullable(String)"},
702
+ {"name": "url", "type": "String"},
703
+ {"name": "duration", "type": "UInt64"},
704
+ {"name": "read_rows", "type": "UInt64"},
705
+ {"name": "read_bytes", "type": "UInt64"},
706
+ {"name": "result_rows", "type": "UInt64"},
707
+ {"name": "result_bytes", "type": "UInt64"},
708
+ ],
709
+ },
710
+ {
711
+ "name": "organization.bi_stats",
712
+ "description": "Aggregates the stats in organization.bi_stats_rt by day.",
713
+ "dateColumn": "date",
714
+ "engine": {"engine": "MergeTree", "sorting_key": "database, cityHash64(query_normalized), date"},
715
+ "columns": [
716
+ {"name": "database", "type": "String"},
717
+ {"name": "date", "type": "Date"},
718
+ {"name": "query_normalized", "type": "String"},
719
+ {"name": "view_count", "type": "UInt64"},
720
+ {"name": "error_count", "type": "UInt64"},
721
+ {"name": "avg_duration_state", "type": "AggregateFunction(avg, Float32)"},
722
+ {
723
+ "name": "quantile_timing_state",
724
+ "type": "AggregateFunction(quantilesTiming(0.9, 0.95, 0.99), Float64)",
725
+ },
726
+ {"name": "read_bytes_sum", "type": "UInt64"},
727
+ {"name": "read_rows_sum", "type": "UInt64"},
728
+ {"name": "avg_result_rows_state", "type": "AggregateFunction(avg, Float32)"},
729
+ {"name": "avg_result_bytes_state", "type": "AggregateFunction(avg, Float32)"},
730
+ ],
731
+ },
732
+ {
733
+ "name": "organization.metrics_logs",
734
+ "description": "Metrics of your organization's dedicated clusters",
735
+ "dateColumn": "timestamp",
736
+ "engine": {
737
+ "engine": "MergeTree",
738
+ "sorting_key": "timestamp",
739
+ "ttl": "toDate(timestamp) + toIntervalDay(30)",
740
+ },
741
+ "columns": [
742
+ {"name": "timestamp", "type": "DateTime"},
743
+ {"name": "cluster", "type": "LowCardinality(String)"},
744
+ {"name": "host", "type": "LowCardinality(String)"},
745
+ {"name": "metric", "type": "LowCardinality(String)"},
746
+ {"name": "value", "type": "String"},
747
+ {"name": "description", "type": "LowCardinality(String)"},
748
+ ],
749
+ },
750
+ {
751
+ "name": "organization.kafka_ops_log",
752
+ "description": "Contains all operations performed to your Kafka Data Sources during the last 30 days accross the organization.",
753
+ "dateColumn": "timestamp",
754
+ "engine": {
755
+ "engine": "MergeTree",
756
+ "sorting_key": "workspace_id, datasource_id, topic, timestamp",
757
+ "partition_key": "toYYYYMMDD(timestamp)",
758
+ "ttl": "timestamp + toIntervalDay(30)",
759
+ },
760
+ "columns": [
761
+ {"name": "timestamp", "type": "DateTime"},
762
+ {"name": "workspace_id", "type": "String"},
763
+ {"name": "datasource_id", "type": "String"},
764
+ {"name": "topic", "type": "String"},
765
+ {"name": "partition", "type": "Int16"},
766
+ {"name": "msg_type", "type": "String"},
767
+ {"name": "lag", "type": "Int64"},
768
+ {"name": "processed_messages", "type": "Int32"},
769
+ {"name": "processed_bytes", "type": "Int32"},
770
+ {"name": "committed_messages", "type": "Int32"},
771
+ {"name": "msg", "type": "String"},
772
+ ],
773
+ },
774
+ {
775
+ "name": "organization.endpoint_errors",
776
+ "description": "Similar to tinybird.endpoint_errors but with data for all Organization Workspaces.",
777
+ "dateColumn": "start_datetime",
778
+ "engine": {
779
+ "engine": "MergeTree",
780
+ "sorting_key": "start_datetime",
781
+ "partition_key": "toYYYYMM(toDate(start_datetime))",
782
+ "ttl": "start_datetime + toIntervalDay(30)",
783
+ },
784
+ "columns": [
785
+ {"name": "workspace_id", "type": "String"},
786
+ {"name": "start_datetime", "type": "DateTime"},
787
+ {"name": "request_id", "type": "String"},
788
+ {"name": "pipe_id", "type": "String"},
789
+ {"name": "pipe_name", "type": "String"},
790
+ {"name": "params", "type": "Nullable(String)"},
791
+ {"name": "url", "type": "Nullable(String)"},
792
+ {"name": "status_code", "type": "Nullable(Int32)"},
793
+ {"name": "error", "type": "Nullable(String)"},
794
+ ],
795
+ },
796
+ {
797
+ "name": "organization.shared_infra_active_minutes",
798
+ "description": "Contains information about vCPU active minutes consumption aggregated by minute for all Organization workspaces. Only available for Developer and Enterprise plans in shared infrastructure.",
799
+ "dateColumn": "minute",
800
+ "columns": [
801
+ {"name": "minute", "type": "DateTime"},
802
+ {"name": "organization_id", "type": "String"},
803
+ {"name": "organization_name", "type": "String"},
804
+ {"name": "total_cpu_time_in_seconds", "type": "Float64"},
805
+ {"name": "vcpus", "type": "SimpleAggregateFunction(max, Float64)"},
806
+ {"name": "active_minutes", "type": "Float64"},
807
+ ],
808
+ },
809
+ {
810
+ "name": "organization.shared_infra_qps_overages",
811
+ "description": "Contains information about QPS consumption and overages aggregated by second for all Organization workspaces. Only available for Developer and Enterprise plans in shared infrastructure.",
812
+ "dateColumn": "start_datetime",
813
+ "columns": [
814
+ {"name": "start_datetime", "type": "DateTime"},
815
+ {"name": "organization_id", "type": "String"},
816
+ {"name": "organization_name", "type": "String"},
817
+ {"name": "plan_qps", "type": "SimpleAggregateFunction(max, Int64)"},
818
+ {"name": "total_qps", "type": "UInt64"},
819
+ {"name": "overage", "type": "Int64"},
820
+ ],
821
+ },
822
+ {
823
+ "name": "organization.pipe_metrics_by_minute",
824
+ "description": (
825
+ "Contains information about organization.pipe_stats_rt metrics aggregated by minute, "
826
+ "pipe_name and workspace_id for all Organization workspaces. Join with organization.workspaces "
827
+ "to get the workspace name."
828
+ ),
829
+ "dateColumn": "minute_interval",
830
+ "columns": [
831
+ {"name": "minute_interval", "type": "DateTime"},
832
+ {"name": "pipe_name", "type": "String"},
833
+ {"name": "workspace_id", "type": "String"},
834
+ {"name": "request_count", "type": "UInt64"},
835
+ {"name": "avg_duration", "type": "Float64"},
836
+ {"name": "max_duration", "type": "Float64"},
837
+ {"name": "quantiles_05_09_099_duration", "type": "Array(Float64)"},
838
+ {"name": "avg_cpu_time", "type": "Float64"},
839
+ {"name": "max_cpu_time", "type": "Float64"},
840
+ {"name": "quantiles_05_09_099_cpu_time", "type": "Array(Float64)"},
841
+ {"name": "avg_memory_usage", "type": "Float64"},
842
+ {"name": "max_memory_usage", "type": "Float64"},
843
+ {"name": "quantiles_05_09_099_memory_usage", "type": "Array(Float64)"},
844
+ {"name": "total_read_rows", "type": "UInt64"},
845
+ {"name": "total_read_bytes", "type": "UInt64"},
846
+ {"name": "total_result_rows", "type": "UInt64"},
847
+ {"name": "error_count", "type": "UInt64"},
848
+ {"name": "timeout_count", "type": "UInt64"},
849
+ {"name": "rate_limit_count", "type": "UInt64"},
850
+ {"name": "error_500_count", "type": "UInt64"},
851
+ ],
852
+ },
853
+ {
854
+ "name": "organization.datasource_metrics_by_minute",
855
+ "description": (
856
+ "Contains information about organization.datasources_ops_log metrics aggregated by minute, "
857
+ "datasource_name, event_type, pipe_name and workspace_id for all Organization workspaces. "
858
+ "Join with organization.workspaces to get the workspace name."
859
+ ),
860
+ "dateColumn": "minute_interval",
861
+ "columns": [
862
+ {"name": "minute_interval", "type": "DateTime"},
863
+ {"name": "workspace_id", "type": "String"},
864
+ {"name": "datasource_name", "type": "String"},
865
+ {"name": "event_type", "type": "String"},
866
+ {"name": "pipe_name", "type": "String"},
867
+ {"name": "request_count", "type": "UInt64"},
868
+ {"name": "error_count", "type": "UInt64"},
869
+ {"name": "avg_elapsed_time", "type": "Float64"},
870
+ {"name": "max_elapsed_time", "type": "Float64"},
871
+ {"name": "quantiles_05_09_099_elapsed_time", "type": "Array(Float64)"},
872
+ {"name": "avg_cpu_time", "type": "Float64"},
873
+ {"name": "max_cpu_time", "type": "Float64"},
874
+ {"name": "quantiles_05_09_099_cpu_time", "type": "Array(Float64)"},
875
+ {"name": "avg_memory_usage", "type": "Float64"},
876
+ {"name": "max_memory_usage", "type": "Float64"},
877
+ {"name": "quantiles_05_09_099_memory_usage", "type": "Array(Float64)"},
878
+ {"name": "total_read_rows", "type": "UInt64"},
879
+ {"name": "total_read_bytes", "type": "UInt64"},
880
+ {"name": "total_written_rows", "type": "UInt64"},
881
+ {"name": "total_written_bytes", "type": "UInt64"},
882
+ ],
883
+ },
884
+ ]
885
+
886
+
887
def get_service_datasources() -> List[Dict[str, Any]]:
    """
    Build the complete catalogue of service datasources for both scopes.

    Returns:
        List[Dict[str, Any]]: The Tinybird service datasource definitions
        followed by the Organization service datasource definitions.
    """
    # Tinybird-scoped definitions come first, Organization-scoped ones after.
    tinybird_scope = get_tinybird_service_datasources()
    organization_scope = get_organization_service_datasources()
    return tinybird_scope + organization_scope
895
+
896
+
897
def get_service_datasource_by_name(name: str) -> Optional[Dict[str, Any]]:
    """
    Look up a single service datasource definition by its name.

    The name is compared exactly against each definition's "name" entry, so it
    must include the type prefix (e.g. tinybird.datasources_ops_log).

    Args:
        name: The name of the service datasource to find.

    Returns:
        Optional[Dict[str, Any]]: The first definition whose name matches, or
        None when no service datasource has that name.
    """
    # Lazily scan the combined list and stop at the first exact name match.
    matching = (candidate for candidate in get_service_datasources() if candidate["name"] == name)
    return next(matching, None)