tinybird 0.0.1.dev17__py3-none-any.whl → 0.0.1.dev18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinybird/tb/modules/build.py +18 -10
- tinybird/tb/modules/build_shell.py +205 -28
- tinybird/tb/modules/cicd.py +9 -89
- tinybird/tb/modules/common.py +1 -108
- tinybird/tb/modules/create.py +2 -6
- tinybird/tb/modules/datafile/common.py +223 -247
- tinybird/tb/modules/datafile/parse_datasource.py +8 -0
- tinybird/tb/modules/datafile/parse_pipe.py +10 -1
- tinybird/tb/modules/llm.py +4 -3
- tinybird/tb/modules/local_common.py +1 -1
- tinybird/tb/modules/login.py +1 -1
- tinybird/tb/modules/mock.py +14 -12
- tinybird/tb/modules/test.py +90 -17
- {tinybird-0.0.1.dev17.dist-info → tinybird-0.0.1.dev18.dist-info}/METADATA +2 -1
- {tinybird-0.0.1.dev17.dist-info → tinybird-0.0.1.dev18.dist-info}/RECORD +18 -18
- {tinybird-0.0.1.dev17.dist-info → tinybird-0.0.1.dev18.dist-info}/WHEEL +0 -0
- {tinybird-0.0.1.dev17.dist-info → tinybird-0.0.1.dev18.dist-info}/entry_points.txt +0 -0
- {tinybird-0.0.1.dev17.dist-info → tinybird-0.0.1.dev18.dist-info}/top_level.txt +0 -0
|
@@ -33,13 +33,42 @@ class DatafileSyntaxError(Exception):
|
|
|
33
33
|
def __init__(self, message: str, lineno: int, pos: int, hint: Optional[str] = None):
|
|
34
34
|
super().__init__(message)
|
|
35
35
|
self.message = message
|
|
36
|
+
self.context = None
|
|
36
37
|
self.hint = hint
|
|
37
38
|
self.lineno = lineno
|
|
38
39
|
self.pos = pos
|
|
39
40
|
|
|
41
|
+
def add_context(self, context: str):
|
|
42
|
+
self.context = context
|
|
43
|
+
|
|
44
|
+
def get_context_from_file_contents(self, s: str) -> None:
|
|
45
|
+
lines = s.splitlines()
|
|
46
|
+
|
|
47
|
+
start_line = max(0, self.lineno - 3) # 2 lines before
|
|
48
|
+
end_line = self.lineno # Only context before the error
|
|
49
|
+
|
|
50
|
+
# Calculate padding needed for line numbers
|
|
51
|
+
max_line_digits = len(str(end_line))
|
|
52
|
+
|
|
53
|
+
context = []
|
|
54
|
+
for i in range(start_line, end_line):
|
|
55
|
+
line_num = str(i + 1).rjust(max_line_digits)
|
|
56
|
+
line = lines[i].rstrip()
|
|
57
|
+
context.append(f"{line_num}: {line}")
|
|
58
|
+
|
|
59
|
+
# Add pointer line if this is the error line
|
|
60
|
+
if i + 1 == self.lineno:
|
|
61
|
+
pointer = " " * (max_line_digits + 2 + self.pos - 1) + "^"
|
|
62
|
+
context.append(pointer)
|
|
63
|
+
|
|
64
|
+
error_context = "\n".join(context)
|
|
65
|
+
self.add_context(error_context)
|
|
66
|
+
|
|
40
67
|
def __str__(self) -> str:
|
|
41
|
-
|
|
42
|
-
|
|
68
|
+
output = f"{self.message}"
|
|
69
|
+
output += f"\n\n{self.context}" if self.context else f" at {self.lineno}:{self.pos}."
|
|
70
|
+
output += f"\n{self.hint}." if self.hint else ""
|
|
71
|
+
return output
|
|
43
72
|
|
|
44
73
|
|
|
45
74
|
class SchemaSyntaxError(DatafileSyntaxError):
|
|
@@ -242,15 +271,21 @@ def parse_indexes_structure(indexes: Optional[List[str]]) -> List[TableIndex]:
|
|
|
242
271
|
>>> parse_indexes_structure(["index_name u64 * length(s)"])
|
|
243
272
|
Traceback (most recent call last):
|
|
244
273
|
...
|
|
245
|
-
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
|
|
274
|
+
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
|
|
275
|
+
Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
|
|
276
|
+
|
|
246
277
|
>>> parse_indexes_structure(["index_name a TYPE set(100) GRANULARITY 100, index_name_bf mapValues(d) TYPE bloom_filter(0.001) GRANULARITY 16"])
|
|
247
278
|
Traceback (most recent call last):
|
|
248
279
|
...
|
|
249
|
-
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
|
|
280
|
+
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
|
|
281
|
+
Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
|
|
282
|
+
|
|
250
283
|
>>> parse_indexes_structure(["", " ", " wrong_index_syntax,"])
|
|
251
284
|
Traceback (most recent call last):
|
|
252
285
|
...
|
|
253
|
-
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 3:6.
|
|
286
|
+
tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 3:6.
|
|
287
|
+
Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
|
|
288
|
+
|
|
254
289
|
>>> parse_indexes_structure(["my_index m['key'] TYPE ngrambf_v1(1, 1024, 1, 42) GRANULARITY 1"])
|
|
255
290
|
[TableIndex(name='my_index', expr="m['key']", type_full='ngrambf_v1(1, 1024, 1, 42)', granularity='1')]
|
|
256
291
|
>>> parse_indexes_structure(["my_index_lambda arrayMap(x -> tupleElement(x,'message'), column_name) TYPE ngrambf_v1(1, 1024, 1, 42) GRANULARITY 1"])
|
|
@@ -513,9 +548,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
513
548
|
if c not in valid_chars_name:
|
|
514
549
|
raise SchemaSyntaxError(
|
|
515
550
|
message=f"Column name contains invalid character {repr(c)}",
|
|
516
|
-
hint="
|
|
551
|
+
hint="Hint: use backticks",
|
|
517
552
|
lineno=line,
|
|
518
|
-
pos=
|
|
553
|
+
pos=pos,
|
|
519
554
|
)
|
|
520
555
|
advance_single_char()
|
|
521
556
|
return schema[begin:i]
|
|
@@ -524,7 +559,11 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
524
559
|
advance_single_char()
|
|
525
560
|
return get_backticked()
|
|
526
561
|
|
|
527
|
-
def parse_expr(lookup: Iterable[SyntaxExpr]) -> str:
|
|
562
|
+
def parse_expr(lookup: Iterable[SyntaxExpr], attribute: str) -> str:
|
|
563
|
+
"""Parse an expression for an attribute.
|
|
564
|
+
|
|
565
|
+
The name of the attribute is used to generate the error message.
|
|
566
|
+
"""
|
|
528
567
|
nonlocal i, line, pos
|
|
529
568
|
|
|
530
569
|
begin: int = i
|
|
@@ -542,13 +581,38 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
542
581
|
elif c == "(" and (context is None or context == "("):
|
|
543
582
|
context_stack.append("(")
|
|
544
583
|
elif context is None and lookahead_matches(lookup):
|
|
584
|
+
if i == begin:
|
|
585
|
+
# This happens when we're parsing a column and an expr is missing for an attribute that requires it,
|
|
586
|
+
# like DEFAULT or CODEC. For example:
|
|
587
|
+
# SCHEMA >
|
|
588
|
+
# timestamp DateTime DEFAULT,
|
|
589
|
+
# col_b Int32
|
|
590
|
+
raise SchemaSyntaxError(
|
|
591
|
+
message=f"Missing mandatory value for {attribute}",
|
|
592
|
+
lineno=line,
|
|
593
|
+
pos=pos,
|
|
594
|
+
)
|
|
545
595
|
return schema[begin:i].strip(" \t\r\n")
|
|
546
596
|
elif (context is None and c not in valid_chars_fn) or (context == "(" and c not in valid_chars_fn):
|
|
547
597
|
raise SchemaSyntaxError(message=f"Invalid character {repr(c)}", lineno=line, pos=pos)
|
|
548
598
|
advance_single_char()
|
|
599
|
+
|
|
600
|
+
# Check for unclosed contexts before returning
|
|
601
|
+
if len(context_stack) > 1:
|
|
602
|
+
last_context = context_stack[-1]
|
|
603
|
+
closing_char = "'" if last_context == "'" else ('"' if last_context == '"' else ")")
|
|
604
|
+
raise SchemaSyntaxError(message=f"Expected closing {closing_char}", lineno=line, pos=pos)
|
|
605
|
+
|
|
549
606
|
if i == begin:
|
|
550
|
-
#
|
|
551
|
-
|
|
607
|
+
# This happens when we're parsing a column and an expr is missing for an attribute that requires it, like
|
|
608
|
+
# DEFAULT or CODEC, and we reach the end of the schema. For example:
|
|
609
|
+
# SCHEMA >
|
|
610
|
+
# timestamp DateTime DEFAULT
|
|
611
|
+
raise SchemaSyntaxError(
|
|
612
|
+
message=f"Missing mandatory value for {attribute}",
|
|
613
|
+
lineno=line,
|
|
614
|
+
pos=pos,
|
|
615
|
+
)
|
|
552
616
|
return schema[begin:].strip(" \t\r\n")
|
|
553
617
|
|
|
554
618
|
columns: List[Dict[str, Any]] = []
|
|
@@ -556,7 +620,6 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
556
620
|
name: str = ""
|
|
557
621
|
_type: str = ""
|
|
558
622
|
default: str = ""
|
|
559
|
-
materialized: str = ""
|
|
560
623
|
codec: str = ""
|
|
561
624
|
jsonpath: str = ""
|
|
562
625
|
last: Optional[SyntaxExpr] = None
|
|
@@ -564,24 +627,22 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
564
627
|
col_end: Tuple[int, int] = (0, 0) # (0, 0) means not set. It's not a valid line/pos as they start at 1
|
|
565
628
|
|
|
566
629
|
def add_column(found: str) -> None:
|
|
567
|
-
nonlocal name, _type, default,
|
|
568
|
-
|
|
569
|
-
# TODO(eclbg): get rid of this ValueError and replace it with a custom one so it can be handled by the
|
|
570
|
-
# caller
|
|
571
|
-
raise ValueError(
|
|
572
|
-
format_parse_error(schema, i, pos, f"Syntax error: expecting NAME, found {found}", line=line)
|
|
573
|
-
)
|
|
630
|
+
nonlocal name, _type, default, codec, jsonpath, col_start, col_end
|
|
631
|
+
lineno, pos = col_start
|
|
574
632
|
default = "" if not default else f"DEFAULT {default}"
|
|
575
|
-
materialized = "" if not materialized else f"MATERIALIZED {materialized}"
|
|
576
633
|
codec = "" if not codec else f"CODEC{codec}"
|
|
577
|
-
|
|
578
|
-
|
|
634
|
+
if not name or not (_type or default):
|
|
635
|
+
raise SchemaSyntaxError(
|
|
636
|
+
message="Column name and either type or DEFAULT are required",
|
|
637
|
+
lineno=lineno,
|
|
638
|
+
pos=pos,
|
|
639
|
+
)
|
|
579
640
|
columns.append(
|
|
580
641
|
{
|
|
581
642
|
"name": name,
|
|
582
643
|
"type": _type,
|
|
583
644
|
"codec": codec,
|
|
584
|
-
"default_value": default
|
|
645
|
+
"default_value": default,
|
|
585
646
|
"jsonpath": jsonpath,
|
|
586
647
|
# "col_start": col_start,
|
|
587
648
|
# "col_end": col_end,
|
|
@@ -590,7 +651,6 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
590
651
|
name = ""
|
|
591
652
|
_type = ""
|
|
592
653
|
default = ""
|
|
593
|
-
materialized = ""
|
|
594
654
|
codec = ""
|
|
595
655
|
jsonpath = ""
|
|
596
656
|
|
|
@@ -619,7 +679,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
619
679
|
advance("")
|
|
620
680
|
valid_next = [NULL, NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA, NEW_LINE]
|
|
621
681
|
type_start_pos = pos # Save the position of the type start to use it in the error message
|
|
622
|
-
detected_type = parse_expr(
|
|
682
|
+
detected_type = parse_expr(
|
|
683
|
+
[NULL, NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "TYPE"
|
|
684
|
+
)
|
|
623
685
|
try:
|
|
624
686
|
# Imported in the body to be compatible with the CLI
|
|
625
687
|
from chtoolset.query import check_compatible_types
|
|
@@ -633,7 +695,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
633
695
|
):
|
|
634
696
|
raise SchemaSyntaxError(message=str(e), lineno=line, pos=type_start_pos)
|
|
635
697
|
else:
|
|
636
|
-
|
|
698
|
+
# TODO(eclbg): The resulting error message is a bit confusing, as the clickhouse error contains some
|
|
699
|
+
# references to positions that don't match the position in the schema.
|
|
700
|
+
raise SchemaSyntaxError(f"Error parsing type: {e}", lineno=line, pos=type_start_pos)
|
|
637
701
|
except ModuleNotFoundError:
|
|
638
702
|
pass
|
|
639
703
|
_type = detected_type
|
|
@@ -658,14 +722,16 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
658
722
|
advance("DEFAULT")
|
|
659
723
|
valid_next = [
|
|
660
724
|
CODEC,
|
|
661
|
-
TTL,
|
|
662
725
|
COMMA,
|
|
663
|
-
|
|
726
|
+
JSONPATH,
|
|
727
|
+
# The matches below are not supported. We're adding them here to say they aren't, instead of just
|
|
728
|
+
# complaining about their placement.
|
|
664
729
|
MATERIALIZED,
|
|
730
|
+
TTL,
|
|
665
731
|
NULL,
|
|
666
732
|
NOTNULL,
|
|
667
733
|
]
|
|
668
|
-
default = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA])
|
|
734
|
+
default = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "DEFAULT")
|
|
669
735
|
elif found == MATERIALIZED:
|
|
670
736
|
advance("")
|
|
671
737
|
raise SchemaSyntaxError(
|
|
@@ -684,15 +750,16 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
684
750
|
elif found == CODEC:
|
|
685
751
|
advance("CODEC")
|
|
686
752
|
valid_next = [
|
|
687
|
-
TTL,
|
|
688
753
|
COMMA,
|
|
689
754
|
JSONPATH,
|
|
690
|
-
# The
|
|
755
|
+
# The matches below are not supported. We're adding them here to say they aren't, instead of just
|
|
756
|
+
# complaining about their placement.
|
|
691
757
|
MATERIALIZED,
|
|
758
|
+
TTL,
|
|
692
759
|
NULL,
|
|
693
760
|
NOTNULL,
|
|
694
761
|
]
|
|
695
|
-
codec = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA])
|
|
762
|
+
codec = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "CODEC")
|
|
696
763
|
elif found == TTL:
|
|
697
764
|
advance("") # We need to advance to get the correct position
|
|
698
765
|
# Not implemented
|
|
@@ -706,22 +773,20 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
|
706
773
|
advance("`json:")
|
|
707
774
|
jsonpath = get_backticked()
|
|
708
775
|
elif found == COMMA:
|
|
709
|
-
if name == "INDEX":
|
|
710
|
-
advance(",")
|
|
711
|
-
continue
|
|
712
776
|
advance(",")
|
|
713
777
|
valid_next = []
|
|
714
778
|
col_end = (line, pos)
|
|
715
779
|
add_column("COMMA")
|
|
716
|
-
elif found == NEW_LINE
|
|
780
|
+
elif found == NEW_LINE:
|
|
717
781
|
i += 1
|
|
718
782
|
else:
|
|
783
|
+
# Note(eclbg): I haven't found any case where this error is raised.
|
|
719
784
|
raise ValueError(
|
|
720
785
|
format_parse_error(
|
|
721
786
|
schema,
|
|
722
787
|
i,
|
|
723
788
|
pos,
|
|
724
|
-
"wrong value, DEFAULT,
|
|
789
|
+
"wrong value. Expected a data type, DEFAULT, CODEC, a jsonpath, a comma, or a new line",
|
|
725
790
|
line=line,
|
|
726
791
|
)
|
|
727
792
|
)
|
|
@@ -819,217 +884,128 @@ def schema_to_sql_columns(schema: List[Dict[str, Any]]) -> List[str]:
|
|
|
819
884
|
|
|
820
885
|
|
|
821
886
|
def parse_table_structure(schema: str) -> List[Dict[str, Any]]:
|
|
822
|
-
"""
|
|
823
|
-
Columns follow the syntax:
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
>>> parse_table_structure('foo Float32 DEFAULT 13, bar UInt64')
|
|
945
|
-
[{'name': 'foo', 'type': 'Float32', 'codec': None, 'default_value': 'DEFAULT 13', 'jsonpath': None, 'nullable': False, 'normalized_name': 'foo'}, {'name': 'bar', 'type': 'UInt64', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'bar'}]
|
|
946
|
-
|
|
947
|
-
>>> parse_table_structure('foo Float32 DEFAULT 1$$$3')
|
|
948
|
-
Traceback (most recent call last):
|
|
949
|
-
...
|
|
950
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Invalid character '$' at 1:22.
|
|
951
|
-
|
|
952
|
-
>>> parse_table_structure('foo Float32 CODEC(Delta(4), LZ#4)')
|
|
953
|
-
Traceback (most recent call last):
|
|
954
|
-
...
|
|
955
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Invalid character '#' at 1:31.
|
|
956
|
-
|
|
957
|
-
>>> parse_table_structure('\\n `temperature` Float32,\\n `temperature_delta` Float32 MATERIALIZED temperature CODEC(Delta(4), LZ4)\\n ')
|
|
958
|
-
Traceback (most recent call last):
|
|
959
|
-
...
|
|
960
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 3:33.
|
|
961
|
-
|
|
962
|
-
>>> parse_table_structure('temperature Float32, temperature_delta Float32 MATERIALIZED temperature Codec(Delta(4)), temperature_doubledelta Float32 MATERIALIZED temperature Codec(DoubleDelta), temperature_doubledelta_lz4 Float32 MATERIALIZED temperature Codec(DoubleDelta, LZ4)')
|
|
963
|
-
Traceback (most recent call last):
|
|
964
|
-
...
|
|
965
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:48.
|
|
966
|
-
|
|
967
|
-
>>> parse_table_structure('t UInt8 CODEC(Delta(1), LZ4)')
|
|
968
|
-
[{'name': 't', 'type': 'UInt8', 'codec': 'CODEC(Delta(1), LZ4)', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 't'}]
|
|
969
|
-
|
|
970
|
-
>>> parse_table_structure('tt UInt8 MATERIALIZED t')
|
|
971
|
-
Traceback (most recent call last):
|
|
972
|
-
...
|
|
973
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
|
|
974
|
-
|
|
975
|
-
>>> parse_table_structure('tt UInt8 MATERIALIZED t CODEC(Delta(1), LZ4)')
|
|
976
|
-
Traceback (most recent call last):
|
|
977
|
-
...
|
|
978
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
|
|
979
|
-
|
|
980
|
-
>>> parse_table_structure('tt SimpleAggregateFunction(any, Nullable(UInt8))')
|
|
981
|
-
[{'name': 'tt', 'type': 'SimpleAggregateFunction(any, Nullable(UInt8))', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'tt'}]
|
|
982
|
-
|
|
983
|
-
>>> parse_table_structure("timestamp DateTime MATERIALIZED toDateTime(JSONExtractInt(JSONExtractRaw(record, 'payload'), 'timestamp') / 1000)")
|
|
984
|
-
Traceback (most recent call last):
|
|
985
|
-
...
|
|
986
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:20.
|
|
987
|
-
|
|
988
|
-
>>> parse_table_structure("`test_default_cast` DEFAULT plus(13,1)")
|
|
989
|
-
[{'name': 'test_default_cast', 'type': '', 'codec': None, 'default_value': 'DEFAULT plus(13,1)', 'jsonpath': None, 'nullable': False, 'normalized_name': 'test_default_cast'}]
|
|
990
|
-
|
|
991
|
-
>>> parse_table_structure("hola Int, `materialized` String MATERIALIZED upper(no_nullable_string)")
|
|
992
|
-
Traceback (most recent call last):
|
|
993
|
-
...
|
|
994
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:33.
|
|
995
|
-
|
|
996
|
-
>>> parse_table_structure('`a2` String `json:$.a2`, `a3` String `json:$.a3`\\n')
|
|
997
|
-
[{'name': 'a2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
|
|
998
|
-
|
|
999
|
-
>>> parse_table_structure("`arr` Array(String) DEFAULT ['-']")
|
|
1000
|
-
[{'name': 'arr', 'type': 'Array(String)', 'codec': None, 'default_value': "DEFAULT ['-']", 'jsonpath': None, 'nullable': False, 'normalized_name': 'arr'}]
|
|
1001
|
-
|
|
1002
|
-
>>> parse_table_structure("`arr` Array(String) DEFAULT array('-')")
|
|
1003
|
-
[{'name': 'arr', 'type': 'Array(String)', 'codec': None, 'default_value': "DEFAULT array('-')", 'jsonpath': None, 'nullable': False, 'normalized_name': 'arr'}]
|
|
1004
|
-
|
|
1005
|
-
>>> parse_table_structure('`a2` Float32 CODEC(Delta, ZSTD(4)) `json:$.a2`, `a3` String `json:$.a3`\\n')
|
|
1006
|
-
[{'name': 'a2', 'type': 'Float32', 'codec': 'CODEC(Delta, ZSTD(4))', 'default_value': None, 'jsonpath': '$.a2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
|
|
1007
|
-
|
|
1008
|
-
>>> parse_table_structure('`a` String, INDEX index_name a TYPE set(100) GRANULARITY 100')
|
|
1009
|
-
Traceback (most recent call last):
|
|
1010
|
-
...
|
|
1011
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 1:13. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
|
|
1012
|
-
|
|
1013
|
-
>>> parse_table_structure(' `a` String,\\n INDEX index_name a TYPE set(100, 1) GRANULARITY 100')
|
|
1014
|
-
Traceback (most recent call last):
|
|
1015
|
-
...
|
|
1016
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 2:5. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
|
|
1017
|
-
|
|
1018
|
-
>>> parse_table_structure('`index` String, INDEX index_name a TYPE set(100, 1) GRANULARITY 100')
|
|
1019
|
-
Traceback (most recent call last):
|
|
1020
|
-
...
|
|
1021
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 1:17. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
|
|
1022
|
-
|
|
1023
|
-
>>> parse_table_structure('`a2` String `json:$.a--2`, `a3` String `json:$.a3`\\n')
|
|
1024
|
-
[{'name': 'a2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a--2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
|
|
1025
|
-
|
|
1026
|
-
>>> parse_table_structure('a InvalidType')
|
|
1027
|
-
Traceback (most recent call last):
|
|
1028
|
-
...
|
|
1029
|
-
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Unknown data type family: InvalidType at 1:3.
|
|
1030
|
-
|
|
1031
|
-
>>> parse_table_structure('a Int32 DEFAULT 'a') # doctest: +SKIP
|
|
1032
|
-
# should fail as the type and default expr are incompatible
|
|
887
|
+
"""Parse a table schema definition into a structured format.
|
|
888
|
+
Columns follow the syntax: name [type] [DEFAULT expr] [CODEC codec] [JSONPATH `json:jsonpath`] [,]
|
|
889
|
+
|
|
890
|
+
Args:
|
|
891
|
+
schema: The schema definition string
|
|
892
|
+
|
|
893
|
+
Returns:
|
|
894
|
+
List of dictionaries containing column definitions
|
|
895
|
+
|
|
896
|
+
Examples:
|
|
897
|
+
>>> parse_table_structure('') # Empty schema
|
|
898
|
+
[]
|
|
899
|
+
|
|
900
|
+
>>> parse_table_structure('col Int32') # Basic column
|
|
901
|
+
[{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
902
|
+
|
|
903
|
+
>>> parse_table_structure('col1 Int32, col2 String') # Multiple columns
|
|
904
|
+
[{'name': 'col1', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col1'}, {'name': 'col2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col2'}]
|
|
905
|
+
|
|
906
|
+
>>> parse_table_structure('col Int32 DEFAULT 0') # With DEFAULT
|
|
907
|
+
[{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': 'DEFAULT 0', 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
908
|
+
|
|
909
|
+
>>> parse_table_structure('col DEFAULT 42') # Column without type but with default
|
|
910
|
+
[{'name': 'col', 'type': '', 'codec': None, 'default_value': 'DEFAULT 42', 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
911
|
+
|
|
912
|
+
>>> parse_table_structure('col String CODEC(ZSTD)') # With CODEC
|
|
913
|
+
[{'name': 'col', 'type': 'String', 'codec': 'CODEC(ZSTD)', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
914
|
+
|
|
915
|
+
>>> parse_table_structure('`column.name!@#$%` String') # Quoted identifier
|
|
916
|
+
[{'name': 'column.name!@#$%', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'column.name!@#$%'}]
|
|
917
|
+
|
|
918
|
+
>>> parse_table_structure('col Nullable(Int32)') # Nullable type
|
|
919
|
+
[{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': True, 'normalized_name': 'col'}]
|
|
920
|
+
|
|
921
|
+
>>> parse_table_structure('col Array(Int32)') # Complex type
|
|
922
|
+
[{'name': 'col', 'type': 'Array(Int32)', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
923
|
+
|
|
924
|
+
>>> parse_table_structure('col SimpleAggregateFunction(any, Int32)') # Aggregate function
|
|
925
|
+
[{'name': 'col', 'type': 'SimpleAggregateFunction(any, Int32)', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
|
|
926
|
+
|
|
927
|
+
Error cases:
|
|
928
|
+
>>> parse_table_structure('col') # Missing type
|
|
929
|
+
Traceback (most recent call last):
|
|
930
|
+
...
|
|
931
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name and either type or DEFAULT are required at 1:1.
|
|
932
|
+
|
|
933
|
+
>>> parse_table_structure('`col Int32') # Unclosed backtick
|
|
934
|
+
Traceback (most recent call last):
|
|
935
|
+
...
|
|
936
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Expected closing backtick at 1:5.
|
|
937
|
+
|
|
938
|
+
>>> parse_table_structure('col Int32 DEFAULT') # Missing DEFAULT value
|
|
939
|
+
Traceback (most recent call last):
|
|
940
|
+
...
|
|
941
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for DEFAULT at 1:18.
|
|
942
|
+
|
|
943
|
+
>>> parse_table_structure('col Int32 CODEC') # Missing CODEC parameters
|
|
944
|
+
Traceback (most recent call last):
|
|
945
|
+
...
|
|
946
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for CODEC at 1:16.
|
|
947
|
+
|
|
948
|
+
>>> parse_table_structure('col#name Int32') # Invalid character in name
|
|
949
|
+
Traceback (most recent call last):
|
|
950
|
+
...
|
|
951
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name contains invalid character '#' at 1:4.
|
|
952
|
+
Hint: use backticks.
|
|
953
|
+
|
|
954
|
+
>>> parse_table_structure('col Int32 MATERIALIZED expr') # Unsupported MATERIALIZED
|
|
955
|
+
Traceback (most recent call last):
|
|
956
|
+
...
|
|
957
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
|
|
958
|
+
|
|
959
|
+
>>> parse_table_structure('col Int32 TTL timestamp + INTERVAL 1 DAY') # Unsupported TTL
|
|
960
|
+
Traceback (most recent call last):
|
|
961
|
+
...
|
|
962
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: column TTL is not supported at 1:11.
|
|
963
|
+
|
|
964
|
+
>>> parse_table_structure('col Int32 NULL') # Unsupported NULL
|
|
965
|
+
Traceback (most recent call last):
|
|
966
|
+
...
|
|
967
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: NULL column syntax not supported at 1:11.
|
|
968
|
+
Hint: use Nullable(...).
|
|
969
|
+
|
|
970
|
+
>>> parse_table_structure('col Int32 NOT NULL') # Unsupported NOT NULL
|
|
971
|
+
Traceback (most recent call last):
|
|
972
|
+
...
|
|
973
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: NOT NULL column syntax not supported at 1:11.
|
|
974
|
+
Hint: Columns are not nullable by default.
|
|
975
|
+
|
|
976
|
+
>>> parse_table_structure('''
|
|
977
|
+
... col Array(Int32)
|
|
978
|
+
... CODEC(
|
|
979
|
+
... ZSTD''') # Unclosed CODEC parenthesis across lines
|
|
980
|
+
Traceback (most recent call last):
|
|
981
|
+
...
|
|
982
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Expected closing ) at 4:17.
|
|
983
|
+
|
|
984
|
+
>>> parse_table_structure('''
|
|
985
|
+
... timestamp DateTime
|
|
986
|
+
... DEFAULT
|
|
987
|
+
... CODEC(ZSTD)''') # Missing DEFAULT value with following CODEC
|
|
988
|
+
Traceback (most recent call last):
|
|
989
|
+
...
|
|
990
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for DEFAULT at 3:16.
|
|
991
|
+
|
|
992
|
+
>>> parse_table_structure('''
|
|
993
|
+
... col String
|
|
994
|
+
... DEFAULT 'test'
|
|
995
|
+
... MATERIALIZED
|
|
996
|
+
... now()''') # MATERIALIZED with heavy indentation
|
|
997
|
+
Traceback (most recent call last):
|
|
998
|
+
...
|
|
999
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 4:13.
|
|
1000
|
+
|
|
1001
|
+
>>> parse_table_structure('''
|
|
1002
|
+
... `column.with.dots`
|
|
1003
|
+
... Int32
|
|
1004
|
+
... TTL
|
|
1005
|
+
... timestamp + INTERVAL 1 DAY''') # TTL with increasing indentation
|
|
1006
|
+
Traceback (most recent call last):
|
|
1007
|
+
...
|
|
1008
|
+
tinybird.tb.modules.datafile.common.SchemaSyntaxError: column TTL is not supported at 4:18.
|
|
1033
1009
|
"""
|
|
1034
1010
|
return _parse_table_structure(schema)
|
|
1035
1011
|
|