tinybird 0.0.1.dev17__py3-none-any.whl → 0.0.1.dev18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,13 +33,42 @@ class DatafileSyntaxError(Exception):
33
33
  def __init__(self, message: str, lineno: int, pos: int, hint: Optional[str] = None):
34
34
  super().__init__(message)
35
35
  self.message = message
36
+ self.context = None
36
37
  self.hint = hint
37
38
  self.lineno = lineno
38
39
  self.pos = pos
39
40
 
41
+ def add_context(self, context: str):
42
+ self.context = context
43
+
44
+ def get_context_from_file_contents(self, s: str) -> None:
45
+ lines = s.splitlines()
46
+
47
+ start_line = max(0, self.lineno - 3) # 2 lines before
48
+ end_line = self.lineno # Only context before the error
49
+
50
+ # Calculate padding needed for line numbers
51
+ max_line_digits = len(str(end_line))
52
+
53
+ context = []
54
+ for i in range(start_line, end_line):
55
+ line_num = str(i + 1).rjust(max_line_digits)
56
+ line = lines[i].rstrip()
57
+ context.append(f"{line_num}: {line}")
58
+
59
+ # Add pointer line if this is the error line
60
+ if i + 1 == self.lineno:
61
+ pointer = " " * (max_line_digits + 2 + self.pos - 1) + "^"
62
+ context.append(pointer)
63
+
64
+ error_context = "\n".join(context)
65
+ self.add_context(error_context)
66
+
40
67
  def __str__(self) -> str:
41
- hint = f" {self.hint}." if self.hint else ""
42
- return f"{self.message} at {self.lineno}:{self.pos}." + hint
68
+ output = f"{self.message}"
69
+ output += f"\n\n{self.context}" if self.context else f" at {self.lineno}:{self.pos}."
70
+ output += f"\n{self.hint}." if self.hint else ""
71
+ return output
43
72
 
44
73
 
45
74
  class SchemaSyntaxError(DatafileSyntaxError):
@@ -242,15 +271,21 @@ def parse_indexes_structure(indexes: Optional[List[str]]) -> List[TableIndex]:
242
271
  >>> parse_indexes_structure(["index_name u64 * length(s)"])
243
272
  Traceback (most recent call last):
244
273
  ...
245
- tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1. Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
274
+ tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
275
+ Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
276
+
246
277
  >>> parse_indexes_structure(["index_name a TYPE set(100) GRANULARITY 100, index_name_bf mapValues(d) TYPE bloom_filter(0.001) GRANULARITY 16"])
247
278
  Traceback (most recent call last):
248
279
  ...
249
- tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1. Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
280
+ tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 1:1.
281
+ Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
282
+
250
283
  >>> parse_indexes_structure(["", " ", " wrong_index_syntax,"])
251
284
  Traceback (most recent call last):
252
285
  ...
253
- tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 3:6. Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
286
+ tinybird.tb.modules.datafile.common.IndexesSyntaxError: Invalid INDEX syntax at 3:6.
287
+ Usage: `[INDEX] name expr TYPE type_full GRANULARITY granularity`.
288
+
254
289
  >>> parse_indexes_structure(["my_index m['key'] TYPE ngrambf_v1(1, 1024, 1, 42) GRANULARITY 1"])
255
290
  [TableIndex(name='my_index', expr="m['key']", type_full='ngrambf_v1(1, 1024, 1, 42)', granularity='1')]
256
291
  >>> parse_indexes_structure(["my_index_lambda arrayMap(x -> tupleElement(x,'message'), column_name) TYPE ngrambf_v1(1, 1024, 1, 42) GRANULARITY 1"])
@@ -513,9 +548,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
513
548
  if c not in valid_chars_name:
514
549
  raise SchemaSyntaxError(
515
550
  message=f"Column name contains invalid character {repr(c)}",
516
- hint="Tip: use backticks",
551
+ hint="Hint: use backticks",
517
552
  lineno=line,
518
- pos=i + 1,
553
+ pos=pos,
519
554
  )
520
555
  advance_single_char()
521
556
  return schema[begin:i]
@@ -524,7 +559,11 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
524
559
  advance_single_char()
525
560
  return get_backticked()
526
561
 
527
- def parse_expr(lookup: Iterable[SyntaxExpr]) -> str:
562
+ def parse_expr(lookup: Iterable[SyntaxExpr], attribute: str) -> str:
563
+ """Parse an expression for an attribute.
564
+
565
+ The name of the attribute is used to generate the error message.
566
+ """
528
567
  nonlocal i, line, pos
529
568
 
530
569
  begin: int = i
@@ -542,13 +581,38 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
542
581
  elif c == "(" and (context is None or context == "("):
543
582
  context_stack.append("(")
544
583
  elif context is None and lookahead_matches(lookup):
584
+ if i == begin:
585
+ # This happens when we're parsing a column and an expr is missing for an attribute that requires it,
586
+ # like DEFAULT or CODEC. For example:
587
+ # SCHEMA >
588
+ # timestamp DateTime DEFAULT,
589
+ # col_b Int32
590
+ raise SchemaSyntaxError(
591
+ message=f"Missing mandatory value for {attribute}",
592
+ lineno=line,
593
+ pos=pos,
594
+ )
545
595
  return schema[begin:i].strip(" \t\r\n")
546
596
  elif (context is None and c not in valid_chars_fn) or (context == "(" and c not in valid_chars_fn):
547
597
  raise SchemaSyntaxError(message=f"Invalid character {repr(c)}", lineno=line, pos=pos)
548
598
  advance_single_char()
599
+
600
+ # Check for unclosed contexts before returning
601
+ if len(context_stack) > 1:
602
+ last_context = context_stack[-1]
603
+ closing_char = "'" if last_context == "'" else ('"' if last_context == '"' else ")")
604
+ raise SchemaSyntaxError(message=f"Expected closing {closing_char}", lineno=line, pos=pos)
605
+
549
606
  if i == begin:
550
- # TODO(eclbg): Turn this into a SchemaSyntaxError. I don't know when it happens
551
- raise ValueError(format_parse_error(schema, i, pos, "wrong value", line=line))
607
+ # This happens when we're parsing a column and an expr is missing for an attribute that requires it, like
608
+ # DEFAULT or CODEC, and we reach the end of the schema. For example:
609
+ # SCHEMA >
610
+ # timestamp DateTime DEFAULT
611
+ raise SchemaSyntaxError(
612
+ message=f"Missing mandatory value for {attribute}",
613
+ lineno=line,
614
+ pos=pos,
615
+ )
552
616
  return schema[begin:].strip(" \t\r\n")
553
617
 
554
618
  columns: List[Dict[str, Any]] = []
@@ -556,7 +620,6 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
556
620
  name: str = ""
557
621
  _type: str = ""
558
622
  default: str = ""
559
- materialized: str = ""
560
623
  codec: str = ""
561
624
  jsonpath: str = ""
562
625
  last: Optional[SyntaxExpr] = None
@@ -564,24 +627,22 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
564
627
  col_end: Tuple[int, int] = (0, 0) # (0, 0) means not set. It's not a valid line/pos as they start at 1
565
628
 
566
629
  def add_column(found: str) -> None:
567
- nonlocal name, _type, default, materialized, codec, jsonpath, col_start, col_end
568
- if not name:
569
- # TODO(eclbg): get rid of this ValueError and replace it with a custom one so it can be handled by the
570
- # caller
571
- raise ValueError(
572
- format_parse_error(schema, i, pos, f"Syntax error: expecting NAME, found {found}", line=line)
573
- )
630
+ nonlocal name, _type, default, codec, jsonpath, col_start, col_end
631
+ lineno, pos = col_start
574
632
  default = "" if not default else f"DEFAULT {default}"
575
- materialized = "" if not materialized else f"MATERIALIZED {materialized}"
576
633
  codec = "" if not codec else f"CODEC{codec}"
577
- # TODO(eclbg): We should validate the column as a whole. Name is mandatory, and one of type, default_value or
578
- # materialized (I think).
634
+ if not name or not (_type or default):
635
+ raise SchemaSyntaxError(
636
+ message="Column name and either type or DEFAULT are required",
637
+ lineno=lineno,
638
+ pos=pos,
639
+ )
579
640
  columns.append(
580
641
  {
581
642
  "name": name,
582
643
  "type": _type,
583
644
  "codec": codec,
584
- "default_value": default or materialized,
645
+ "default_value": default,
585
646
  "jsonpath": jsonpath,
586
647
  # "col_start": col_start,
587
648
  # "col_end": col_end,
@@ -590,7 +651,6 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
590
651
  name = ""
591
652
  _type = ""
592
653
  default = ""
593
- materialized = ""
594
654
  codec = ""
595
655
  jsonpath = ""
596
656
 
@@ -619,7 +679,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
619
679
  advance("")
620
680
  valid_next = [NULL, NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA, NEW_LINE]
621
681
  type_start_pos = pos # Save the position of the type start to use it in the error message
622
- detected_type = parse_expr([NULL, NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA])
682
+ detected_type = parse_expr(
683
+ [NULL, NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "TYPE"
684
+ )
623
685
  try:
624
686
  # Imported in the body to be compatible with the CLI
625
687
  from chtoolset.query import check_compatible_types
@@ -633,7 +695,9 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
633
695
  ):
634
696
  raise SchemaSyntaxError(message=str(e), lineno=line, pos=type_start_pos)
635
697
  else:
636
- raise e
698
+ # TODO(eclbg): The resulting error message is a bit confusing, as the clickhouse error contains some
699
+ # references to positions that don't match the position in the schema.
700
+ raise SchemaSyntaxError(f"Error parsing type: {e}", lineno=line, pos=type_start_pos)
637
701
  except ModuleNotFoundError:
638
702
  pass
639
703
  _type = detected_type
@@ -658,14 +722,16 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
658
722
  advance("DEFAULT")
659
723
  valid_next = [
660
724
  CODEC,
661
- TTL,
662
725
  COMMA,
663
- # The three matches below are never valid. We're adding them here to avoid just complaining about their placement.
726
+ JSONPATH,
727
+ # The matches below are not supported. We're adding them here to say they aren't, instead of just
728
+ # complaining about their placement.
664
729
  MATERIALIZED,
730
+ TTL,
665
731
  NULL,
666
732
  NOTNULL,
667
733
  ]
668
- default = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA])
734
+ default = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "DEFAULT")
669
735
  elif found == MATERIALIZED:
670
736
  advance("")
671
737
  raise SchemaSyntaxError(
@@ -684,15 +750,16 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
684
750
  elif found == CODEC:
685
751
  advance("CODEC")
686
752
  valid_next = [
687
- TTL,
688
753
  COMMA,
689
754
  JSONPATH,
690
- # The three matches below are never valid. We're adding them here to avoid just complaining about their placement.
755
+ # The matches below are not supported. We're adding them here to say they aren't, instead of just
756
+ # complaining about their placement.
691
757
  MATERIALIZED,
758
+ TTL,
692
759
  NULL,
693
760
  NOTNULL,
694
761
  ]
695
- codec = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA])
762
+ codec = parse_expr([NOTNULL, DEFAULT, MATERIALIZED, ALIAS, CODEC, TTL, JSONPATH, COMMA], "CODEC")
696
763
  elif found == TTL:
697
764
  advance("") # We need to advance to get the correct position
698
765
  # Not implemented
@@ -706,22 +773,20 @@ def _parse_table_structure(schema: str) -> List[Dict[str, Any]]:
706
773
  advance("`json:")
707
774
  jsonpath = get_backticked()
708
775
  elif found == COMMA:
709
- if name == "INDEX":
710
- advance(",")
711
- continue
712
776
  advance(",")
713
777
  valid_next = []
714
778
  col_end = (line, pos)
715
779
  add_column("COMMA")
716
- elif found == NEW_LINE or (name == "INDEX" and not found):
780
+ elif found == NEW_LINE:
717
781
  i += 1
718
782
  else:
783
+ # Note(eclbg): I haven't found any case where this error is raised.
719
784
  raise ValueError(
720
785
  format_parse_error(
721
786
  schema,
722
787
  i,
723
788
  pos,
724
- "wrong value, DEFAULT, MATERIALIZED, CODEC, TTL expressions, a column data type, a comma, a new line or a jsonpath",
789
+ "wrong value. Expected a data type, DEFAULT, CODEC, a jsonpath, a comma, or a new line",
725
790
  line=line,
726
791
  )
727
792
  )
@@ -819,217 +884,128 @@ def schema_to_sql_columns(schema: List[Dict[str, Any]]) -> List[str]:
819
884
 
820
885
 
821
886
  def parse_table_structure(schema: str) -> List[Dict[str, Any]]:
822
- """This parses the SQL schema for a CREATE TABLE
823
- Columns follow the syntax: name1 [type1] [DEFAULT expr1] [CODEC compression_codec] [TTL expr1] [JSONPATH `json:jsonpath`] [,]
824
-
825
- The ClickHouse reference is followed pretty loosely at this point.
826
- Reference: https://clickhouse.tech/docs/en/sql-reference/statements/create/table/#syntax-forms
827
-
828
- >>> parse_table_structure('potato') # doctest: +SKIP
829
- Traceback (most recent call last):
830
- ...
831
- tinybird.sql.MalformedColumnError: Column name and either type or default_value are required
832
-
833
- >>> parse_table_structure(' potato Int32')
834
- [{'name': 'potato', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'potato'}]
835
-
836
- >>> parse_table_structure('`c Int32')
837
- Traceback (most recent call last):
838
- ...
839
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Expected closing backtick at 1:3.
840
-
841
- >>> parse_table_structure('c Float32, b String')
842
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
843
-
844
- >>> parse_table_structure('c Float32,--comment\\nb String')
845
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
846
-
847
- >>> parse_table_structure('c Float32,--comment\\nb String --another-comment')
848
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
849
-
850
- >>> parse_table_structure('c Float32 --first-comment\\n,--comment\\nb String --another-comment')
851
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
852
-
853
- >>> parse_table_structure('--random comment here\\nc Float32 --another comment\\n,--another one\\nb String --this is the last one')
854
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
855
-
856
- >>> parse_table_structure('--extra comment\\nc--extra comment\\nFloat32--extra comment\\n,--extra comment\\nb--extra comment\\nString--extra comment')
857
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}, {'name': 'b', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'b'}]
858
-
859
- >>> parse_table_structure('c Nullable(Float32)')
860
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': True, 'normalized_name': 'c'}]
861
-
862
- >>> parse_table_structure('c Nullable(Float32) DEFAULT NULL')
863
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': True, 'normalized_name': 'c'}]
864
-
865
- >>> parse_table_structure("c String DEFAULT 'bla'")
866
- [{'name': 'c', 'type': 'String', 'codec': None, 'default_value': "DEFAULT 'bla'", 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}]
867
-
868
- >>> parse_table_structure('`foo.bar` UInt64')
869
- [{'name': 'foo.bar', 'type': 'UInt64', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'foo.bar'}]
870
-
871
- >>> parse_table_structure('double_value Float64 CODEC(LZ4HC(2))')
872
- [{'name': 'double_value', 'type': 'Float64', 'codec': 'CODEC(LZ4HC(2))', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'double_value'}]
873
-
874
- >>> parse_table_structure('doubl/e_value Float64 CODEC(LZ4HC(2))')
875
- Traceback (most recent call last):
876
- ...
877
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name contains invalid character '/' at 1:6. Tip: use backticks.
878
-
879
- >>> parse_table_structure('`c` Nullable(Float32)')
880
- [{'name': 'c', 'type': 'Float32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': True, 'normalized_name': 'c'}]
881
-
882
- >>> parse_table_structure('wadus INT UNSIGNED')
883
- [{'name': 'wadus', 'type': 'INT UNSIGNED', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'wadus'}]
884
-
885
- >>> parse_table_structure('c Int32 CODEC(Delta, LZ4)\\n')
886
- [{'name': 'c', 'type': 'Int32', 'codec': 'CODEC(Delta, LZ4)', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'c'}]
887
-
888
- >>> parse_table_structure('c SimpleAggregateFunction(sum, Int32),\\np SimpleAggregateFunction(sum, Int32)')
889
- Traceback (most recent call last):
890
- ...
891
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Incompatible data types between aggregate function 'sum' which returns Int64 and column storage type Int32 at 1:4.
892
-
893
- >>> parse_table_structure('c Int32 CODEC(Delta, LZ4) Materialized b*2\\n')
894
- Traceback (most recent call last):
895
- ...
896
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:27.
897
-
898
- >>> parse_table_structure('c Int32 CODEC(Delta, LZ4) Materialized ifNull(b*2, 0)\\n')
899
- Traceback (most recent call last):
900
- ...
901
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:27.
902
-
903
- >>> parse_table_structure('c Int32 Materialized b*2\\n')
904
- Traceback (most recent call last):
905
- ...
906
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:9.
907
-
908
- >>> parse_table_structure('c Int32 Materialized b != 1 ? b*2: pow(b, 3)\\n')
909
- Traceback (most recent call last):
910
- ...
911
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:9.
912
-
913
- >>> parse_table_structure('')
914
- []
915
-
916
- >>> parse_table_structure('`date` Date,`timezone` String,`offset` Int32')
917
- [{'name': 'date', 'type': 'Date', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'date'}, {'name': 'timezone', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'timezone'}, {'name': 'offset', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'offset'}]
918
-
919
- >>> parse_table_structure('c Int32 Materialized b*2 CODEC(Delta, LZ4)\\n')
920
- Traceback (most recent call last):
921
- ...
922
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:9.
923
-
924
- >>> parse_table_structure('c Int32 Materialized ifNull(b*2, 0) CODEC(Delta, LZ4)\\n')
925
- Traceback (most recent call last):
926
- ...
927
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:9.
928
-
929
- >>> parse_table_structure('`temperature_delta` Float32 MATERIALIZED temperature CODEC(Delta(4), LZ4)')
930
- Traceback (most recent call last):
931
- ...
932
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:29.
933
-
934
- >>> parse_table_structure('foo^bar Float32')
935
- Traceback (most recent call last):
936
- ...
937
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name contains invalid character '^' at 1:4. Tip: use backticks.
938
-
939
- >>> parse_table_structure('foo Float#32')
940
- Traceback (most recent call last):
941
- ...
942
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Invalid character '#' at 1:10.
943
-
944
- >>> parse_table_structure('foo Float32 DEFAULT 13, bar UInt64')
945
- [{'name': 'foo', 'type': 'Float32', 'codec': None, 'default_value': 'DEFAULT 13', 'jsonpath': None, 'nullable': False, 'normalized_name': 'foo'}, {'name': 'bar', 'type': 'UInt64', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'bar'}]
946
-
947
- >>> parse_table_structure('foo Float32 DEFAULT 1$$$3')
948
- Traceback (most recent call last):
949
- ...
950
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Invalid character '$' at 1:22.
951
-
952
- >>> parse_table_structure('foo Float32 CODEC(Delta(4), LZ#4)')
953
- Traceback (most recent call last):
954
- ...
955
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Invalid character '#' at 1:31.
956
-
957
- >>> parse_table_structure('\\n `temperature` Float32,\\n `temperature_delta` Float32 MATERIALIZED temperature CODEC(Delta(4), LZ4)\\n ')
958
- Traceback (most recent call last):
959
- ...
960
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 3:33.
961
-
962
- >>> parse_table_structure('temperature Float32, temperature_delta Float32 MATERIALIZED temperature Codec(Delta(4)), temperature_doubledelta Float32 MATERIALIZED temperature Codec(DoubleDelta), temperature_doubledelta_lz4 Float32 MATERIALIZED temperature Codec(DoubleDelta, LZ4)')
963
- Traceback (most recent call last):
964
- ...
965
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:48.
966
-
967
- >>> parse_table_structure('t UInt8 CODEC(Delta(1), LZ4)')
968
- [{'name': 't', 'type': 'UInt8', 'codec': 'CODEC(Delta(1), LZ4)', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 't'}]
969
-
970
- >>> parse_table_structure('tt UInt8 MATERIALIZED t')
971
- Traceback (most recent call last):
972
- ...
973
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
974
-
975
- >>> parse_table_structure('tt UInt8 MATERIALIZED t CODEC(Delta(1), LZ4)')
976
- Traceback (most recent call last):
977
- ...
978
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
979
-
980
- >>> parse_table_structure('tt SimpleAggregateFunction(any, Nullable(UInt8))')
981
- [{'name': 'tt', 'type': 'SimpleAggregateFunction(any, Nullable(UInt8))', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'tt'}]
982
-
983
- >>> parse_table_structure("timestamp DateTime MATERIALIZED toDateTime(JSONExtractInt(JSONExtractRaw(record, 'payload'), 'timestamp') / 1000)")
984
- Traceback (most recent call last):
985
- ...
986
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:20.
987
-
988
- >>> parse_table_structure("`test_default_cast` DEFAULT plus(13,1)")
989
- [{'name': 'test_default_cast', 'type': '', 'codec': None, 'default_value': 'DEFAULT plus(13,1)', 'jsonpath': None, 'nullable': False, 'normalized_name': 'test_default_cast'}]
990
-
991
- >>> parse_table_structure("hola Int, `materialized` String MATERIALIZED upper(no_nullable_string)")
992
- Traceback (most recent call last):
993
- ...
994
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:33.
995
-
996
- >>> parse_table_structure('`a2` String `json:$.a2`, `a3` String `json:$.a3`\\n')
997
- [{'name': 'a2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
998
-
999
- >>> parse_table_structure("`arr` Array(String) DEFAULT ['-']")
1000
- [{'name': 'arr', 'type': 'Array(String)', 'codec': None, 'default_value': "DEFAULT ['-']", 'jsonpath': None, 'nullable': False, 'normalized_name': 'arr'}]
1001
-
1002
- >>> parse_table_structure("`arr` Array(String) DEFAULT array('-')")
1003
- [{'name': 'arr', 'type': 'Array(String)', 'codec': None, 'default_value': "DEFAULT array('-')", 'jsonpath': None, 'nullable': False, 'normalized_name': 'arr'}]
1004
-
1005
- >>> parse_table_structure('`a2` Float32 CODEC(Delta, ZSTD(4)) `json:$.a2`, `a3` String `json:$.a3`\\n')
1006
- [{'name': 'a2', 'type': 'Float32', 'codec': 'CODEC(Delta, ZSTD(4))', 'default_value': None, 'jsonpath': '$.a2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
1007
-
1008
- >>> parse_table_structure('`a` String, INDEX index_name a TYPE set(100) GRANULARITY 100')
1009
- Traceback (most recent call last):
1010
- ...
1011
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 1:13. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
1012
-
1013
- >>> parse_table_structure(' `a` String,\\n INDEX index_name a TYPE set(100, 1) GRANULARITY 100')
1014
- Traceback (most recent call last):
1015
- ...
1016
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 2:5. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
1017
-
1018
- >>> parse_table_structure('`index` String, INDEX index_name a TYPE set(100, 1) GRANULARITY 100')
1019
- Traceback (most recent call last):
1020
- ...
1021
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Forbidden INDEX definition at 1:17. Indexes are not allowed in SCHEMA section. Use the INDEXES section instead.
1022
-
1023
- >>> parse_table_structure('`a2` String `json:$.a--2`, `a3` String `json:$.a3`\\n')
1024
- [{'name': 'a2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a--2', 'nullable': False, 'normalized_name': 'a2'}, {'name': 'a3', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': '$.a3', 'nullable': False, 'normalized_name': 'a3'}]
1025
-
1026
- >>> parse_table_structure('a InvalidType')
1027
- Traceback (most recent call last):
1028
- ...
1029
- tinybird.tb.modules.datafile.common.SchemaSyntaxError: Unknown data type family: InvalidType at 1:3.
1030
-
1031
- >>> parse_table_structure('a Int32 DEFAULT 'a') # doctest: +SKIP
1032
- # should fail as the type and default expr are incompatible
887
+ """Parse a table schema definition into a structured format.
888
+ Columns follow the syntax: name [type] [DEFAULT expr] [CODEC codec] [JSONPATH `json:jsonpath`] [,]
889
+
890
+ Args:
891
+ schema: The schema definition string
892
+
893
+ Returns:
894
+ List of dictionaries containing column definitions
895
+
896
+ Examples:
897
+ >>> parse_table_structure('') # Empty schema
898
+ []
899
+
900
+ >>> parse_table_structure('col Int32') # Basic column
901
+ [{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
902
+
903
+ >>> parse_table_structure('col1 Int32, col2 String') # Multiple columns
904
+ [{'name': 'col1', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col1'}, {'name': 'col2', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col2'}]
905
+
906
+ >>> parse_table_structure('col Int32 DEFAULT 0') # With DEFAULT
907
+ [{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': 'DEFAULT 0', 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
908
+
909
+ >>> parse_table_structure('col DEFAULT 42') # Column without type but with default
910
+ [{'name': 'col', 'type': '', 'codec': None, 'default_value': 'DEFAULT 42', 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
911
+
912
+ >>> parse_table_structure('col String CODEC(ZSTD)') # With CODEC
913
+ [{'name': 'col', 'type': 'String', 'codec': 'CODEC(ZSTD)', 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
914
+
915
+ >>> parse_table_structure('`column.name!@#$%` String') # Quoted identifier
916
+ [{'name': 'column.name!@#$%', 'type': 'String', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'column.name!@#$%'}]
917
+
918
+ >>> parse_table_structure('col Nullable(Int32)') # Nullable type
919
+ [{'name': 'col', 'type': 'Int32', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': True, 'normalized_name': 'col'}]
920
+
921
+ >>> parse_table_structure('col Array(Int32)') # Complex type
922
+ [{'name': 'col', 'type': 'Array(Int32)', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
923
+
924
+ >>> parse_table_structure('col SimpleAggregateFunction(any, Int32)') # Aggregate function
925
+ [{'name': 'col', 'type': 'SimpleAggregateFunction(any, Int32)', 'codec': None, 'default_value': None, 'jsonpath': None, 'nullable': False, 'normalized_name': 'col'}]
926
+
927
+ Error cases:
928
+ >>> parse_table_structure('col') # Missing type
929
+ Traceback (most recent call last):
930
+ ...
931
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name and either type or DEFAULT are required at 1:1.
932
+
933
+ >>> parse_table_structure('`col Int32') # Unclosed backtick
934
+ Traceback (most recent call last):
935
+ ...
936
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Expected closing backtick at 1:5.
937
+
938
+ >>> parse_table_structure('col Int32 DEFAULT') # Missing DEFAULT value
939
+ Traceback (most recent call last):
940
+ ...
941
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for DEFAULT at 1:18.
942
+
943
+ >>> parse_table_structure('col Int32 CODEC') # Missing CODEC parameters
944
+ Traceback (most recent call last):
945
+ ...
946
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for CODEC at 1:16.
947
+
948
+ >>> parse_table_structure('col#name Int32') # Invalid character in name
949
+ Traceback (most recent call last):
950
+ ...
951
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Column name contains invalid character '#' at 1:4.
952
+ Hint: use backticks.
953
+
954
+ >>> parse_table_structure('col Int32 MATERIALIZED expr') # Unsupported MATERIALIZED
955
+ Traceback (most recent call last):
956
+ ...
957
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 1:11.
958
+
959
+ >>> parse_table_structure('col Int32 TTL timestamp + INTERVAL 1 DAY') # Unsupported TTL
960
+ Traceback (most recent call last):
961
+ ...
962
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: column TTL is not supported at 1:11.
963
+
964
+ >>> parse_table_structure('col Int32 NULL') # Unsupported NULL
965
+ Traceback (most recent call last):
966
+ ...
967
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: NULL column syntax not supported at 1:11.
968
+ Hint: use Nullable(...).
969
+
970
+ >>> parse_table_structure('col Int32 NOT NULL') # Unsupported NOT NULL
971
+ Traceback (most recent call last):
972
+ ...
973
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: NOT NULL column syntax not supported at 1:11.
974
+ Hint: Columns are not nullable by default.
975
+
976
+ >>> parse_table_structure('''
977
+ ... col Array(Int32)
978
+ ... CODEC(
979
+ ... ZSTD''') # Unclosed CODEC parenthesis across lines
980
+ Traceback (most recent call last):
981
+ ...
982
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Expected closing ) at 4:17.
983
+
984
+ >>> parse_table_structure('''
985
+ ... timestamp DateTime
986
+ ... DEFAULT
987
+ ... CODEC(ZSTD)''') # Missing DEFAULT value with following CODEC
988
+ Traceback (most recent call last):
989
+ ...
990
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: Missing mandatory value for DEFAULT at 3:16.
991
+
992
+ >>> parse_table_structure('''
993
+ ... col String
994
+ ... DEFAULT 'test'
995
+ ... MATERIALIZED
996
+ ... now()''') # MATERIALIZED with heavy indentation
997
+ Traceback (most recent call last):
998
+ ...
999
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: MATERIALIZED columns are not supported at 4:13.
1000
+
1001
+ >>> parse_table_structure('''
1002
+ ... `column.with.dots`
1003
+ ... Int32
1004
+ ... TTL
1005
+ ... timestamp + INTERVAL 1 DAY''') # TTL with increasing indentation
1006
+ Traceback (most recent call last):
1007
+ ...
1008
+ tinybird.tb.modules.datafile.common.SchemaSyntaxError: column TTL is not supported at 4:18.
1033
1009
  """
1034
1010
  return _parse_table_structure(schema)
1035
1011