tinybird-cli 5.22.3.dev0__py3-none-any.whl → 6.0.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tinybird/sql_template.py CHANGED
@@ -6,9 +6,8 @@ import re
6
6
  from collections import deque
7
7
  from datetime import datetime
8
8
  from functools import lru_cache
9
- from io import StringIO
10
9
  from json import loads
11
- from typing import Any, Dict, List, Optional, Tuple, Union
10
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
12
11
 
13
12
  from tornado import escape
14
13
  from tornado.util import ObjectDict, exec_in, unicode_type
@@ -26,6 +25,11 @@ TB_SECRET_PREFIX = "tb_secret_"
26
25
  CH_PARAM_PREFIX = "param_"
27
26
  REQUIRED_PARAM_NOT_DEFINED = "Required parameter is not defined"
28
27
 
28
+ # Pre-compiled regex patterns for performance
29
+ _STRING_LINE_NUMBER_RE = re.compile(r"\<string\>:(\d*)")
30
+ _ARRAY_TYPE_RE = re.compile(r"Array\((\w+)\)")
31
+ _EMBEDDED_TEMPLATE_EXPRESSION_RE = re.compile(r"\{\{(.*?)\}\}")
32
+
29
33
 
30
34
  def secret_template_key(secret_name: str) -> str:
31
35
  return f"{TB_SECRET_PREFIX}{secret_name}"
@@ -372,9 +376,7 @@ def boolean(x, default=None):
372
376
 
373
377
 
374
378
  def defined(x=None):
375
- if isinstance(x, Placeholder) or x is None:
376
- return False
377
- return True
379
+ return not (isinstance(x, Placeholder) or x is None)
378
380
 
379
381
 
380
382
  def array_type(types):
@@ -1512,8 +1514,8 @@ def generate(self, **kwargs) -> Tuple[str, TemplateExecutionResults]:
1512
1514
  text = getattr(e, "text", message)
1513
1515
  line = None
1514
1516
  try:
1515
- line = re.findall(r"\<string\>:(\d*)", text)
1516
- message = re.sub(r"\<string\>:(\d*)", "", message)
1517
+ line = _STRING_LINE_NUMBER_RE.findall(text)
1518
+ message = _STRING_LINE_NUMBER_RE.sub("", message)
1517
1519
  except TypeError:
1518
1520
  pass
1519
1521
 
@@ -1568,28 +1570,404 @@ class CodeWriter:
1568
1570
  print(" " * indent + line + line_comment, file=self.file)
1569
1571
 
1570
1572
 
1571
- def get_var_names(t):
1572
- try:
1573
+ def get_var_names(t: Template):
1574
+ """
1575
+ Extract variable names from a template.
1576
+
1577
+ === BASIC EXPRESSIONS ===
1578
+
1579
+ Simple variable reference:
1580
+ >>> get_var_names(Template("SELECT * FROM test WHERE id = {{my_var}}"))
1581
+ [{'line': 1, 'name': 'my_var'}]
1582
+
1583
+ Multiple variables:
1584
+ >>> get_var_names(Template("SELECT {{a}}, {{b}} FROM test"))
1585
+ [{'line': 1, 'name': 'a'}, {'line': 1, 'name': 'b'}]
1586
+
1587
+ No variables (static SQL):
1588
+ >>> get_var_names(Template("SELECT * FROM test"))
1589
+ []
1590
+
1591
+ === TYPE CASTING FUNCTIONS ===
1592
+
1593
+ Integer types:
1594
+ >>> [v['name'] for v in get_var_names(Template("{{Int8(a, 0)}} {{Int16(b, 0)}} {{Int32(c, 0)}} {{Int64(d, 0)}}"))]
1595
+ ['Int8', 'a', 'Int16', 'b', 'Int32', 'c', 'Int64', 'd']
1596
+
1597
+ Unsigned integer types:
1598
+ >>> [v['name'] for v in get_var_names(Template("{{UInt8(a, 0)}} {{UInt32(b, 0)}} {{UInt64(c, 0)}}"))]
1599
+ ['UInt8', 'a', 'UInt32', 'b', 'UInt64', 'c']
1600
+
1601
+ Float types:
1602
+ >>> [v['name'] for v in get_var_names(Template("{{Float32(price, 0.0)}} {{Float64(amount, 0.0)}}"))]
1603
+ ['Float32', 'price', 'Float64', 'amount']
1604
+
1605
+ String type:
1606
+ >>> [v['name'] for v in get_var_names(Template("{{String(name, 'default')}}"))]
1607
+ ['String', 'name']
1608
+
1609
+ Boolean type (False is in reserved_vars):
1610
+ >>> [v['name'] for v in get_var_names(Template("{{Boolean(flag, False)}}"))]
1611
+ ['Boolean', 'flag']
1612
+
1613
+ Date/DateTime types:
1614
+ >>> [v['name'] for v in get_var_names(Template("{{Date(d)}} {{DateTime(dt)}} {{DateTime64(dt64)}}"))]
1615
+ ['Date', 'd', 'DateTime', 'dt', 'DateTime64', 'dt64']
1616
+
1617
+ Array type:
1618
+ >>> [v['name'] for v in get_var_names(Template("{{Array(ids, 'Int32')}}"))]
1619
+ ['Array', 'ids']
1620
+
1621
+ JSON type:
1622
+ >>> [v['name'] for v in get_var_names(Template("{{JSON(data, '{}')}}"))]
1623
+ ['JSON', 'data']
1624
+
1625
+ === SQL SAFETY FUNCTIONS ===
1626
+
1627
+ Column function (column is in _namespace, filtered out):
1628
+ >>> [v['name'] for v in get_var_names(Template("SELECT {{column(col_name, 'id')}} FROM t"))]
1629
+ ['col_name']
1630
+
1631
+ Columns function:
1632
+ >>> [v['name'] for v in get_var_names(Template("SELECT {{columns(col_list, 'a,b')}} FROM t"))]
1633
+ ['columns', 'col_list']
1634
+
1635
+ Symbol function (symbol is in _namespace, filtered out):
1636
+ >>> [v['name'] for v in get_var_names(Template("SELECT * FROM {{symbol(table_name)}}"))]
1637
+ ['table_name']
1638
+
1639
+ Table function:
1640
+ >>> [v['name'] for v in get_var_names(Template("SELECT * FROM {{table(tbl)}}"))]
1641
+ ['table', 'tbl']
1642
+
1643
+ === CONTROL FLOW - IF/ELIF/ELSE ===
1644
+
1645
+ Simple if:
1646
+ >>> get_var_names(Template("{% if condition %}{{value}}{% end %}"))
1647
+ [{'line': 1, 'name': 'condition'}, {'line': 1, 'name': 'value'}]
1648
+
1649
+ If with defined():
1650
+ >>> [v['name'] for v in get_var_names(Template("{% if defined(flag) %}WHERE x = 1{% end %}"))]
1651
+ ['defined', 'flag']
1652
+
1653
+ If/else:
1654
+ >>> [v['name'] for v in get_var_names(Template("{% if cond %}{{a}}{% else %}{{b}}{% end %}"))]
1655
+ ['cond', 'a', 'b']
1656
+
1657
+ If/elif/else chain:
1658
+ >>> [v['name'] for v in get_var_names(Template("{% if a %}1{% elif b %}2{% elif c %}3{% else %}4{% end %}"))]
1659
+ ['a', 'b', 'c']
1660
+
1661
+ Nested if:
1662
+ >>> [v['name'] for v in get_var_names(Template("{% if outer %}{% if inner %}{{val}}{% end %}{% end %}"))]
1663
+ ['outer', 'inner', 'val']
1664
+
1665
+ If with complex condition (co_names deduplicates names):
1666
+ >>> [v['name'] for v in get_var_names(Template("{% if defined(a) and defined(b) %}{{c}}{% end %}"))]
1667
+ ['defined', 'a', 'b', 'c']
1668
+
1669
+ If with or condition:
1670
+ >>> [v['name'] for v in get_var_names(Template("{% if x or y %}{{z}}{% end %}"))]
1671
+ ['x', 'y', 'z']
1672
+
1673
+ If with comparison:
1674
+ >>> [v['name'] for v in get_var_names(Template("{% if count > 0 %}{{result}}{% end %}"))]
1675
+ ['count', 'result']
1676
+
1677
+ === CONTROL FLOW - FOR LOOPS ===
1678
+
1679
+ Simple for loop:
1680
+ >>> [v['name'] for v in get_var_names(Template("{% for item in items %}{{item}}{% end %}"))]
1681
+ ['items', 'item', 'item']
1682
+
1683
+ For with index (enumerate is in reserved_vars):
1684
+ >>> [v['name'] for v in get_var_names(Template("{% for i, item in enumerate(items) %}{{i}}:{{item}}{% end %}"))]
1685
+ ['items', 'i', 'item', 'i', 'item']
1686
+
1687
+ Nested for loops:
1688
+ >>> [v['name'] for v in get_var_names(Template("{% for row in rows %}{% for col in cols %}{{row}}.{{col}}{% end %}{% end %}"))]
1689
+ ['rows', 'row', 'cols', 'col', 'row', 'col']
1690
+
1691
+ For with split_to_array:
1692
+ >>> [v['name'] for v in get_var_names(Template("{% for x in split_to_array(csv_data) %}{{x}}{% end %}"))]
1693
+ ['split_to_array', 'csv_data', 'x', 'x']
1694
+
1695
+ For with enumerate_with_last:
1696
+ >>> [v['name'] for v in get_var_names(Template("{% for is_last, item in enumerate_with_last(items) %}{{item}}{% end %}"))]
1697
+ ['enumerate_with_last', 'items', 'is_last', 'item', 'item']
1698
+
1699
+ === CONTROL FLOW - WHILE ===
1700
+
1701
+ Simple while:
1702
+ >>> [v['name'] for v in get_var_names(Template("{% while running %}{{counter}}{% end %}"))]
1703
+ ['running', 'counter']
1704
+
1705
+ While with condition:
1706
+ >>> [v['name'] for v in get_var_names(Template("{% while count < max_count %}{{count}}{% end %}"))]
1707
+ ['count', 'max_count', 'count']
1708
+
1709
+ === CONTROL FLOW - BREAK/CONTINUE ===
1710
+
1711
+ Break inside for loop:
1712
+ >>> [v['name'] for v in get_var_names(Template("{% for x in items %}{% if x > limit %}{% break %}{% end %}{{x}}{% end %}"))]
1713
+ ['items', 'x', 'x', 'limit', 'x']
1714
+
1715
+ Continue inside for loop:
1716
+ >>> [v['name'] for v in get_var_names(Template("{% for x in items %}{% if x < 0 %}{% continue %}{% end %}{{x}}{% end %}"))]
1717
+ ['items', 'x', 'x', 'x']
1718
+
1719
+ Break inside while loop:
1720
+ >>> [v['name'] for v in get_var_names(Template("{% while running %}{% if done %}{% break %}{% end %}{{counter}}{% end %}"))]
1721
+ ['running', 'done', 'counter']
1722
+
1723
+ === CONTROL FLOW - TRY/EXCEPT/FINALLY ===
1724
+
1725
+ Simple try/except:
1726
+ >>> [v['name'] for v in get_var_names(Template("{% try %}{{risky}}{% except %}{{fallback}}{% end %}"))]
1727
+ ['risky', 'fallback']
1728
+
1729
+ Try/except/finally:
1730
+ >>> [v['name'] for v in get_var_names(Template("{% try %}{{a}}{% except %}{{b}}{% finally %}{{c}}{% end %}"))]
1731
+ ['a', 'b', 'c']
1732
+
1733
+ Except with type:
1734
+ >>> [v['name'] for v in get_var_names(Template("{% try %}{{a}}{% except MyError as e %}{{e}}{% end %}"))]
1735
+ ['a', 'MyError', 'e', 'e']
1736
+
1737
+ === SET STATEMENTS ===
1738
+
1739
+ Simple set:
1740
+ >>> [v['name'] for v in get_var_names(Template("{% set x = myvar + 1 %}{{x}}"))]
1741
+ ['myvar', 'x', 'x']
1573
1742
 
1743
+ Set with expression:
1744
+ >>> [v['name'] for v in get_var_names(Template("{% set total = a + b * c %}{{total}}"))]
1745
+ ['a', 'b', 'c', 'total', 'total']
1746
+
1747
+ Set from another variable (plain name on the right-hand side):
1748
+ >>> [v['name'] for v in get_var_names(Template("{% set items = list_data %}{{items}}"))]
1749
+ ['list_data', 'items', 'items']
1750
+
1751
+ Set with template expression (skipped - contains {{}}):
1752
+ >>> [v['name'] for v in get_var_names(Template("{% set x = {{String(y)}} %}{{x}}"))]
1753
+ ['x']
1754
+
1755
+ === UTILITY FUNCTIONS ===
1756
+
1757
+ Defined function:
1758
+ >>> [v['name'] for v in get_var_names(Template("{% if defined(param) %}yes{% end %}"))]
1759
+ ['defined', 'param']
1760
+
1761
+ Error function (error is in _namespace, filtered out):
1762
+ >>> [v['name'] for v in get_var_names(Template("{% if not valid %}{{error('Invalid input')}}{% end %}"))]
1763
+ ['valid']
1764
+
1765
+ Custom error (custom_error is in _namespace, filtered out):
1766
+ >>> get_var_names(Template("{{custom_error('Not found', 404)}}"))
1767
+ []
1768
+
1769
+ === DATE FUNCTIONS ===
1770
+
1771
+ Day diff:
1772
+ >>> [v['name'] for v in get_var_names(Template("{{day_diff(start_date, end_date)}}"))]
1773
+ ['day_diff', 'start_date', 'end_date']
1774
+
1775
+ Date diff in days:
1776
+ >>> [v['name'] for v in get_var_names(Template("{{date_diff_in_days(d1, d2)}}"))]
1777
+ ['date_diff_in_days', 'd1', 'd2']
1778
+
1779
+ Date diff in hours:
1780
+ >>> [v['name'] for v in get_var_names(Template("{{date_diff_in_hours(t1, t2)}}"))]
1781
+ ['date_diff_in_hours', 't1', 't2']
1782
+
1783
+ === RUNTIME CONFIGURATION ===
1784
+
1785
+ Max threads:
1786
+ >>> [v['name'] for v in get_var_names(Template("{{max_threads(num_threads)}}"))]
1787
+ ['max_threads', 'num_threads']
1788
+
1789
+ TB secret:
1790
+ >>> [v['name'] for v in get_var_names(Template("{{tb_secret(secret_name)}}"))]
1791
+ ['tb_secret', 'secret_name']
1792
+
1793
+ Cache TTL:
1794
+ >>> [v['name'] for v in get_var_names(Template("{{cache_ttl(ttl_value)}}"))]
1795
+ ['cache_ttl', 'ttl_value']
1796
+
1797
+ === COMPLEX NESTED COMBINATIONS ===
1798
+
1799
+ If inside for:
1800
+ >>> [v['name'] for v in get_var_names(Template("{% for item in items %}{% if defined(item) %}{{item}}{% end %}{% end %}"))]
1801
+ ['items', 'item', 'defined', 'item', 'item']
1802
+
1803
+ For inside if:
1804
+ >>> [v['name'] for v in get_var_names(Template("{% if show_list %}{% for x in data %}{{x}}{% end %}{% end %}"))]
1805
+ ['show_list', 'data', 'x', 'x']
1806
+
1807
+ Set + if + for (uses max_val/my_range because max and range are in reserved_vars and would be filtered out):
1808
+ >>> [v['name'] for v in get_var_names(Template("{% set limit = max_val %}{% if limit > 0 %}{% for i in my_range(limit) %}{{i}}{% end %}{% end %}"))]
1809
+ ['max_val', 'limit', 'limit', 'my_range', 'limit', 'i', 'i']
1810
+
1811
+ Multiple control blocks:
1812
+ >>> t = Template("{% set limit = max_rows %}{% if flag %}{% for c in cols %}{{c}}{% end %}{% elif other %}{{x}}{% else %}{{y}}{% end %}{% while more %}{{b}}{% end %}{% try %}{{q}}{% except E as e %}{{e}}{% finally %}{{done}}{% end %}")
1813
+ >>> [v['name'] for v in get_var_names(t)]
1814
+ ['max_rows', 'limit', 'flag', 'cols', 'c', 'c', 'other', 'x', 'y', 'more', 'b', 'q', 'E', 'e', 'e', 'done']
1815
+
1816
+ Type casting inside control block:
1817
+ >>> [v['name'] for v in get_var_names(Template("{% if defined(id) %}WHERE id = {{Int32(id, 0)}}{% end %}"))]
1818
+ ['defined', 'id', 'Int32', 'id']
1819
+
1820
+ Column + Array combination (column is in _namespace):
1821
+ >>> [v['name'] for v in get_var_names(Template("SELECT {{column(col)}} FROM t WHERE id IN {{Array(ids, 'Int32')}}"))]
1822
+ ['col', 'Array', 'ids']
1823
+
1824
+ Deeply nested structure:
1825
+ >>> t = Template("{% if a %}{% for x in items %}{% if defined(x) %}{% try %}{{Int32(x, 0)}}{% except %}{{default}}{% end %}{% end %}{% end %}{% end %}")
1826
+ >>> [v['name'] for v in get_var_names(t)]
1827
+ ['a', 'items', 'x', 'defined', 'x', 'Int32', 'x', 'default']
1828
+
1829
+ === FUNCTIONS WITH CONTROL FLOW ===
1830
+
1831
+ Date function inside if:
1832
+ >>> [v['name'] for v in get_var_names(Template("{% if defined(start) %}{{date_diff_in_days(start, end)}}{% end %}"))]
1833
+ ['defined', 'start', 'date_diff_in_days', 'start', 'end']
1834
+
1835
+ Date function inside for:
1836
+ >>> [v['name'] for v in get_var_names(Template("{% for d in dates %}{{day_diff(d, today)}}{% end %}"))]
1837
+ ['dates', 'd', 'day_diff', 'd', 'today']
1838
+
1839
+ Date function with elif:
1840
+ >>> t = Template("{% if mode == 'days' %}{{date_diff_in_days(t1, t2)}}{% elif mode == 'hours' %}{{date_diff_in_hours(t1, t2)}}{% end %}")
1841
+ >>> [v['name'] for v in get_var_names(t)]
1842
+ ['mode', 'date_diff_in_days', 't1', 't2', 'mode', 'date_diff_in_hours', 't1', 't2']
1843
+
1844
+ Set with date function:
1845
+ >>> [v['name'] for v in get_var_names(Template("{% set diff = day_diff(start_date, end_date) %}{{diff}}"))]
1846
+ ['day_diff', 'start_date', 'end_date', 'diff', 'diff']
1847
+
1848
+ Max threads inside if:
1849
+ >>> [v['name'] for v in get_var_names(Template("{% if parallel %}{{max_threads(thread_count)}}{% end %}"))]
1850
+ ['parallel', 'max_threads', 'thread_count']
1851
+
1852
+ Cache TTL with condition:
1853
+ >>> [v['name'] for v in get_var_names(Template("{% if use_cache %}{{cache_ttl(ttl)}}{% else %}{{cache_ttl(0)}}{% end %}"))]
1854
+ ['use_cache', 'cache_ttl', 'ttl', 'cache_ttl']
1855
+
1856
+ TB secret inside for:
1857
+ >>> [v['name'] for v in get_var_names(Template("{% for name in secret_names %}{{tb_secret(name)}}{% end %}"))]
1858
+ ['secret_names', 'name', 'tb_secret', 'name']
1859
+
1860
+ Type casting with date function in elif chain (the variable is named kind because type is in reserved_vars):
1861
+ >>> t = Template("{% if kind == 'int' %}{{Int32(val, 0)}}{% elif kind == 'date' %}{{DateTime(val)}}{% elif kind == 'diff' %}{{date_diff_in_days(val, now)}}{% end %}")
1862
+ >>> [v['name'] for v in get_var_names(t)]
1863
+ ['kind', 'Int32', 'val', 'kind', 'DateTime', 'val', 'kind', 'date_diff_in_days', 'val', 'now']
1864
+
1865
+ While with date function:
1866
+ >>> [v['name'] for v in get_var_names(Template("{% while day_diff(current, target) > 0 %}{{current}}{% end %}"))]
1867
+ ['day_diff', 'current', 'target', 'current']
1868
+
1869
+ Try/except with date function:
1870
+ >>> [v['name'] for v in get_var_names(Template("{% try %}{{date_diff_in_hours(t1, t2)}}{% except %}{{default_hours}}{% end %}"))]
1871
+ ['date_diff_in_hours', 't1', 't2', 'default_hours']
1872
+
1873
+ Complex: set + for + if + multiple functions:
1874
+ >>> t = Template("{% set threshold = Int32(max_days, 30) %}{% for d in dates %}{% if day_diff(d, now) < threshold %}{{String(d)}}{% end %}{% end %}")
1875
+ >>> [v['name'] for v in get_var_names(t)]
1876
+ ['Int32', 'max_days', 'threshold', 'dates', 'd', 'day_diff', 'd', 'now', 'threshold', 'String', 'd']
1877
+
1878
+ Nested control blocks with config functions:
1879
+ >>> t = Template("{% if enabled %}{% set threads = max_threads(n) %}{% for i in tasks %}{{cache_ttl(ttl)}}{% end %}{% end %}")
1880
+ >>> [v['name'] for v in get_var_names(t)]
1881
+ ['enabled', 'max_threads', 'n', 'threads', 'tasks', 'i', 'cache_ttl', 'ttl']
1882
+
1883
+ === MULTILINE TEMPLATES ===
1884
+
1885
+ Multiline with mixed constructs (column is in _namespace; the variable is named filter_col because filter is in reserved_vars):
1886
+ >>> t = Template('''
1887
+ ... SELECT
1888
+ ... {{column(col1)}},
1889
+ ... {{column(col2)}}
1890
+ ... FROM {{table(tbl)}}
1891
+ ... {% if defined(flag) %}
1892
+ ... WHERE {{column(filter_col)}} = {{String(value)}}
1893
+ ... {% end %}
1894
+ ... ''')
1895
+ >>> sorted(set(v['name'] for v in get_var_names(t)))
1896
+ ['String', 'col1', 'col2', 'defined', 'filter_col', 'flag', 'table', 'tbl', 'value']
1897
+ """
1898
+ try:
1899
+ # Recursive helper that traverses the template's parsed chunks and collects variable names.
1900
+ # The template is parsed into a tree of chunks: _ChunkList (container), _Expression ({{...}}),
1901
+ # and _ControlBlock ({% if %}, {% for %}, etc.).
1902
+ #
1903
+ # We use compile() to extract variable names because Python's compiler automatically
1904
+ # collects all referenced names into the code object's co_names attribute. This avoids
1905
+ # manually parsing Python expressions with the ast module. For example:
1906
+ # compile("Int32(num_val, 0)", ...).co_names → ('Int32', 'num_val')
1574
1907
  def _n(chunks, v):
1575
1908
  for x in chunks:
1576
1909
  line_number = x.line
1910
+
1577
1911
  if type(x).__name__ == "_ChunkList":
1912
+ # Container node: recurse into its children
1578
1913
  _n(x.chunks, v)
1914
+
1579
1915
  elif type(x).__name__ == "_Expression":
1916
+ # Simple template expression like {{my_var}} or {{Int32(num_val, 0)}}
1917
+ # Compile the expression to bytecode and extract all referenced names
1918
+ # from co_names (the tuple of names used by the bytecode)
1580
1919
  c = compile(x.expression, "<string>", "exec", dont_inherit=True)
1920
+ # Filter out internal namespace functions and reserved variable names
1581
1921
  variable_names = [x for x in c.co_names if x not in _namespace and x not in reserved_vars]
1582
1922
  v += list(map(lambda variable: {"line": line_number, "name": variable}, variable_names))
1923
+
1583
1924
  elif type(x).__name__ == "_ControlBlock":
1584
- buffer = StringIO()
1585
- writer = CodeWriter(buffer, t)
1586
- x.generate(writer)
1587
- c = compile(buffer.getvalue(), "<string>", "exec", dont_inherit=True)
1925
+ # Control structure like {% if cond %}, {% for item in items %}, {% try %}, etc.
1926
+ # Compile only the statement (condition/iterator), not the full generated code.
1927
+ # This avoids compiling large SQL literals in the body, which is expensive
1928
+ # and unnecessary: here we only need names from the condition, and names used in the body are collected by the recursion below.
1929
+ # Note: "try" has no condition/expression, so skip compilation for it.
1930
+ if x.statement != "try":
1931
+ statement_code = x.statement + ": pass"
1932
+ c = compile(statement_code, "<string>", "exec", dont_inherit=True)
1933
+ variable_names = [x for x in c.co_names if x not in _namespace and x not in reserved_vars]
1934
+ v += list(map(lambda variable: {"line": line_number, "name": variable}, variable_names))
1935
+
1936
+ # Recurse into the body of the control block to find nested expressions
1937
+ _n(x.body.chunks, v)
1938
+
1939
+ elif type(x).__name__ == "_IntermediateControlBlock":
1940
+ # Intermediate control structure like {% elif cond %}, {% else %}, {% except %}, etc.
1941
+ # These appear inside _ControlBlock bodies.
1942
+ # For "else"/"finally", there's no condition to extract.
1943
+ # For "elif cond", we need to extract variables from the condition.
1944
+ # For "except Type as e", we need to extract the exception type and alias.
1945
+ # Note: "elif"/"except" aren't valid Python on their own, so we wrap them.
1946
+ if x.statement.startswith("elif "):
1947
+ statement_code = "if False: pass\n" + x.statement + ": pass"
1948
+ c = compile(statement_code, "<string>", "exec", dont_inherit=True)
1949
+ variable_names = [x for x in c.co_names if x not in _namespace and x not in reserved_vars]
1950
+ v += list(map(lambda variable: {"line": line_number, "name": variable}, variable_names))
1951
+
1952
+ elif x.statement.startswith("except "):
1953
+ # "except Exception" or "except Exception as e"
1954
+ statement_code = "try: pass\n" + x.statement + ": pass"
1955
+ c = compile(statement_code, "<string>", "exec", dont_inherit=True)
1956
+ variable_names = [x for x in c.co_names if x not in _namespace and x not in reserved_vars]
1957
+ v += list(map(lambda variable: {"line": line_number, "name": variable}, variable_names))
1958
+
1959
+ elif (
1960
+ type(x).__name__ == "_Statement"
1961
+ and x.statement not in ("break", "continue") # No variables; fail compile() outside loop
1962
+ and "{{" not in x.statement # Skip template expressions which aren't valid Python
1963
+ ):
1964
+ # Statement like {% set x = expr %}
1965
+ c = compile(x.statement, "<string>", "exec", dont_inherit=True)
1588
1966
  variable_names = [x for x in c.co_names if x not in _namespace and x not in reserved_vars]
1589
1967
  v += list(map(lambda variable: {"line": line_number, "name": variable}, variable_names))
1590
- _n(x.body.chunks, v)
1591
1968
 
1592
- var = []
1969
+ var: list[dict[str, Any]] = []
1970
+ # Start traversal from the root of the parsed template
1593
1971
  _n(t.file.body.chunks, var)
1594
1972
  return var
1595
1973
  except SecurityException as e:
@@ -1659,6 +2037,11 @@ def get_var_data(content, node_id=None):
1659
2037
  return str(value)
1660
2038
  return value
1661
2039
 
2040
+ # TODO: Remove this retry logic. It was added in commit 1314a3b120 as a workaround for
2041
+ # a Python 3.11 bug (https://github.com/python/cpython/issues/106905) where AST recursion
2042
+ # depth tracking was broken. However, retrying doesn't actually help since the corrupted
2043
+ # state persists. The bug was fixed in Python 3.11.5+ and 3.12+, so this can be simplified
2044
+ # to just `ast.parse(content)` once we confirm all environments use patched Python versions.
1662
2045
  def parse_content(content, retries=0):
1663
2046
  try:
1664
2047
  parsed = ast.parse(content)
@@ -1776,12 +2159,32 @@ def get_var_names_and_types(t, node_id=None):
1776
2159
  [{'name': 'with_value', 'type': 'Float32', 'default': 0.1}]
1777
2160
  >>> get_var_names_and_types(Template("SELECT * FROM filter_value WHERE description = {{String(d, 'test_1')}} AND value = {{Int8(v, 3)}}"))
1778
2161
  [{'name': 'd', 'type': 'String', 'default': 'test_1'}, {'name': 'v', 'type': 'Int8', 'default': 3}]
2162
+ >>> get_var_names_and_types(Template("select * from test {% if defined(number_variable) %} where number_variable = {{UInt64(number_variable)}} {% end %}"))
2163
+ [{'name': 'number_variable', 'type': 'UInt64', 'default': None}]
1779
2164
  >>> get_var_names_and_types(Template("select * from test {% if defined({{UInt64(number_variable)}}) %} where 1 {% end %}"))
1780
2165
  [{'name': 'number_variable', 'type': 'UInt64', 'default': None}]
1781
2166
  >>> get_var_names_and_types(Template("select * from test {% if defined(testing) and defined(testing2) %} where 1 {%end %}"))
1782
2167
  [{'name': 'testing', 'type': 'String', 'default': None, 'used_in': 'function_call'}, {'name': 'testing2', 'type': 'String', 'default': None, 'used_in': 'function_call'}]
1783
2168
  >>> get_var_names_and_types(Template("select * from test {% if defined({{UInt64(number_variable)}}) %} where 1 {% end %}"))
1784
2169
  [{'name': 'number_variable', 'type': 'UInt64', 'default': None}]
2170
+ >>> get_var_names_and_types(Template("select * from test {% if defined({{UInt64(x)}}) and defined(y) %} where 1 {% end %}"))
2171
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'y', 'type': 'String', 'default': None, 'used_in': 'function_call'}]
2172
+ >>> get_var_names_and_types(Template("select * from test {% if defined(y) and x > 0 %} where x = {{UInt64(x)}} {% end %}"))
2173
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'y', 'type': 'String', 'default': None, 'used_in': 'function_call'}]
2174
+ >>> get_var_names_and_types(Template("select * from test {% if defined(y) and defined(z) and x > 0 %} {{UInt64(x)}} {% end %}"))
2175
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'y', 'type': 'String', 'default': None, 'used_in': 'function_call'}, {'name': 'z', 'type': 'String', 'default': None, 'used_in': 'function_call'}]
2176
+ >>> get_var_names_and_types(Template("{% if '{{' in marker %}{{UInt64(x)}}{% end %}"))
2177
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'marker', 'type': 'String', 'default': None}]
2178
+ >>> get_var_names_and_types(Template("{% if a %}1{% elif defined(y) %}{{UInt64(x)}}{% end %}"))
2179
+ [{'name': 'y', 'type': 'String', 'default': None, 'used_in': 'function_call'}, {'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'a', 'type': 'String', 'default': None}]
2180
+ >>> get_var_names_and_types(Template("{% for x in items %}{{Int32(x, 0)}}{% end %}"))
2181
+ [{'name': 'x', 'type': 'Int32', 'default': 0}, {'name': 'items', 'type': 'String', 'default': None}]
2182
+ >>> get_var_names_and_types(Template("{% while more %}{{UInt64(x)}}{% end %}"))
2183
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'more', 'type': 'String', 'default': None}]
2184
+ >>> get_var_names_and_types(Template("{% set a = Int32(x, 0) %}"))
2185
+ [{'name': 'a', 'type': 'String', 'default': None}, {'name': 'x', 'type': 'Int32', 'default': 0}]
2186
+ >>> get_var_names_and_types(Template("{% try %}{{UInt64(x)}}{% except E as e %}{{e}}{% end %}"))
2187
+ [{'name': 'x', 'type': 'UInt64', 'default': None}, {'name': 'E', 'type': 'String', 'default': None}, {'name': 'e', 'type': 'String', 'default': None}]
1785
2188
  >>> get_var_names_and_types(Template("select {{Array(cod_stock_source_type,'Int16', defined=False)}}"))
1786
2189
  [{'name': 'cod_stock_source_type', 'type': 'Array(Int16)', 'defined': False, 'default': None}]
1787
2190
  >>> get_var_names_and_types(Template("select {{Array(cod_stock_source_type, defined=False)}}"))
@@ -1811,7 +2214,7 @@ def get_var_names_and_types(t, node_id=None):
1811
2214
  >>> get_var_names_and_types(Template("SELECT * FROM filter_value WHERE description = {{Float32(with_value, -0.1)}} AND description = {{Float32(zero, 0)}} AND value = {{Float32(no_default)}}"))
1812
2215
  [{'name': 'with_value', 'type': 'Float32', 'default': -0.1}, {'name': 'zero', 'type': 'Float32', 'default': 0}, {'name': 'no_default', 'type': 'Float32', 'default': None}]
1813
2216
  >>> get_var_names_and_types(Template('''SELECT * FROM abcd WHERE hotel_id <> 0 {% if defined(date_from) %} AND script_created_at > {{DateTime(date_from, '2020-09-09 10:10:10', description="This is a description", required=True)(date_from, '2020-09-09', description="Filter script alert creation date", required=False)}} {% end %}'''))
1814
- [{'name': 'date_from', 'type': 'DateTime', 'description': 'This is a description', 'required': True, 'default': '2020-09-09 10:10:10'}, {'name': 'date_from', 'type': 'DateTime', 'description': 'This is a description', 'required': True, 'default': '2020-09-09 10:10:10'}]
2217
+ [{'name': 'date_from', 'type': 'DateTime', 'description': 'This is a description', 'required': True, 'default': '2020-09-09 10:10:10'}]
1815
2218
  >>> get_var_names_and_types(Template("SELECT * FROM filter_value WHERE symbol = {{Int128(symbol_id, 11111, description='Symbol Id', required=True)}} AND user = {{Int256(user_id, 3555, description='User Id')}}"))
1816
2219
  [{'name': 'symbol_id', 'type': 'Int128', 'description': 'Symbol Id', 'required': True, 'default': 11111}, {'name': 'user_id', 'type': 'Int256', 'description': 'User Id', 'default': 3555}]
1817
2220
  >>> get_var_names_and_types(Template("SELECT now() > {{DateTime64(timestamp, '2020-09-09 10:10:10.000')}}"))
@@ -1822,27 +2225,113 @@ def get_var_names_and_types(t, node_id=None):
1822
2225
  [{'name': 'symbol_id', 'type': 'Int64', 'default': '9223372036854775807'}]
1823
2226
  """
1824
2227
  try:
2228
+ # Recursive helper that traverses the template's parsed chunks and collects
2229
+ # variable data including types and defaults.
2230
+ #
2231
+ # Optimization: Instead of calling x.generate(writer) which generates full
2232
+ # Python code (expensive for large templates), we parse just the statement
2233
+ # and recurse into the body separately.
2234
+ #
2235
+ # Backward compatibility: The original implementation called get_var_data on
2236
+ # full generated code, where type functions (Array, Int32, etc.) would overwrite
2237
+ # variables from defined(). To maintain this behavior, we process body expressions
2238
+ # FIRST to get types, then only add variables from control statements if they
2239
+ # weren't already found in body expressions.
2240
+
2241
+ # Track variable names seen from EXPRESSIONS (not control statements)
2242
+ # Used to prevent control statements from adding variables with wrong types
2243
+ typed_names: set[str] = set()
2244
+
2245
+ statement_wraps: dict[str, Callable[[str], str]] = {
2246
+ "control": lambda statement: f"{statement}: pass",
2247
+ "elif": lambda statement: "if False: pass\n" + statement + ": pass",
2248
+ "except": lambda statement: "try: pass\n" + statement + ": pass",
2249
+ }
1825
2250
 
1826
- def _n(chunks, v):
1827
- for x in chunks:
1828
- if type(x).__name__ == "_ChunkList":
1829
- _n(x.chunks, v)
1830
- elif type(x).__name__ == "_Expression":
1831
- var_data = get_var_data(x.expression, node_id=node_id)
1832
- if var_data:
1833
- v += var_data
1834
- elif type(x).__name__ == "_ControlBlock":
1835
- buffer = StringIO()
1836
- writer = CodeWriter(buffer, t)
1837
- x.generate(writer)
1838
- var_data = get_var_data(buffer.getvalue(), node_id=node_id)
1839
- if var_data:
1840
- v += var_data
1841
- _n(x.body.chunks, v)
2251
+ def parse_statement_code_if_new_vars(statement_code: str, *, skip_names: set[str]) -> list[dict[str, Any]]:
2252
+ """Parse statement code and return variables not already typed by expressions."""
2253
+ try:
2254
+ # `{{...}}` inside `{% ... %}` is not template syntax in Tornado templates.
2255
+ # It becomes Python braces (e.g. set literals), which can hide unrelated vars
2256
+ # in the statement. Strip them so we still parse the rest of the statement.
2257
+ if "{{" in statement_code and "}}" in statement_code:
2258
+ statement_code = _EMBEDDED_TEMPLATE_EXPRESSION_RE.sub("None", statement_code)
2259
+ var_data = get_var_data(statement_code, node_id=node_id)
2260
+ except Exception:
2261
+ return []
1842
2262
 
1843
- var = []
1844
- _n(t.file.body.chunks, var)
1845
- return var
2263
+ if not var_data:
2264
+ return []
2265
+
2266
+ return [vd for vd in var_data if vd["name"] not in skip_names]
2267
+
2268
+ def parse_statement(statement: str, *, wrap: Callable[[str], str] | None) -> list[dict[str, Any]]:
2269
+ """Parse a statement and return variables not already typed by expressions."""
2270
+ statement_expr_names: set[str] = set()
2271
+ vars_out: list[dict[str, Any]] = []
2272
+
2273
+ if "{{" in statement and "}}" in statement:
2274
+ for match in _EMBEDDED_TEMPLATE_EXPRESSION_RE.finditer(statement):
2275
+ expr = match.group(1).strip()
2276
+ if not expr:
2277
+ continue
2278
+ try:
2279
+ var_data = get_var_data(expr, node_id=node_id)
2280
+ except Exception:
2281
+ continue
2282
+
2283
+ if not var_data:
2284
+ continue
2285
+
2286
+ for vd in var_data:
2287
+ # Body expressions win for types; do not add statement-derived typed vars.
2288
+ if vd["name"] in typed_names:
2289
+ continue
2290
+ statement_expr_names.add(vd["name"])
2291
+ vars_out.append(vd)
2292
+
2293
+ statement_code = wrap(statement) if wrap else statement
2294
+ vars_out.extend(
2295
+ parse_statement_code_if_new_vars(statement_code, skip_names=typed_names | statement_expr_names)
2296
+ )
2297
+ return vars_out
2298
+
2299
+ def _n(chunks: list, vars_out: list[dict[str, Any]]) -> None:
2300
+ for x in chunks:
2301
+ kind = type(x).__name__
2302
+ match kind:
2303
+ case "_ChunkList":
2304
+ _n(x.chunks, vars_out)
2305
+
2306
+ case "_Expression":
2307
+ # Template expression like {{Int32(num_val, 0)}} - extract type info
2308
+ var_data = get_var_data(x.expression, node_id=node_id)
2309
+ if var_data:
2310
+ typed_names.update(vd["name"] for vd in var_data)
2311
+ vars_out.extend(var_data)
2312
+
2313
+ case "_ControlBlock":
2314
+ # Process body FIRST to get type functions
2315
+ _n(x.body.chunks, vars_out)
2316
+ # Then parse statement, but only add vars NOT already typed by expressions
2317
+ if x.statement != "try":
2318
+ vars_out.extend(parse_statement(x.statement, wrap=statement_wraps["control"]))
2319
+
2320
+ case "_IntermediateControlBlock":
2321
+ # elif/else/except/finally - only add vars not typed by expressions
2322
+ if x.statement.startswith("elif "):
2323
+ vars_out.extend(parse_statement(x.statement, wrap=statement_wraps["elif"]))
2324
+ elif x.statement.startswith("except "):
2325
+ vars_out.extend(parse_statement(x.statement, wrap=statement_wraps["except"]))
2326
+
2327
+ case "_Statement":
2328
+ # {% set x = ... %}, {% break %}, etc.
2329
+ if x.statement not in ("break", "continue") and "{{" not in x.statement:
2330
+ vars_out.extend(parse_statement(x.statement, wrap=None))
2331
+
2332
+ vars_out: list[dict[str, Any]] = []
2333
+ _n(t.file.body.chunks, vars_out)
2334
+ return vars_out
1846
2335
  except SecurityException as e:
1847
2336
  raise SQLTemplateException(e)
1848
2337
 
@@ -1983,7 +2472,7 @@ def preprocess_variables(variables: dict, template_variables_with_types: List[di
1983
2472
  continue
1984
2473
 
1985
2474
  # For now, we only preprocess Array types
1986
- match = re.match(r"Array\((\w+)\)", var_type)
2475
+ match = _ARRAY_TYPE_RE.match(var_type)
1987
2476
  if match is None:
1988
2477
  continue
1989
2478
 
@@ -2317,6 +2806,8 @@ def render_sql_template(
2317
2806
  t, template_variables, variable_warnings = get_template_and_variables(
2318
2807
  sql, name, escape_arrays=escape_split_to_array
2319
2808
  )
2809
+
2810
+ ## TODO: Could we skip running this unless we need it for some variable preprocessing?
2320
2811
  template_variables_with_types = get_var_names_and_types_cached(t)
2321
2812
 
2322
2813
  if variables is not None: