sutra-dev 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2112 @@
1
+ """Recursive-descent parser for the Sutra language.
2
+
3
+ The parser consumes a token stream produced by `Lexer` and builds the
4
+ AST nodes from `ast_nodes`. It does NOT throw on parse errors — it
5
+ records a diagnostic, tries a recovery strategy (usually "skip to the
6
+ next `;` or `}`"), and keeps going, so a single bad token doesn't hide
7
+ the rest of the file from the validator.
8
+
9
+ Grammar covered (v0.1):
10
+
11
+ module = { top_level_item }
12
+ top_level_item = function_decl | method_decl | var_decl | statement
13
+
14
+ function_decl = modifiers? "function" modifiers? type ident
15
+ ("<" type_params ">")? "(" params? ")" block
16
+ | modifiers? "function" modifiers? "operator" op_token
17
+ "(" params? ")" block
18
+ method_decl = modifiers? "method" type ident
19
+ ("<" type_params ">")? "(" params? ")" block
20
+ modifiers = ("public" | "private" | "static")+
21
+
22
+ type = ident ("<" type_args ">")?
23
+ params = param ("," param)*
24
+ param = type ident
25
+
26
+ block = "{" { statement } "}"
27
+ statement = if_stmt | while_stmt | for_stmt | foreach_stmt
28
+ | do_while_stmt | try_stmt | return_stmt
29
+ | var_decl | block | expr_stmt
30
+
31
+ var_decl = ("var" | "const") ident ["=" expr] ";"
32
+ | "const" type ident ["=" expr] ";"
33
+ | type ident ["=" expr] ";"
34
+
35
+ if_stmt = "if" "(" expr ")" block [ "else" (if_stmt | block) ]
36
+ while_stmt = "while" "(" expr ")" block
37
+ for_stmt = "for" "(" [for_init] ";" [expr] ";" [expr] ")" block
38
+ for_init = var_decl_no_semi | expr
39
+ foreach_stmt = "foreach" "(" ("var" | type) ident "in" expr ")" block
40
+ do_while_stmt = "do" block "while" "(" expr ")" ";"
41
+ try_stmt = "try" block "catch" block
42
+ return_stmt = "return" [expr] ";"
43
+ expr_stmt = expr ";"
44
+
45
+ expr = assignment
46
+ assignment = logical_or (assign_op assignment)?
47
+ logical_or = logical_and ("||" logical_and)*
48
+ logical_and = equality ("&&" equality)*
49
+ equality = comparison (("==" | "!=") comparison)*
50
+ comparison = additive (("<" | ">" | "<=" | ">=") additive)*
51
+ additive = multiplicative (("+" | "-") multiplicative)*
52
+ multiplicative = unary (("*" | "/" | "%") unary)*
53
+ unary = ("!" | "-" | "+") unary | postfix
54
+ postfix = primary { call_or_member }
55
+ call_or_member = "." ident | "(" args? ")" | "<" type_args ">" "(" args? ")"
56
+ primary = literal | interp_string | ident | "this"
57
+ | paren_or_cast | special_call
58
+
59
+ paren_or_cast = "(" ( type ")" unary | expr ")" )
60
+ special_call = "unsafeCast" "<" type ">" "(" expr ")"
61
+ | "unsafeOverride" "(" expr ")"
62
+ | "defuzzy" "(" expr ")"
63
+ | "embed" "(" expr ")"
64
+
65
+ Ambiguities handled:
66
+
67
+ - `(Type) expr` (cast) vs `(expr)` (group): we save the position, try
68
+ to parse a bare type followed by `)`, and if the next token can
69
+ start a unary expression, commit to cast; otherwise rewind.
70
+ - `Ident < ... > (...)` (generic call) vs `a < b` (comparison): in
71
+ postfix position we look ahead for a balanced `<...>` followed by
72
+ `(`. If the pattern matches, it's a generic call.
73
+ """
74
+
75
+ from __future__ import annotations
76
+
77
+ from typing import List, Optional, Tuple, Union
78
+
79
+ from . import ast_nodes as ast
80
+ from .diagnostics import (
81
+ DiagnosticBag,
82
+ SourcePosition,
83
+ SourceSpan,
84
+ )
85
+ from .lexer import Token, TokenKind
86
+
87
+
88
# Token kinds that may begin a unary/primary expression. The cast
# disambiguation consults this set to decide whether `(X)` is a cast
# (an expression follows) or a parenthesised group.
_EXPR_START_TOKENS = {
    # literals
    TokenKind.INT_LIT, TokenKind.FLOAT_LIT, TokenKind.IMAG_LIT,
    TokenKind.CHAR_LIT, TokenKind.STRING_LIT, TokenKind.STRING_INTERP_START,
    TokenKind.TRUE, TokenKind.FALSE,
    # keyword-led expressions
    TokenKind.KW_UNKNOWN, TokenKind.KW_WAIT, TokenKind.KW_THIS,
    # names and bracketed forms
    TokenKind.IDENT, TokenKind.LPAREN, TokenKind.LBRACKET,
    # prefix operators
    TokenKind.BANG, TokenKind.MINUS, TokenKind.PLUS,
}
109
+
110
# Names of the primitive types. The parser handles them exactly like any
# other type identifier; the set exists only so error messages can be
# friendlier.
_PRIMITIVE_TYPES = {
    "scalar",
    "vector",
    "matrix",
    "tuple",
    "string",
    "bool",
    "fuzzy",
    "void",
    "permutation",
    "map",
    "char",
    "int",
    "trit",     # three-valued fuzzy (three-way polarizer in defuzz)
    "complex",  # real/imag pair on synthetic axes 0, 1
}

# Names that behave as "special functions" when they appear in
# expression position.
_SPECIAL_CALL_NAMES = {
    "unsafeCast",
    "unsafeOverride",
    "defuzzy",
    "embed",
}
124
+
125
+
126
+ class Parser:
127
+ def __init__(
128
+ self,
129
+ tokens: List[Token],
130
+ *,
131
+ file: Optional[str] = None,
132
+ diagnostics: Optional[DiagnosticBag] = None,
133
+ ) -> None:
134
+ self.tokens = tokens
135
+ self.file = file
136
+ self.diagnostics = diagnostics if diagnostics is not None else DiagnosticBag(file=file)
137
+ self._pos = 0
138
+
139
+ # ================================================================
140
+ # Public entry points
141
+ # ================================================================
142
+
143
def parse_module(self) -> ast.Module:
    """Parse top-level items until EOF and wrap them in a Module.

    Items for which `_parse_top_level` returns None are dropped. The
    module span runs from the start of the first token to the end of
    the token current when the loop stops.
    """
    first_span = self._current_span()
    collected: List[ast.TopLevel] = []
    while not self._at_end():
        parsed = self._parse_top_level()
        if parsed is None:
            continue
        collected.append(parsed)
    last_span = self._current_span()
    return ast.Module(
        items=collected,
        span=SourceSpan(start=first_span.start, end=last_span.end),
    )
153
+
154
+ # ================================================================
155
+ # Token stream helpers
156
+ # ================================================================
157
+
158
def _at_end(self) -> bool:
    """Return True when the current token is the EOF token."""
    return self._check(TokenKind.EOF)
160
+
161
+ def _peek(self, offset: int = 0) -> Token:
162
+ idx = self._pos + offset
163
+ if idx >= len(self.tokens):
164
+ return self.tokens[-1]
165
+ return self.tokens[idx]
166
+
167
+ def _current_span(self) -> SourceSpan:
168
+ return self._peek().span
169
+
170
def _advance(self) -> Token:
    """Consume and return the current token.

    The EOF token is returned but never stepped over, so the position
    cannot move past it.
    """
    current = self.tokens[self._pos]
    if current.kind is TokenKind.EOF:
        return current
    self._pos += 1
    return current
175
+
176
+ def _check(self, kind: TokenKind) -> bool:
177
+ return self._peek().kind is kind
178
+
179
+ def _check_any(self, *kinds: TokenKind) -> bool:
180
+ return self._peek().kind in kinds
181
+
182
def _match(self, *kinds: TokenKind) -> Optional[Token]:
    """Consume the current token if its kind is one of `kinds`.

    Returns the consumed token, or None (consuming nothing) otherwise.
    """
    if not self._check_any(*kinds):
        return None
    return self._advance()
186
+
187
def _expect(self, kind: TokenKind, what: str) -> Optional[Token]:
    """Consume a token of kind `kind`, or report SUT0100.

    Args:
        kind: The required token kind.
        what: Human-readable description used in the error message.

    Returns:
        The consumed token, or None when the current token has a
        different kind. In that case nothing is consumed and an error
        diagnostic is recorded at the offending token.
    """
    if not self._check(kind):
        found = self._peek()
        self.diagnostics.error(
            f"expected {what}, got {self._describe(found)}",
            found.span,
            code="SUT0100",
        )
        return None
    return self._advance()
197
+
198
def _describe(self, tok: Token) -> str:
    """Render `tok` for an error message: its lexeme in backticks, or
    the phrase "end of file" for the EOF token.
    """
    return "end of file" if tok.kind is TokenKind.EOF else f"`{tok.lexeme}`"
202
+
203
def _synchronize_to(self, *kinds: TokenKind) -> None:
    """Error recovery: discard tokens until the current token is one of
    `kinds` or EOF. The stopping token itself is left unconsumed.
    """
    while True:
        if self._at_end() or self._peek().kind in kinds:
            return
        self._advance()
209
+
210
def _skip_to_statement_boundary(self) -> None:
    """Error recovery: skip to the end of the current statement.

    Tokens are discarded until, outside any nested `(`/`{` group, one
    of the following is reached:

    * a `;`, which is consumed, or
    * a `}` or `)`, which is left for the caller, or
    * EOF.

    Braces and parentheses share one nesting counter.
    """
    openers = (TokenKind.LBRACE, TokenKind.LPAREN)
    closers = (TokenKind.RBRACE, TokenKind.RPAREN)
    nesting = 0
    while not self._at_end():
        kind = self._peek().kind
        if nesting == 0:
            if kind in closers:
                return
            if kind is TokenKind.SEMICOLON:
                self._advance()
                return
        if kind in openers:
            nesting += 1
        elif kind in closers:
            nesting -= 1
        self._advance()
225
+
226
+ # ================================================================
227
+ # Top-level
228
+ # ================================================================
229
+
230
def _parse_top_level(self) -> Optional[ast.TopLevel]:
    """Parse one top-level item.

    Leading modifiers are collected first, then the next token selects
    the production: function, loop function, intrinsic function,
    method, static method, class or slot declaration. Anything else is
    parsed as a statement. Modifiers in front of a construct that does
    not take them are reported as SUT0101.
    """
    rewind_to = self._pos
    mods = self._parse_modifiers()
    head = self._peek()
    kind = head.kind
    # Evaluated once: `mods` is only mutated afterwards on the
    # static-method path, which returns immediately.
    has_modifiers = mods.is_public or mods.is_private or mods.is_static

    def reject_modifiers(message: str) -> None:
        # Report SUT0101 at the deciding token when modifiers were seen.
        if has_modifiers:
            self.diagnostics.error(message, head.span, code="SUT0101")

    if kind is TokenKind.KW_FUNCTION:
        return self._parse_function_decl(mods)

    if kind in (
        TokenKind.KW_DO_WHILE,
        TokenKind.KW_WHILE_LOOP,
        TokenKind.KW_ITERATIVE_LOOP,
        TokenKind.KW_FOREACH_LOOP,
    ):
        reject_modifiers(
            "modifiers (`public`/`private`/`static`) are not yet "
            "supported on loop function declarations"
        )
        return self._parse_loop_function_decl()

    if kind is TokenKind.KW_INTRINSIC and self._peek(1).kind is TokenKind.KW_FUNCTION:
        # `intrinsic function <ret> <name>(<params>);` is a
        # signature-only declaration: drop `intrinsic` and let the
        # function parser expect `;` in place of a body.
        self._advance()
        return self._parse_function_decl(mods, is_intrinsic=True)

    if kind is TokenKind.KW_METHOD:
        return self._parse_method_decl(mods)

    if kind is TokenKind.KW_STATIC and self._peek(1).kind is TokenKind.KW_METHOD:
        # `_parse_modifiers` leaves `static` alone unless `function`
        # follows; the method parser consumes it itself.
        mods.is_static = True
        return self._parse_method_decl(mods)

    if kind is TokenKind.KW_CLASS:
        reject_modifiers(
            "modifiers (`public`/`private`/`static`) are not yet "
            "supported on class declarations"
        )
        return self._parse_class_decl()

    if kind is TokenKind.KW_SLOT:
        # The parser does not restrict where a slot declaration may
        # appear; it only rejects modifiers on it.
        reject_modifiers(
            "modifiers (`public`/`private`/`static`) are not "
            "supported on slot declarations"
        )
        return self._parse_slot_decl()

    # Not a declaration. Stray modifiers are an error; the position is
    # rewound so the statement parser starts from the modifier tokens.
    if has_modifiers:
        reject_modifiers(
            "modifiers (`public`/`private`/`static`) only apply to function and method declarations"
        )
        self._pos = rewind_to

    return self._parse_statement()
297
+
298
def _parse_modifiers(self) -> ast.Modifiers:
    """Consume a run of modifier keywords and return them as flags.

    `public` and `private` are always consumed. `static` is consumed
    only when the next token is `function`; a `static` in front of
    `method` is left for `_parse_top_level` to handle.
    """
    mods = ast.Modifiers()
    while True:
        kind = self._peek().kind
        if kind is TokenKind.KW_PUBLIC:
            mods.is_public = True
        elif kind is TokenKind.KW_PRIVATE:
            mods.is_private = True
        elif (kind is TokenKind.KW_STATIC
              and self._peek(1).kind is TokenKind.KW_FUNCTION):
            mods.is_static = True
        else:
            return mods
        self._advance()
320
+
321
+ # ----------------------------------------------------------------
322
+ # Function / method declarations
323
+ # ----------------------------------------------------------------
324
+
325
def _parse_function_decl(
    self, mods: ast.Modifiers, *, is_intrinsic: bool = False,
) -> Optional[ast.FunctionDecl]:
    """Parse a declaration that starts at the `function` keyword.

    Args:
        mods: Modifiers already collected by the caller. Any modifiers
            written after `function` are merged into this object in
            place.
        is_intrinsic: True when the caller consumed a leading
            `intrinsic`. The declaration then ends with `;` instead of
            a block and receives a fabricated empty body.

    Returns:
        The FunctionDecl, or None when the return type, name or body
        is missing. A diagnostic is recorded for each of those cases.
    """
    start_span = self._current_span()
    self._expect(TokenKind.KW_FUNCTION, "`function`")

    # Modifiers may also follow `function`, as in
    # `function public static vector Foo(...)`.
    inner_mods = self._parse_modifiers()
    if inner_mods.is_public:
        mods.is_public = True
    if inner_mods.is_private:
        mods.is_private = True
    if inner_mods.is_static:
        mods.is_static = True

    # Operator overload: `function operator + (...)`.
    if self._check(TokenKind.KW_OPERATOR):
        return self._parse_operator_decl(mods, start_span, is_method=False)

    return_type = self._parse_type()
    if return_type is None:
        # `_parse_type` never reports on its own, so report here;
        # otherwise the declaration would be skipped without any
        # diagnostic at all.
        bad = self._peek()
        self.diagnostics.error(
            f"expected return type, got {self._describe(bad)}",
            bad.span,
            code="SUT0100",
        )
        self._skip_to_statement_boundary()
        return None

    name_tok = self._expect(TokenKind.IDENT, "function name")
    if name_tok is None:
        self._skip_to_statement_boundary()
        return None

    type_params = self._parse_type_params()
    params = self._parse_param_list()
    if is_intrinsic:
        # Signature only; semicolon in place of body. Fabricate an
        # empty Block so downstream code that assumes .body is a
        # Block doesn't need a special-case.
        semi = self._expect(TokenKind.SEMICOLON, "`;` to close intrinsic declaration")
        end = semi.span.end if semi is not None else self._current_span().end
        body = ast.Block(statements=[], span=SourceSpan(start=end, end=end))
        return ast.FunctionDecl(
            modifiers=mods,
            return_type=return_type,
            name=name_tok.lexeme,
            type_params=type_params,
            params=params,
            body=body,
            is_operator=False,
            is_intrinsic=True,
            span=SourceSpan(start=start_span.start, end=end),
        )
    body = self._parse_block()
    if body is None:
        return None

    end_span = body.span
    return ast.FunctionDecl(
        modifiers=mods,
        return_type=return_type,
        name=name_tok.lexeme,
        type_params=type_params,
        params=params,
        body=body,
        is_operator=False,
        span=SourceSpan(start=start_span.start, end=end_span.end),
    )
390
+
391
+
392
# Loop-function keyword token -> the `kind` string stored on the
# LoopFunctionDecl built by `_parse_loop_function_decl`.
_LOOP_KIND_TOKEN = {
    TokenKind.KW_DO_WHILE:       "do_while",
    TokenKind.KW_WHILE_LOOP:     "while_loop",
    TokenKind.KW_ITERATIVE_LOOP: "iterative_loop",
    TokenKind.KW_FOREACH_LOOP:   "foreach_loop",
}
398
+
399
def _parse_loop_function_decl(self) -> Optional[ast.LoopFunctionDecl]:
    """Parse `<kind> name(condition, type name (= default)?, ...) { body }`.

    The first item in the paren-list is an expression (the condition for
    while/do_while; the count for iterative; the array for foreach).
    Remaining items are state-parameter declarations with optional
    defaults. State params can be referenced by the condition expression.

    The current token must be one of the loop keywords listed in
    `_LOOP_KIND_TOKEN`; it is consumed unconditionally.

    Returns:
        The LoopFunctionDecl, or None when any part of the declaration
        fails to parse.
    """
    start_span = self._current_span()
    kind_tok = self._advance()
    kind = self._LOOP_KIND_TOKEN[kind_tok.kind]

    name_tok = self._expect(TokenKind.IDENT, f"loop function name after `{kind}`")
    if name_tok is None:
        self._skip_to_statement_boundary()
        return None

    if self._expect(TokenKind.LPAREN, "`(` after loop function name") is None:
        self._skip_to_statement_boundary()
        return None

    # First item: the condition expression.
    condition = self._parse_expr()
    if condition is None:
        self._skip_to_statement_boundary()
        return None

    # Remaining items: state parameters (TYPE name (= default)?).
    state_params: List[ast.LoopStateParam] = []
    while self._match(TokenKind.COMMA):
        param_start = self._current_span()
        type_ref = self._parse_type()
        if type_ref is None:
            # `_parse_type` is silent on failure; without this report
            # the malformed parameter would be skipped with no
            # diagnostic.
            bad = self._peek()
            self.diagnostics.error(
                f"expected state parameter type, got {self._describe(bad)}",
                bad.span,
                code="SUT0100",
            )
            self._skip_to_statement_boundary()
            return None
        param_name_tok = self._expect(TokenKind.IDENT, "state parameter name")
        if param_name_tok is None:
            self._skip_to_statement_boundary()
            return None
        default_expr: Optional[ast.Expr] = None
        if self._match(TokenKind.ASSIGN):
            default_expr = self._parse_expr()
            if default_expr is None:
                self._skip_to_statement_boundary()
                return None
        # End the span at the last token consumed for this parameter
        # (its name or the end of its default), not at the `,`/`)`
        # that follows it.
        param_end = self.tokens[self._pos - 1].span.end
        state_params.append(
            ast.LoopStateParam(
                type_ref=type_ref,
                name=param_name_tok.lexeme,
                default=default_expr,
                span=SourceSpan(start=param_start.start, end=param_end),
            )
        )

    if self._expect(TokenKind.RPAREN, "`)` to close loop parameter list") is None:
        self._skip_to_statement_boundary()
        return None

    body = self._parse_block()
    if body is None:
        return None

    end_span = body.span
    return ast.LoopFunctionDecl(
        kind=kind,
        name=name_tok.lexeme,
        condition=condition,
        state_params=state_params,
        body=body,
        span=SourceSpan(start=start_span.start, end=end_span.end),
    )
471
+
472
def _parse_method_decl(
    self, mods: ast.Modifiers, *, is_intrinsic: bool = False
) -> Optional[ast.MethodDecl]:
    """Parse a method declaration.

    Accepts an optional leading `static` and/or `intrinsic` that the
    caller has not consumed, followed by `method`.

    Args:
        mods: Modifiers to attach to the resulting declaration.
        is_intrinsic: True when the caller already consumed
            `intrinsic`. The declaration then ends with `;` and gets a
            fabricated empty body.

    Returns:
        The MethodDecl, or None when the return type, name or body is
        missing. A diagnostic is recorded for each of those cases.
    """
    start_span = self._current_span()
    # Consume `static` if we got here via static-method detection.
    self._match(TokenKind.KW_STATIC)
    # Consume `intrinsic` if it precedes `method` (handled by the
    # caller normally, but tolerate it here for top-level entry).
    if self._check(TokenKind.KW_INTRINSIC):
        self._advance()
        is_intrinsic = True
    self._expect(TokenKind.KW_METHOD, "`method`")

    if self._check(TokenKind.KW_OPERATOR):
        if is_intrinsic:
            self.diagnostics.error(
                "operator methods cannot be declared `intrinsic`",
                self._current_span(),
                code="SUT0145",
            )
        fn = self._parse_operator_decl(mods, start_span, is_method=True)
        if fn is None:
            return None
        # Re-wrap the operator FunctionDecl as a MethodDecl.
        return ast.MethodDecl(
            modifiers=mods,
            return_type=fn.return_type,
            name=fn.name,
            type_params=fn.type_params,
            params=fn.params,
            body=fn.body,
            is_operator=True,
            span=fn.span,
        )

    return_type = self._parse_type()
    if return_type is None:
        # `_parse_type` never reports on its own, so report here;
        # otherwise the declaration would be skipped without any
        # diagnostic at all.
        bad = self._peek()
        self.diagnostics.error(
            f"expected return type, got {self._describe(bad)}",
            bad.span,
            code="SUT0100",
        )
        self._skip_to_statement_boundary()
        return None

    name_tok = self._expect(TokenKind.IDENT, "method name")
    if name_tok is None:
        self._skip_to_statement_boundary()
        return None

    type_params = self._parse_type_params()
    params = self._parse_param_list()
    if is_intrinsic:
        # Signature-only declaration; semicolon in place of body.
        semi = self._expect(TokenKind.SEMICOLON,
                            "`;` to close intrinsic method declaration")
        end = semi.span.end if semi is not None else self._current_span().end
        body = ast.Block(statements=[], span=SourceSpan(start=end, end=end))
        return ast.MethodDecl(
            modifiers=mods,
            return_type=return_type,
            name=name_tok.lexeme,
            type_params=type_params,
            params=params,
            body=body,
            is_operator=False,
            is_intrinsic=True,
            span=SourceSpan(start=start_span.start, end=end),
        )

    body = self._parse_block()
    if body is None:
        return None

    end_span = body.span
    return ast.MethodDecl(
        modifiers=mods,
        return_type=return_type,
        name=name_tok.lexeme,
        type_params=type_params,
        params=params,
        body=body,
        is_operator=False,
        span=SourceSpan(start=start_span.start, end=end_span.end),
    )
551
+
552
def _parse_class_decl(self) -> Optional[ast.ClassDecl]:
    """Parse `class Name extends Parent { members }`.

    The `extends Parent` clause is required. The body may contain
    method declarations (`method`, `static method`, `intrinsic method`,
    `static intrinsic method`) and loop-function declarations. Any
    other member is reported as SUT0140; the rest of the class body is
    then skipped and the members parsed so far are returned.

    Returns:
        The ClassDecl, or None when the class name, `extends` clause,
        parent name or closing `}` is missing. The span ends at the
        closing brace (or at the last token consumed while skipping an
        unsupported member).
    """
    start_span = self._current_span()
    self._expect(TokenKind.KW_CLASS, "`class`")

    name_tok = self._expect(TokenKind.IDENT, "class name")
    if name_tok is None:
        self._skip_to_statement_boundary()
        return None

    # `extends Parent` is mandatory; there is no implicit parent.
    extends_tok = self._expect(TokenKind.KW_EXTENDS,
                               "`extends ParentName` (required in MVP)")
    if extends_tok is None:
        self._skip_to_statement_boundary()
        return None
    parent_tok = self._expect(TokenKind.IDENT, "parent class name")
    if parent_tok is None:
        self._skip_to_statement_boundary()
        return None

    self._expect(TokenKind.LBRACE, "`{` to open class body")
    methods: List[ast.MethodDecl] = []
    loop_functions: List[ast.LoopFunctionDecl] = []
    loop_kw_set = (
        TokenKind.KW_DO_WHILE,
        TokenKind.KW_WHILE_LOOP,
        TokenKind.KW_ITERATIVE_LOOP,
        TokenKind.KW_FOREACH_LOOP,
    )
    while not self._check(TokenKind.RBRACE) and self._peek().kind is not TokenKind.EOF:
        tok0 = self._peek()
        tok1 = self._peek(1)
        tok2 = self._peek(2)
        # Detect the four method shapes plus loop function decls.
        is_method_start = False
        is_static = False
        is_intrinsic = False
        if tok0.kind is TokenKind.KW_METHOD:
            is_method_start = True
        elif (tok0.kind is TokenKind.KW_STATIC
              and tok1.kind is TokenKind.KW_METHOD):
            # `static` is left in place; `_parse_method_decl` consumes it.
            is_method_start = True
            is_static = True
        elif (tok0.kind is TokenKind.KW_INTRINSIC
              and tok1.kind is TokenKind.KW_METHOD):
            is_method_start = True
            is_intrinsic = True
            self._advance()  # intrinsic
        elif (tok0.kind is TokenKind.KW_STATIC
              and tok1.kind is TokenKind.KW_INTRINSIC
              and tok2.kind is TokenKind.KW_METHOD):
            is_method_start = True
            is_static = True
            is_intrinsic = True
            self._advance()  # static
            self._advance()  # intrinsic
        if is_method_start:
            mods = ast.Modifiers()
            if is_static:
                mods.is_static = True
            m = self._parse_method_decl(mods, is_intrinsic=is_intrinsic)
            if m is not None:
                methods.append(m)
        elif tok0.kind in loop_kw_set:
            # A loop function declared inside a class body has the same
            # shape as a top-level loop function declaration.
            lf = self._parse_loop_function_decl()
            if lf is not None:
                loop_functions.append(lf)
        else:
            self.diagnostics.error(
                "class bodies accept method and loop-function "
                "declarations only. Field declarations and operator "
                "overloads are deferred",
                self._current_span(),
                code="SUT0140",
                hint="declare the body member as `method <ret> "
                     "<name>(...) { ... }`, a loop function "
                     "(`do_while`, `while_loop`, `iterative_loop`, "
                     "`foreach_loop`), or remove it",
            )
            # Skip forward through the class's closing brace so the
            # rest of the file still parses.
            depth = 1
            last_tok = tok0
            while depth > 0 and self._peek().kind is not TokenKind.EOF:
                last_tok = self._advance()
                if last_tok.kind is TokenKind.LBRACE:
                    depth += 1
                elif last_tok.kind is TokenKind.RBRACE:
                    depth -= 1
            # End the span at the last token consumed (normally the
            # closing brace), not at the token that follows it.
            return ast.ClassDecl(
                name=name_tok.lexeme,
                parent_name=parent_tok.lexeme,
                methods=methods,
                loop_functions=loop_functions,
                span=SourceSpan(start=start_span.start, end=last_tok.span.end),
            )
    close = self._expect(TokenKind.RBRACE, "`}` to close class body")
    if close is None:
        return None

    # The class ends at its closing brace. The brace has already been
    # consumed, so `_current_span()` would point at the next token.
    return ast.ClassDecl(
        name=name_tok.lexeme,
        parent_name=parent_tok.lexeme,
        methods=methods,
        loop_functions=loop_functions,
        span=SourceSpan(start=start_span.start, end=close.span.end),
    )
675
+
676
def _parse_slot_decl(self) -> Optional[ast.VarDecl]:
    """Parse `slot TYPE name [= expr];`.

    The type is mandatory. The result is an ordinary VarDecl flagged
    with `is_slot=True`.

    Returns:
        The VarDecl, or None when the type or the variable name is
        missing (a diagnostic is recorded in both cases). A missing
        `;` is reported but still yields a declaration.
    """
    start_span = self._current_span()
    self._expect(TokenKind.KW_SLOT, "`slot`")

    type_ref = self._parse_type()
    if type_ref is None:
        # `_parse_type` is silent on failure; report here so the bad
        # declaration is not skipped without any diagnostic.
        bad = self._peek()
        self.diagnostics.error(
            f"expected slot type, got {self._describe(bad)}",
            bad.span,
            code="SUT0100",
        )
        self._skip_to_statement_boundary()
        return None
    name_tok = self._expect(TokenKind.IDENT, "slot variable name")
    if name_tok is None:
        self._skip_to_statement_boundary()
        return None

    init: Optional[ast.Expr] = None
    if self._match(TokenKind.ASSIGN):
        init = self._parse_expr()

    end = self._expect(TokenKind.SEMICOLON, "`;` after slot declaration")
    # Without a `;`, fall back to the span of the current token.
    end_span = end.span if end else self._current_span()
    return ast.VarDecl(
        is_const=False,
        is_var_inferred=False,
        type_ref=type_ref,
        name=name_tok.lexeme,
        initializer=init,
        span=SourceSpan(start=start_span.start, end=end_span.end),
        is_slot=True,
    )
715
+
716
def _parse_operator_decl(
    self,
    mods: ast.Modifiers,
    start_span: SourceSpan,
    *,
    is_method: bool,
) -> Optional[ast.FunctionDecl]:
    """Parse `operator <op> (params) block`, starting at `operator`.

    Args:
        mods: Modifiers to attach to the declaration.
        start_span: Span where the enclosing declaration began; its
            start becomes the start of the result's span.
        is_method: Accepted for the caller's benefit; this method does
            not read it. A method caller re-wraps the result itself.

    Returns:
        A FunctionDecl named `operator<op>` with `is_operator=True`,
        or None when the body block is missing.
    """
    self._expect(TokenKind.KW_OPERATOR, "`operator`")

    # The token right after `operator` is taken as the operator symbol
    # and consumed unconditionally; no return type is parsed here. A
    # token that is not overloadable is reported, and parsing carries
    # on regardless.
    overloadable = (
        TokenKind.PLUS, TokenKind.MINUS, TokenKind.STAR,
        TokenKind.SLASH, TokenKind.PERCENT,
        TokenKind.EQ, TokenKind.NEQ,
        TokenKind.LT, TokenKind.GT, TokenKind.LE, TokenKind.GE,
        TokenKind.BANG,
    )
    op_tok = self._advance()
    op_name = op_tok.lexeme
    if op_tok.kind not in overloadable:
        self.diagnostics.error(
            f"`{op_name}` is not an overloadable operator",
            op_tok.span,
            code="SUT0102",
        )

    params = self._parse_param_list()
    body = self._parse_block()
    if body is None:
        return None

    # Placeholder return type: always `vector`, located at the
    # operator token, whatever the parameter types are.
    placeholder_type = ast.TypeRef(name="vector", type_args=[], span=op_tok.span)
    decl_span = SourceSpan(start=start_span.start, end=body.span.end)
    return ast.FunctionDecl(
        modifiers=mods,
        return_type=placeholder_type,
        name=f"operator{op_name}",
        type_params=[],
        params=params,
        body=body,
        is_operator=True,
        span=decl_span,
    )
778
+
779
def _parse_type_params(self) -> List[str]:
    """Parse an optional `<T, U>` list and return the parameter names.

    Returns an empty list when the current token is not `<`. If the
    list is malformed, the position is rewound to the `<` and an empty
    list is returned; diagnostics already recorded by `_expect` are
    kept.
    """
    if not self._check(TokenKind.LT):
        return []

    checkpoint = self._pos
    self._advance()  # `<`
    names: List[str] = []
    more = True
    while more:
        ident = self._expect(TokenKind.IDENT, "type parameter name")
        if ident is None:
            self._pos = checkpoint
            return []
        names.append(ident.lexeme)
        more = bool(self._match(TokenKind.COMMA))

    if not self._expect(TokenKind.GT, "`>` to close type parameter list"):
        self._pos = checkpoint
        return []
    return names
803
+
804
def _parse_param_list(self) -> List[ast.Param]:
    """Parse `( [type ident ("," type ident)*] )` into Param nodes.

    A malformed parameter is reported and skipped: the parser
    resynchronises at the next `,` or `)` and carries on with the
    remaining parameters.

    Returns:
        The parameters that parsed successfully. The list is empty when
        the opening `(` is missing (already reported by `_expect`).
    """
    params: List[ast.Param] = []
    if not self._expect(TokenKind.LPAREN, "`(`"):
        return params
    if self._match(TokenKind.RPAREN):
        return params
    while True:
        start = self._current_span()
        type_ref = self._parse_type()
        if type_ref is None:
            # `_parse_type` is silent on failure. Report here, or input
            # such as `f(123)` would be accepted as a parameterless
            # function with no diagnostic.
            bad = self._peek()
            self.diagnostics.error(
                f"expected parameter type, got {self._describe(bad)}",
                bad.span,
                code="SUT0100",
            )
            self._synchronize_to(TokenKind.COMMA, TokenKind.RPAREN)
            if self._match(TokenKind.COMMA):
                continue
            break
        name_tok = self._expect(TokenKind.IDENT, "parameter name")
        if name_tok is None:
            self._synchronize_to(TokenKind.COMMA, TokenKind.RPAREN)
            if self._match(TokenKind.COMMA):
                continue
            break
        params.append(
            ast.Param(
                type_ref=type_ref,
                name=name_tok.lexeme,
                span=SourceSpan(start=start.start, end=name_tok.span.end),
            )
        )
        if self._match(TokenKind.COMMA):
            continue
        break
    self._expect(TokenKind.RPAREN, "`)` to close parameter list")
    return params
836
+
837
def _parse_type(self) -> Optional[ast.TypeRef]:
    """Parse `ident ("<" type ("," type)* ">")?` into a TypeRef.

    Returns None, consuming nothing and recording no diagnostic, when
    the current token is not an identifier. If a `<` follows the name
    but no well-formed, `>`-terminated argument list does, the position
    is rewound to just after the name and a TypeRef without type
    arguments is returned.
    """
    head = self._peek()
    if head.kind is not TokenKind.IDENT:
        return None
    self._advance()

    args: List[ast.TypeRef] = []
    if self._check(TokenKind.LT):
        checkpoint = self._pos
        self._advance()  # `<`
        while True:
            arg = self._parse_type()
            if arg is None:
                break
            args.append(arg)
            if not self._match(TokenKind.COMMA):
                break
        # `arg is None` here means an argument failed to parse.
        if arg is None or not self._match(TokenKind.GT):
            # Not actually a generic: rewind.
            self._pos = checkpoint
            args = []

    # The type ends at the last token consumed.
    last_consumed = self.tokens[self._pos - 1]
    return ast.TypeRef(
        name=head.lexeme,
        type_args=args,
        span=SourceSpan(start=head.span.start, end=last_consumed.span.end),
    )
866
+
867
+ # ================================================================
868
+ # Statements
869
+ # ================================================================
870
+
871
def _parse_block(self) -> Optional[ast.Block]:
    """Parse `{ statement* }`.

    Returns None when the opening `{` is missing (reported by
    `_expect`). A missing closing `}` is reported but still yields a
    Block, whose span then ends at the current token.
    """
    open_span = self._current_span()
    if not self._expect(TokenKind.LBRACE, "`{`"):
        return None

    body: List[ast.Stmt] = []
    while not (self._at_end() or self._check(TokenKind.RBRACE)):
        parsed = self._parse_statement()
        if parsed is None:
            continue
        body.append(parsed)

    closer = self._expect(TokenKind.RBRACE, "`}` to close block")
    if closer:
        close_span = closer.span
    else:
        close_span = self._current_span()
    return ast.Block(
        statements=body,
        span=SourceSpan(start=open_span.start, end=close_span.end),
    )
886
+
887
def _parse_statement(self) -> Optional[ast.Stmt]:
    """Parse one statement, chosen by the current token.

    Order of decisions:

    1. a token that unambiguously starts a statement (`{`, `if`,
       `while`, `for`, `foreach`, `do`, `loop`, `try`, `return`,
       `pass`, `var`, `const`, `slot`);
    2. the contextual `role` declaration (`role IDENT = ...`);
    3. a nested `function` / `method` declaration;
    4. a typed variable declaration, detected by look-ahead;
    5. otherwise an expression statement.
    """
    tok = self._peek()
    kind = tok.kind

    # Leading token kind -> name of the parser method that handles it.
    by_keyword = {
        TokenKind.LBRACE: "_parse_block",
        TokenKind.KW_IF: "_parse_if",
        TokenKind.KW_WHILE: "_parse_while",
        TokenKind.KW_FOR: "_parse_for",
        TokenKind.KW_FOREACH: "_parse_foreach",
        TokenKind.KW_DO: "_parse_do_while",
        TokenKind.KW_LOOP: "_parse_loop",
        TokenKind.KW_TRY: "_parse_try",
        TokenKind.KW_RETURN: "_parse_return",
        TokenKind.KW_PASS: "_parse_pass",
        TokenKind.KW_VAR: "_parse_var_or_const",
        TokenKind.KW_CONST: "_parse_var_or_const",
        TokenKind.KW_SLOT: "_parse_slot_decl",
    }
    handler_name = by_keyword.get(kind)
    if handler_name is not None:
        return getattr(self, handler_name)()

    # `role` is only a keyword at statement start, in the exact shape
    # IDENT("role") IDENT ASSIGN; anywhere else it is an ordinary
    # identifier.
    is_role_decl = (
        kind is TokenKind.IDENT
        and tok.lexeme == "role"
        and self._peek(1).kind is TokenKind.IDENT
        and self._peek(2).kind is TokenKind.ASSIGN
    )
    if is_role_decl:
        return self._parse_var_or_const()

    # Nested function/method declarations are accepted here and parsed
    # with an empty modifier set.
    if kind is TokenKind.KW_FUNCTION:
        return self._parse_function_decl(ast.Modifiers())
    if kind is TokenKind.KW_METHOD:
        return self._parse_method_decl(ast.Modifiers())

    # Typed declaration (`vector x = ...;`) versus expression
    # statement, decided by look-ahead.
    if self._looks_like_typed_decl():
        return self._parse_typed_var_decl()
    return self._parse_expr_stmt()
936
+
937
def _looks_like_typed_decl(self) -> bool:
    """Report whether the upcoming tokens look like `TYPE name` followed by `=` or `;`.

    Pure look-ahead: no tokens are consumed. TYPE is an identifier
    optionally followed by a balanced `<...>` argument list. The scan
    through that list gives up (returns False) at the first `;`, `{`
    or `}`, and also when the token stream ends before the list is
    closed.
    """
    if self._peek().kind is not TokenKind.IDENT:
        return False
    # Skip type args <...> if present.
    offset = 1
    if self._peek(offset).kind is TokenKind.LT:
        depth = 1
        offset += 1
        # Bound the scan by the absolute token index (cursor + offset),
        # the same way _looks_like_generic_call does.
        while self._pos + offset < len(self.tokens) and depth > 0:
            k = self._peek(offset).kind
            if k is TokenKind.LT:
                depth += 1
            elif k is TokenKind.GT:
                depth -= 1
            elif k in (TokenKind.SEMICOLON, TokenKind.LBRACE, TokenKind.RBRACE):
                return False
            offset += 1
        if depth > 0:
            # Ran out of tokens with `<` still open: not a type.
            return False
    # After the type we need another IDENT and then `=` or `;`.
    if self._peek(offset).kind is TokenKind.IDENT:
        nxt = self._peek(offset + 1).kind
        if nxt in (TokenKind.ASSIGN, TokenKind.SEMICOLON):
            return True
    return False
960
+
961
def _parse_typed_var_decl(self) -> Optional[ast.VarDecl]:
    """Parse `type ident ["=" expr] ";"` into a non-const, non-inferred VarDecl.

    Returns None after skipping to the next statement boundary when
    either the type or the variable name cannot be parsed.
    """
    opening = self._current_span()

    declared_type = self._parse_type()
    if declared_type is None:
        self._skip_to_statement_boundary()
        return None

    ident = self._expect(TokenKind.IDENT, "variable name")
    if ident is None:
        self._skip_to_statement_boundary()
        return None

    value: Optional[ast.Expr] = (
        self._parse_expr() if self._match(TokenKind.ASSIGN) else None
    )

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after declaration")
    # Without a `;` the declaration's span ends at the current token.
    closing = terminator.span if terminator else self._current_span()
    return ast.VarDecl(
        is_const=False,
        is_var_inferred=False,
        type_ref=declared_type,
        name=ident.lexeme,
        initializer=value,
        span=SourceSpan(start=opening.start, end=closing.end),
    )
984
+
985
def _parse_var_or_const(self) -> Optional[ast.VarDecl]:
    """Parse a declaration introduced by `var`, `const`, or contextual `role`.

    Accepted shapes, as implemented below:
      * `var [N] name ...`        optional integer array size after `var`
      * `const TYPE name ...`     optional leading type on `const`
      * `var TYPE name ...`       parsed, but reported as SUT0103
      * `var name : TYPE ...`     colon-typed slot
      * `role name = expr;`       a missing initializer is SUT0104
    Returns None (after skipping to a statement boundary) when the
    variable name is missing.
    """
    opening = self._current_span()
    keyword = self._advance()  # var, const, or IDENT("role")
    is_const = keyword.kind is TokenKind.KW_CONST
    is_var = keyword.kind is TokenKind.KW_VAR
    # `role` is contextual: the lexer emits IDENT for it and the
    # statement dispatcher only sends us here for `role IDENT = ...`.
    is_role = keyword.kind is TokenKind.IDENT and keyword.lexeme == "role"

    # Optional `[N]` directly after `var`.
    array_size: Optional[int] = None
    if is_var and self._check(TokenKind.LBRACKET):
        self._advance()  # [
        size_tok = self._expect(TokenKind.INT_LIT, "array size (integer literal)")
        if size_tok is not None:
            try:
                array_size = int(size_tok.lexeme)
            except ValueError:
                array_size = None
        self._expect(TokenKind.RBRACKET, "`]` after array size")

    # A leading `TYPE name` pair: legal on `const`, an error on `var`
    # (still parsed so the rest of the file can be validated).
    type_ref: Optional[ast.TypeRef] = None
    is_var_inferred = is_var  # `var` is inferred unless a type shows up
    if ((is_const or is_var)
            and self._peek().kind is TokenKind.IDENT
            and self._peek(1).kind is TokenKind.IDENT):
        leading_type = self._parse_type()
        if is_var:
            # Only the no-colon form lands here; `var x : TYPE` is
            # handled after the name.
            error_end = leading_type.span.end if leading_type else keyword.span.end
            self.diagnostics.error(
                "`var` cannot be combined with a space-separated type; "
                "use colon syntax instead (`var x : TYPE`)",
                SourceSpan(start=keyword.span.start, end=error_end),
                code="SUT0103",
                hint="write either `var x = ...;` (inferred), "
                     "`var x : TYPE;` (explicit slot), or "
                     "`TYPE x = ...;` (classic typed declaration)",
            )
            is_var_inferred = False
        type_ref = leading_type

    ident = self._expect(TokenKind.IDENT, "variable name")
    if ident is None:
        self._skip_to_statement_boundary()
        return None

    # `var x : TYPE` -- colon syntax, only recognised on `var`.
    is_var_colon = False
    if is_var and self._match(TokenKind.COLON):
        colon_type = self._parse_type()
        if colon_type is not None:
            type_ref = colon_type
            is_var_colon = True
            is_var_inferred = False

    value: Optional[ast.Expr] = None
    if self._match(TokenKind.ASSIGN):
        value = self._parse_expr()

    # A role without a binding source is reported; the declaration
    # node is still produced.
    if is_role and value is None:
        self.diagnostics.error(
            "`role` declaration needs an initializer (e.g. "
            "`role capital_of = learned_from(...)`). "
            "Uninitialized roles are not meaningful in Sutra — use "
            "`var x : TYPE;` for an empty slot instead.",
            SourceSpan(start=keyword.span.start, end=self._current_span().end),
            code="SUT0104",
            hint="add `= <expr>` to the role declaration",
        )

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after declaration")
    closing = terminator.span if terminator else self._current_span()
    return ast.VarDecl(
        is_const=is_const,
        is_var_inferred=is_var_inferred and type_ref is None,
        type_ref=type_ref,
        name=ident.lexeme,
        initializer=value,
        span=SourceSpan(start=opening.start, end=closing.end),
        is_role=is_role,
        is_var_colon=is_var_colon,
        array_size=array_size,
    )
1079
+
1080
def _parse_if(self) -> Optional[ast.IfStmt]:
    """Parse `"if" "(" expr ")" block [ "else" (if_stmt | block) ]`.

    Returns None when the then-block cannot be parsed. The statement's
    span ends at the else branch when one was parsed, otherwise at the
    then-block.
    """
    opening = self._current_span()
    self._advance()  # if
    self._expect(TokenKind.LPAREN, "`(` after `if`")
    condition = self._parse_expr()
    self._expect(TokenKind.RPAREN, "`)` to close `if` condition")

    consequent = self._parse_block()
    if consequent is None:
        return None

    alternative: Optional[Union[ast.IfStmt, ast.Block]] = None
    if self._match(TokenKind.KW_ELSE):
        # `else if` chains recurse; a plain `else` takes a block.
        parse_alternative = (
            self._parse_if if self._check(TokenKind.KW_IF) else self._parse_block
        )
        alternative = parse_alternative()

    last_branch = alternative or consequent
    return ast.IfStmt(
        condition=condition,
        then_branch=consequent,
        else_branch=alternative,
        span=SourceSpan(start=opening.start, end=last_branch.span.end),
    )
1102
+
1103
def _parse_while(self) -> Optional[ast.WhileStmt]:
    """Parse `"while" "(" expr ")" block`; None if the body block fails."""
    opening = self._current_span()
    self._advance()  # while

    self._expect(TokenKind.LPAREN, "`(` after `while`")
    condition = self._parse_expr()
    self._expect(TokenKind.RPAREN, "`)` to close `while` condition")

    loop_body = self._parse_block()
    if loop_body is None:
        return None

    whole = SourceSpan(start=opening.start, end=loop_body.span.end)
    return ast.WhileStmt(condition=condition, body=loop_body, span=whole)
1117
+
1118
def _parse_for(self) -> Optional[ast.ForStmt]:
    """Parse `"for" "(" [init] ";" [cond] ";" [step] ")" block`.

    All three header clauses are optional. Returns None when the body
    block cannot be parsed.
    """
    opening = self._current_span()
    self._advance()  # for
    self._expect(TokenKind.LPAREN, "`(` after `for`")

    # Init clause. Every statement parser used here consumes its own
    # trailing `;`, so only the empty case eats the `;` explicitly.
    initializer: Optional[ast.Stmt] = None
    if self._check(TokenKind.SEMICOLON):
        self._advance()  # empty init
    elif self._check_any(TokenKind.KW_VAR, TokenKind.KW_CONST):
        initializer = self._parse_var_or_const()
    elif self._looks_like_typed_decl():
        initializer = self._parse_typed_var_decl()
    else:
        initializer = self._parse_expr_stmt()

    # Condition clause, terminated by `;`.
    condition: Optional[ast.Expr] = None
    if not self._check(TokenKind.SEMICOLON):
        condition = self._parse_expr()
    self._expect(TokenKind.SEMICOLON, "`;` between `for` clauses")

    # Step clause, terminated by `)`.
    step: Optional[ast.Expr] = None
    if not self._check(TokenKind.RPAREN):
        step = self._parse_expr()
    self._expect(TokenKind.RPAREN, "`)` to close `for` header")

    loop_body = self._parse_block()
    if loop_body is None:
        return None
    return ast.ForStmt(
        init=initializer,
        condition=condition,
        step=step,
        body=loop_body,
        span=SourceSpan(start=opening.start, end=loop_body.span.end),
    )
1157
+
1158
def _parse_foreach(self) -> Optional[ast.ForeachStmt]:
    """Parse `"foreach" "(" ("var" | type) ident "in" expr ")" block`.

    A missing loop-variable name is recorded as the empty string so
    parsing can continue. Returns None when the body block fails.
    """
    opening = self._current_span()
    self._advance()  # foreach
    self._expect(TokenKind.LPAREN, "`(` after `foreach`")

    # `var` leaves the element type to inference (None); otherwise an
    # explicit type is parsed.
    element_type: Optional[ast.TypeRef] = (
        None if self._match(TokenKind.KW_VAR) else self._parse_type()
    )

    ident = self._expect(TokenKind.IDENT, "loop variable name")
    loop_var = ident.lexeme if ident else ""
    self._expect(TokenKind.KW_IN, "`in`")
    source = self._parse_expr()
    self._expect(TokenKind.RPAREN, "`)` to close `foreach` header")

    loop_body = self._parse_block()
    if loop_body is None:
        return None
    return ast.ForeachStmt(
        var_type=element_type,
        var_name=loop_var,
        iterable=source,
        body=loop_body,
        span=SourceSpan(start=opening.start, end=loop_body.span.end),
    )
1184
+
1185
def _parse_do_while(self) -> Optional[ast.DoWhileStmt]:
    """Parse `"do" block "while" "(" expr ")" ";"`; None if the block fails."""
    opening = self._current_span()
    self._advance()  # do

    loop_body = self._parse_block()
    if loop_body is None:
        return None

    self._expect(TokenKind.KW_WHILE, "`while` after `do` block")
    self._expect(TokenKind.LPAREN, "`(`")
    condition = self._parse_expr()
    self._expect(TokenKind.RPAREN, "`)`")

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after do-while")
    # Without the `;` the statement's span ends at the current token.
    closing = terminator.span if terminator else self._current_span()
    return ast.DoWhileStmt(
        body=loop_body,
        condition=condition,
        span=SourceSpan(start=opening.start, end=closing.end),
    )
1202
+
1203
def _parse_loop(self):
    """Parse a `loop` statement.

    Forms:
        loop (10) { ... }            integer-literal header: bounded loop
        loop (10 as i) { ... }       bounded loop with an index variable
        loop (expr) { ... }          any other header: condition-based loop
        loop NAME(cond, a, b, ...);  call of a loop function; NAME may be
                                     dotted once (`obj.method`)

    The token right after `loop` picks the form: an identifier means a
    loop call, anything else is expected to be `(` opening one of the
    block forms. Returns a LoopCallStmt or LoopStmt, or None when
    recovery was needed.
    """
    opening = self._current_span()
    self._advance()  # loop

    def abandon():
        # Shared recovery path for a malformed loop call.
        self._skip_to_statement_boundary()
        return None

    if self._check(TokenKind.IDENT):
        # ---- loop call: NAME[.METHOD](cond, state, ...) ; ----
        name_parts = [self._advance().lexeme]
        if self._match(TokenKind.DOT):
            method_tok = self._expect(TokenKind.IDENT,
                                      "method name after `.` in loop call")
            if method_tok is None:
                return abandon()
            name_parts.append(method_tok.lexeme)
        callee_name = ".".join(name_parts)

        if self._expect(TokenKind.LPAREN, "`(` after loop function name") is None:
            return abandon()

        condition_arg = self._parse_expr()
        if condition_arg is None:
            return abandon()

        # Everything after the condition must be a bare identifier.
        state_arg_names: List[str] = []
        while self._match(TokenKind.COMMA):
            arg_tok = self._expect(
                TokenKind.IDENT,
                "state argument must be an identifier (slot variable name)",
            )
            if arg_tok is None:
                return abandon()
            state_arg_names.append(arg_tok.lexeme)

        self._expect(TokenKind.RPAREN, "`)` to close loop call argument list")
        terminator = self._expect(TokenKind.SEMICOLON, "`;` after loop call")
        closing = terminator.span if terminator else self._current_span()
        return ast.LoopCallStmt(
            name=callee_name,
            condition_arg=condition_arg,
            state_arg_names=state_arg_names,
            span=SourceSpan(start=opening.start, end=closing.end),
        )

    # ---- block forms: loop ( header ) { ... } ----
    self._expect(TokenKind.LPAREN, "`(` after `loop`")
    header = self._parse_expr()

    count: Optional[ast.Expr] = None
    index_var: Optional[str] = None
    condition: Optional[ast.Expr] = None
    if isinstance(header, ast.IntLiteral):
        # Bounded loop, optionally naming its index with `as ident`.
        count = header
        if self._match(TokenKind.KW_AS):
            index_tok = self._expect(TokenKind.IDENT, "index variable name after `as`")
            index_var = index_tok.lexeme if index_tok else "_i"
    else:
        # Condition-based loop.
        condition = header

    self._expect(TokenKind.RPAREN, "`)` to close `loop` header")
    loop_body = self._parse_block()
    if loop_body is None:
        return None
    return ast.LoopStmt(
        count=count,
        index_var=index_var,
        condition=condition,
        body=loop_body,
        span=SourceSpan(start=opening.start, end=loop_body.span.end),
    )
1291
+
1292
def _parse_try(self) -> Optional[ast.TryStmt]:
    """Parse `"try" block "catch" block`; None if either block fails."""
    opening = self._current_span()
    self._advance()  # try

    protected = self._parse_block()
    if protected is None:
        return None

    # A missing `catch` is reported by _expect; the handler block is
    # attempted regardless.
    self._expect(TokenKind.KW_CATCH, "`catch` after `try` block")
    handler = self._parse_block()
    if handler is None:
        return None

    whole = SourceSpan(start=opening.start, end=handler.span.end)
    return ast.TryStmt(try_body=protected, catch_body=handler, span=whole)
1307
+
1308
def _parse_pass(self):
    """Parse `pass item, item, ...;`.

    Each item is parsed by `_parse_pass_value` (the `replace` keyword
    or an expression). `pass;` with no items is accepted here; the
    item count is not checked at parse time.
    """
    opening = self._current_span()
    self._advance()  # pass

    items: List = []
    if not self._check(TokenKind.SEMICOLON):
        # One item, then one more after every comma.
        while True:
            items.append(self._parse_pass_value())
            if not self._match(TokenKind.COMMA):
                break

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after `pass`")
    closing = terminator.span if terminator else self._current_span()
    return ast.PassStmt(
        values=items,
        span=SourceSpan(start=opening.start, end=closing.end),
    )
1330
+
1331
def _parse_pass_value(self):
    """Parse one `pass` item: a `replace` marker or an ordinary expression."""
    head = self._peek()
    if head.kind is not TokenKind.KW_REPLACE:
        return self._parse_expr()
    self._advance()  # replace
    return ast.ReplaceMarker(span=head.span)
1338
+
1339
def _parse_return(self) -> Optional[ast.ReturnStmt]:
    """Parse `"return" [expr] ";"`; the value is None for a bare `return;`."""
    opening = self._current_span()
    self._advance()  # return

    result: Optional[ast.Expr] = (
        None if self._check(TokenKind.SEMICOLON) else self._parse_expr()
    )

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after `return`")
    closing = terminator.span if terminator else self._current_span()
    return ast.ReturnStmt(
        value=result,
        span=SourceSpan(start=opening.start, end=closing.end),
    )
1351
+
1352
def _parse_expr_stmt(self) -> Optional[ast.ExprStmt]:
    """Parse `expr ";"` into an ExprStmt.

    When no expression could be parsed, skips to the next statement
    boundary and returns None.
    """
    opening = self._current_span()
    parsed = self._parse_expr()
    if parsed is None:
        self._skip_to_statement_boundary()
        return None

    terminator = self._expect(TokenKind.SEMICOLON, "`;` after expression")
    closing = terminator.span if terminator else self._current_span()
    whole = SourceSpan(start=opening.start, end=closing.end)
    return ast.ExprStmt(expr=parsed, span=whole)
1364
+
1365
+ # ================================================================
1366
+ # Expressions (Pratt-style via cascaded precedence methods)
1367
+ # ================================================================
1368
+
1369
+ def _parse_expr(self) -> ast.Expr:
1370
+ return self._parse_pipe_forward()
1371
+
1372
def _parse_pipe_forward(self) -> ast.Expr:
    """Parse a left-associative chain of `|>` over assignment expressions.

    `|>` is forbidden by the language; per the original note, the
    validator reports each occurrence separately. It is still parsed
    here as an ordinary low-precedence binary operator so the rest of
    the expression parses cleanly and the user is not shown a cascade
    of follow-on "expected `;`" errors.
    """
    node = self._parse_assignment()
    while self._match(TokenKind.PIPE_FORWARD):
        rhs = self._parse_assignment()
        combined = SourceSpan(start=node.span.start, end=rhs.span.end)
        node = ast.BinaryOp(op="|>", left=node, right=rhs, span=combined)
    return node
1387
+
1388
def _parse_assignment(self) -> ast.Expr:
    """Parse an optional, right-associative assignment.

    The left side is parsed at logical-or level. If one of `=`, `+=`,
    `-=`, `*=`, `/=` follows, the right side is parsed recursively so
    `a = b = c` nests to the right.
    """
    target = self._parse_logical_or()

    operator_text = {
        TokenKind.ASSIGN: "=",
        TokenKind.PLUS_ASSIGN: "+=",
        TokenKind.MINUS_ASSIGN: "-=",
        TokenKind.STAR_ASSIGN: "*=",
        TokenKind.SLASH_ASSIGN: "/=",
    }
    if self._peek().kind not in operator_text:
        return target

    op_tok = self._advance()
    rhs = self._parse_assignment()
    return ast.Assignment(
        op=operator_text[op_tok.kind],
        target=target,
        value=rhs,
        span=SourceSpan(start=target.span.start, end=rhs.span.end),
    )
1408
+
1409
+ # Logical operator precedence (lowest to highest):
1410
+ # || or (parse_logical_or)
1411
+ # xor xnor iff nand (parse_logical_xor)
1412
+ # && and (parse_logical_and)
1413
+ # == != (parse_equality)
1414
+ # < <= > >= (parse_comparison)
1415
+ # Symbolic and keyword forms produce the same op-string in the
1416
+ # AST so the inliner can lower uniformly. The keyword forms
1417
+ # (`and`, `or`, `nand`, `xor`, `xnor`, `iff`, `not`) are
1418
+ # CONTEXTUAL — they lex as IDENT and the parser checks their
1419
+ # lowercased lexeme here so user identifiers with the same
1420
+ # spelling (e.g. `Iff`, `Nand`) keep working.
1421
+ _LOGICAL_OR_KW = {"or"} # binary, op="||"
1422
+ _LOGICAL_XOR_KW = { # binary, op as named
1423
+ "xor": "xor",
1424
+ "xnor": "xnor",
1425
+ "iff": "xnor",
1426
+ "nand": "nand",
1427
+ }
1428
+ _LOGICAL_AND_KW = {"and"} # binary, op="&&"
1429
+ _LOGICAL_NOT_KW = {"not"} # unary, op="!"
1430
+
1431
def _ident_lex_lower(self) -> Optional[str]:
    """Lowercased lexeme of the current token when it is an IDENT, else None.

    Consumes nothing; used by the contextual logical-keyword checks.
    """
    current = self._peek()
    return current.lexeme.lower() if current.kind is TokenKind.IDENT else None
1438
+
1439
def _parse_logical_or(self) -> ast.Expr:
    """Parse a left-associative chain of `||` / `or` over xor-level operands.

    Both spellings yield a BinaryOp with op "||".
    """
    node = self._parse_logical_xor()
    while (self._peek().kind is TokenKind.OR
           or self._ident_lex_lower() in self._LOGICAL_OR_KW):
        self._advance()  # `||` or the `or` keyword
        rhs = self._parse_logical_xor()
        node = ast.BinaryOp(
            op="||", left=node, right=rhs,
            span=SourceSpan(start=node.span.start, end=rhs.span.end),
        )
    return node
1458
+
1459
def _parse_logical_xor(self) -> ast.Expr:
    """Parse a left-associative chain of `xor` / `xnor` / `iff` / `nand`.

    These exist only as contextual keywords; the op string stored in
    the BinaryOp comes from the `_LOGICAL_XOR_KW` table.
    """
    node = self._parse_logical_and()
    while True:
        op = self._LOGICAL_XOR_KW.get(self._ident_lex_lower())
        if op is None:
            return node
        self._advance()  # the keyword
        rhs = self._parse_logical_and()
        node = ast.BinaryOp(
            op=op, left=node, right=rhs,
            span=SourceSpan(start=node.span.start, end=rhs.span.end),
        )
1474
+
1475
def _parse_logical_and(self) -> ast.Expr:
    """Parse a left-associative chain of `&&` / `and` over equality-level operands.

    Both spellings yield a BinaryOp with op "&&".
    """
    node = self._parse_equality()
    while (self._peek().kind is TokenKind.AND
           or self._ident_lex_lower() in self._LOGICAL_AND_KW):
        self._advance()  # `&&` or the `and` keyword
        rhs = self._parse_equality()
        node = ast.BinaryOp(
            op="&&", left=node, right=rhs,
            span=SourceSpan(start=node.span.start, end=rhs.span.end),
        )
    return node
1494
+
1495
# Token kinds that continue a comparison chain in
# _parse_chained_comparison: the two equality operators and the four
# ordering operators.
_CHAIN_COMPARISON_TOKENS = frozenset((
    TokenKind.EQ,
    TokenKind.NEQ,
    TokenKind.LT,
    TokenKind.GT,
    TokenKind.LE,
    TokenKind.GE,
))
1499
+
1500
+ def _parse_equality(self) -> ast.Expr:
1501
+ # Equality + comparison are merged into one chain-aware parser.
1502
+ # Python's chained-comparison semantics with Sutra-specific
1503
+ # reductions for transitive same-op chains.
1504
+ return self._parse_chained_comparison()
1505
+
1506
+ def _parse_comparison(self) -> ast.Expr:
1507
+ # Kept as a separate level for additive-precedence callers
1508
+ # that don't want chain detection. Today only _parse_equality
1509
+ # is the entry; this method is here so subclassing parsers
1510
+ # that want plain non-chained comparison can override.
1511
+ return self._parse_chained_comparison()
1512
+
1513
def _parse_chained_comparison(self) -> ast.Expr:
    """Parse `a OP b OP c ...` over additive operands, OP in == != < > <= >=.

    Lowering, by shape of the chain:
      * no operator        -> the operand itself
      * one operator       -> BinaryOp(op, a, b)
      * all `==`           -> Equals(a, b, c, ...)
      * all `<` / all `>`  -> hasOrder(...)         (`>` reverses the args)
      * all `<=` / all `>=`-> hasOrderOrEqual(...)  (`>=` reverses the args)
      * `==` mixed with ordering ops that all point the same way
                           -> hasOrder / hasOrderOrEqual over groups, where
                              operands joined by `==` form one group
                              (wrapped in Equals(...) when it has more
                              than one member)
      * anything else      -> pairwise BinaryOps joined with `&&`
    """
    canonical = {TokenKind.NEQ: "!=", TokenKind.EQ: "=="}

    head = self._parse_additive()
    ops: List[str] = []
    operands: List[ast.Expr] = [head]
    while self._peek().kind in self._CHAIN_COMPARISON_TOKENS:
        op_tok = self._advance()
        # `==` / `!=` use fixed op strings; ordering ops use the lexeme.
        ops.append(canonical.get(op_tok.kind, op_tok.lexeme))
        operands.append(self._parse_additive())

    if not ops:
        return head

    def covering(first: ast.Expr, last: ast.Expr) -> SourceSpan:
        return SourceSpan(start=first.span.start, end=last.span.end)

    def call(name: str, args: List[ast.Expr], at: SourceSpan) -> ast.Expr:
        return ast.Call(
            callee=ast.Identifier(name=name, span=at),
            type_args=[],
            args=args,
            span=at,
        )

    if len(ops) == 1:
        # Single comparison: plain BinaryOp.
        return ast.BinaryOp(
            op=ops[0],
            left=operands[0],
            right=operands[1],
            span=covering(operands[0], operands[1]),
        )

    whole = covering(operands[0], operands[-1])
    distinct = set(ops)

    # Chains made of a single repeated operator: (callee, reverse args?).
    uniform_lowering = {
        "==": ("Equals", False),
        "<": ("hasOrder", False),
        ">": ("hasOrder", True),
        "<=": ("hasOrderOrEqual", False),
        ">=": ("hasOrderOrEqual", True),
    }
    if len(distinct) == 1:
        (only_op,) = distinct
        if only_op in uniform_lowering:
            callee_name, descending_source = uniform_lowering[only_op]
            args = list(reversed(operands)) if descending_source else operands
            return call(callee_name, args, whole)

    # `==` mixed with ordering ops of one direction.
    ascending = {"<", "<="}
    descending = {">", ">="}
    ordering = ascending | descending
    if "!=" not in distinct and distinct <= ({"=="} | ordering):
        used = distinct & ordering
        goes_up = bool(used) and used <= ascending
        goes_down = bool(used) and used <= descending
        if goes_up or goes_down:
            # Operands linked by `==` share a group; every ordering op
            # starts a new group.
            groups: List[List[ast.Expr]] = [[operands[0]]]
            for op, operand in zip(ops, operands[1:]):
                if op == "==":
                    groups[-1].append(operand)
                else:
                    groups.append([operand])
            group_args: List[ast.Expr] = [
                g[0] if len(g) == 1
                else call("Equals", g, covering(g[0], g[-1]))
                for g in groups
            ]
            if goes_down:
                # Arguments are always passed in ascending order.
                group_args.reverse()
            # Any non-strict op selects hasOrderOrEqual for the whole chain.
            # NOTE(review): `a < b <= c` therefore lowers to
            # hasOrderOrEqual(a, b, c); whether the strict `<` link is
            # meant to be relaxed this way is not visible here -- confirm.
            non_strict = bool(used & {"<=", ">="})
            return call(
                "hasOrderOrEqual" if non_strict else "hasOrder",
                group_args,
                whole,
            )

    # Fallback (`!=` present, or mixed directions): compare each
    # adjacent pair and AND the results together, left to right.
    pairs = [
        ast.BinaryOp(op=op, left=lhs, right=rhs, span=covering(lhs, rhs))
        for op, lhs, rhs in zip(ops, operands, operands[1:])
    ]
    and_chain: ast.Expr = pairs[0]
    for pair in pairs[1:]:
        and_chain = ast.BinaryOp(
            op="&&",
            left=and_chain,
            right=pair,
            span=covering(and_chain, pair),
        )
    return and_chain
1669
+
1670
def _parse_additive(self) -> ast.Expr:
    """Parse a left-associative chain of `+` / `-` over multiplicative operands."""
    additive_kinds = (TokenKind.PLUS, TokenKind.MINUS)
    node = self._parse_multiplicative()
    while self._peek().kind in additive_kinds:
        op_tok = self._advance()
        rhs = self._parse_multiplicative()
        node = ast.BinaryOp(
            op=op_tok.lexeme, left=node, right=rhs,
            span=SourceSpan(start=node.span.start, end=rhs.span.end),
        )
    return node
1681
+
1682
def _parse_multiplicative(self) -> ast.Expr:
    """Parse a left-associative chain of `*` / `/` / `%` over unary operands."""
    multiplicative_kinds = (TokenKind.STAR, TokenKind.SLASH, TokenKind.PERCENT)
    node = self._parse_unary()
    while self._peek().kind in multiplicative_kinds:
        op_tok = self._advance()
        rhs = self._parse_unary()
        node = ast.BinaryOp(
            op=op_tok.lexeme, left=node, right=rhs,
            span=SourceSpan(start=node.span.start, end=rhs.span.end),
        )
    return node
1695
+
1696
def _parse_unary(self) -> ast.Expr:
    """Parse prefix operators, then fall through to postfix expressions.

    `!`, `~` and the contextual keyword `not` (matched on the
    lowercased lexeme) all become UnaryOp("!"). `+` and `-` stay
    arithmetic and keep their own lexeme as the op. Prefix operators
    nest by recursion.
    """
    head_kind = self._peek().kind
    logical_not = (
        head_kind in (TokenKind.BANG, TokenKind.TILDE)
        or self._ident_lex_lower() in self._LOGICAL_NOT_KW
    )
    arithmetic_sign = head_kind in (TokenKind.MINUS, TokenKind.PLUS)
    if not (logical_not or arithmetic_sign):
        return self._parse_postfix()

    op_tok = self._advance()
    operand = self._parse_unary()
    return ast.UnaryOp(
        op="!" if logical_not else op_tok.lexeme,
        operand=operand,
        span=SourceSpan(start=op_tok.span.start, end=operand.span.end),
    )
1724
+
1725
def _parse_postfix(self) -> ast.Expr:
    """Parse a primary expression followed by any number of postfix forms.

    Recognised postfix forms, applied left to right:
      * `.member`            -> MemberAccess
      * `(args)`             -> Call
      * `<types>(args)`      -> Call with type arguments (only when
                                `_looks_like_generic_call` agrees)
      * `[index]`            -> Subscript
      * `++` / `--`          -> PostfixOp

    Returns whatever `_parse_primary` produced when nothing follows it.
    """
    expr = self._parse_primary()
    if expr is None:
        # Callers such as _parse_expr_stmt treat a None expression as a
        # failed parse. Bail out before any postfix branch touches
        # `expr.span`, which would raise instead of recovering.
        # NOTE(review): assumes _parse_primary signals failure with
        # None -- confirm against its definition.
        return expr
    while True:
        tok = self._peek()
        if tok.kind is TokenKind.DOT:
            self._advance()
            member_tok = self._expect(TokenKind.IDENT, "member name")
            if member_tok is None:
                # Keep what was parsed so far; _expect is relied on to
                # have reported the missing member name.
                return expr
            expr = ast.MemberAccess(
                obj=expr,
                member=member_tok.lexeme,
                span=SourceSpan(start=expr.span.start, end=member_tok.span.end),
            )
            continue
        if tok.kind is TokenKind.LPAREN:
            args, end_pos = self._parse_arg_list()
            expr = ast.Call(
                callee=expr,
                type_args=[],
                args=args,
                span=SourceSpan(start=expr.span.start, end=end_pos),
            )
            continue
        if tok.kind is TokenKind.LT and self._looks_like_generic_call():
            type_args = self._parse_type_arg_list()
            args, end_pos = self._parse_arg_list()
            expr = ast.Call(
                callee=expr,
                type_args=type_args,
                args=args,
                span=SourceSpan(start=expr.span.start, end=end_pos),
            )
            continue
        if tok.kind is TokenKind.LBRACKET:
            # Postfix subscript: `target[index]`.
            self._advance()
            index = self._parse_expr()
            close = self._expect(
                TokenKind.RBRACKET, "`]` to close subscript"
            )
            # Without a `]` the span ends at the current token.
            end = close.span.end if close else self._current_span().end
            expr = ast.Subscript(
                target=expr,
                index=index,
                span=SourceSpan(start=expr.span.start, end=end),
            )
            continue
        if tok.kind in (TokenKind.PLUS_PLUS, TokenKind.MINUS_MINUS):
            self._advance()
            expr = ast.PostfixOp(
                op=tok.lexeme,
                operand=expr,
                span=SourceSpan(start=expr.span.start, end=tok.span.end),
            )
            continue
        break
    return expr
1784
+
1785
def _looks_like_generic_call(self) -> bool:
    """Decide by lookahead whether the current `<` opens a generic call.

    Expected shape: `< type (, type)* > (`

    Starting just after the `<`, the scan tracks angle-bracket
    nesting. Only identifiers, commas, dots and nested `<` / `>` may
    appear before the matching `>`; any other token kind means this
    is not a type list. Once the matching `>` is found, the answer is
    whether the very next token is `(`. Running out of tokens before
    the match yields False. Nothing is consumed.
    """
    assert self._peek().kind is TokenKind.LT
    type_list_kinds = (TokenKind.IDENT, TokenKind.COMMA, TokenKind.DOT)
    nesting = 1
    ahead = 1
    while self._pos + ahead < len(self.tokens):
        kind = self._peek(ahead).kind
        if kind is TokenKind.GT:
            nesting -= 1
            if nesting == 0:
                # Matching `>` found; a call needs `(` right after it.
                return self._peek(ahead + 1).kind is TokenKind.LPAREN
        elif kind is TokenKind.LT:
            nesting += 1
        elif kind not in type_list_kinds:
            return False
        ahead += 1
    return False
1814
+
1815
def _parse_type_arg_list(self) -> List[ast.TypeRef]:
    """Parse `< type (, type)* >` and return the parsed type references.

    Collection stops at the first `_parse_type` call that returns
    None, or when a type is not followed by a comma. The closing `>`
    is then requested through `_expect` either way.
    """
    self._expect(TokenKind.LT, "`<`")
    collected: List[ast.TypeRef] = []
    parsed = self._parse_type()
    while parsed is not None:
        collected.append(parsed)
        if not self._match(TokenKind.COMMA):
            break
        parsed = self._parse_type()
    self._expect(TokenKind.GT, "`>`")
    return collected
1828
+
1829
def _parse_arg_list(self) -> Tuple[List[ast.Expr], SourcePosition]:
    """Parse a parenthesised, comma-separated argument list.

    Returns the argument expressions together with the end position
    of the list: the end of the closing `)` when it is present,
    otherwise the end of the parser's current span.
    """
    self._expect(TokenKind.LPAREN, "`(`")

    # Empty list: `()`.
    if self._check(TokenKind.RPAREN):
        return [], self._advance().span.end

    arguments: List[ast.Expr] = [self._parse_expr()]
    while self._match(TokenKind.COMMA):
        arguments.append(self._parse_expr())

    closer = self._expect(TokenKind.RPAREN, "`)` to close argument list")
    if closer:
        return arguments, closer.span.end
    return arguments, self._current_span().end
1844
+
1845
+ # ----------------------------------------------------------------
1846
+ # Primary expressions
1847
+ # ----------------------------------------------------------------
1848
+
1849
def _parse_primary(self) -> ast.Expr:
    """Parse a single primary expression, chosen by the current token kind.

    Covers literals (int, float, imaginary, string, char, bool,
    interpolated string), the `unknown` / `wait` / `this` keywords,
    identifiers (including the special built-in call names), the
    `function.` disambiguation prefix, and the bracketed forms that
    start with `(`, `[` or `{`.

    For any other token an error diagnostic (SUT0104) is recorded,
    the token is consumed, and a placeholder identifier named
    `<error>` is returned.
    """
    head = self._peek()
    kind = head.kind

    # --- Literals whose payload lives in `head.value` -----------------
    # A missing payload (None) falls back to a zero/empty default.
    if kind is TokenKind.INT_LIT:
        self._advance()
        payload = 0 if head.value is None else int(head.value)
        return ast.IntLiteral(value=payload, span=head.span)
    if kind is TokenKind.FLOAT_LIT:
        self._advance()
        payload = 0.0 if head.value is None else float(head.value)
        return ast.FloatLiteral(value=payload, span=head.span)
    if kind is TokenKind.IMAG_LIT:
        self._advance()
        payload = 0.0 if head.value is None else float(head.value)
        return ast.ImaginaryLiteral(value=payload, span=head.span)
    if kind is TokenKind.STRING_LIT:
        self._advance()
        payload = "" if head.value is None else str(head.value)
        return ast.StringLiteral(value=payload, span=head.span)
    if kind is TokenKind.CHAR_LIT:
        self._advance()
        payload = 0 if head.value is None else int(head.value)
        return ast.CharLiteral(value=payload, span=head.span)
    if kind is TokenKind.STRING_INTERP_START:
        return self._parse_interp_string()

    # --- Keyword literals ---------------------------------------------
    if kind in (TokenKind.TRUE, TokenKind.FALSE):
        self._advance()
        return ast.BoolLiteral(value=kind is TokenKind.TRUE, span=head.span)
    if kind is TokenKind.KW_UNKNOWN:
        self._advance()
        return ast.UnknownLiteral(span=head.span)
    if kind is TokenKind.KW_WAIT:
        # `wait` parses as a primary expression so the rest of the
        # declaration grammar (`int i = wait;`) works. Position
        # restriction (only as a var-decl initializer) is enforced
        # by the validator, not the parser — same approach used
        # for other context-sensitive constructs.
        self._advance()
        return ast.WaitLiteral(span=head.span)
    if kind is TokenKind.KW_THIS:
        self._advance()
        return ast.ThisExpr(span=head.span)

    # --- Names ----------------------------------------------------------
    if kind is TokenKind.IDENT:
        # Handle special built-in calls syntactically.
        if head.lexeme in _SPECIAL_CALL_NAMES:
            return self._parse_special_call(head)
        self._advance()
        return ast.Identifier(name=head.lexeme, span=head.span)
    if kind is TokenKind.KW_FUNCTION and self._peek(1).kind is TokenKind.DOT:
        # The `function.` disambiguation prefix: documented in
        # examples/02-functions-vs-methods.su. Resolves an ambiguous
        # bareword call to the free-function namespace. We treat
        # the literal `function` keyword as an identifier in this
        # position so the rest of the postfix chain parses normally.
        self._advance()
        return ast.Identifier(name="function", span=head.span)

    # --- Bracketed forms, each handled by its own helper -----------------
    if kind is TokenKind.LPAREN:
        return self._parse_paren_or_cast()
    if kind is TokenKind.LBRACKET:
        return self._parse_array_literal()
    if kind is TokenKind.LBRACE:
        return self._parse_map_literal()

    # Unknown — emit error and return a placeholder identifier so
    # higher-level code keeps making progress.
    self.diagnostics.error(
        f"expected expression, got {self._describe(head)}",
        head.span,
        code="SUT0104",
    )
    self._advance()
    return ast.Identifier(name="<error>", span=head.span)
1922
+
1923
def _parse_interp_string(self) -> ast.InterpolatedString:
    """Parse an interpolated string into its text and expression parts.

    Consumes the opening STRING_INTERP_START token, then gathers
    parts until STRING_INTERP_END: literal chunks are stored as `str`
    and each interpolation hole is stored as the parsed expression.

    Any other token inside the string records an "unterminated"
    diagnostic (SUT0002). The parts collected so far are returned
    and the offending token is left unconsumed.
    """
    opener = self._advance()  # STRING_INTERP_START
    begin = opener.span.start
    pieces: List[Union[str, ast.Expr]] = []
    while True:
        cur = self._peek()
        kind = cur.kind
        if kind is TokenKind.STRING_LIT_CHUNK:
            self._advance()
            # Prefer the token's value; use the raw lexeme when it is None.
            pieces.append(cur.lexeme if cur.value is None else str(cur.value))
        elif kind is TokenKind.INTERP_OPEN:
            self._advance()
            embedded = self._parse_expr()
            self._expect(TokenKind.INTERP_CLOSE, "`}` to close interpolation")
            pieces.append(embedded)
        elif kind is TokenKind.STRING_INTERP_END:
            closer = self._advance()
            return ast.InterpolatedString(
                parts=pieces,
                span=SourceSpan(start=begin, end=closer.span.end),
            )
        else:
            # Anything else inside an interpolated string is a lexer
            # bug (or EOF after unterminated literal). Bail.
            self.diagnostics.error(
                "unterminated interpolated string literal",
                cur.span,
                code="SUT0002",
            )
            return ast.InterpolatedString(
                parts=pieces,
                span=SourceSpan(start=begin, end=cur.span.end),
            )
1955
+
1956
def _parse_map_literal(self) -> ast.Expr:
    """Parse an inline map literal of the form `{k1: v1, k2: v2, ...}`.

    This helper is reached from `_parse_primary`, i.e. from
    expression position; per the original author's note, block
    statements are dispatched by `_parse_statement` before expression
    parsing starts. `{}` is a legal empty map. A trailing comma is
    not accepted, matching the rest of the grammar.

    Keys and values are kept in two parallel lists. If the closing
    `}` is missing, the literal's span ends at the parser's current
    span.
    """
    opener = self._advance()  # consume {
    entry_keys: List[ast.Expr] = []
    entry_values: List[ast.Expr] = []

    if self._check(TokenKind.RBRACE):
        # Empty literal.
        stop = self._advance().span.end
    else:
        more = True
        while more:
            entry_key = self._parse_expr()
            self._expect(TokenKind.COLON, "`:` between map key and value")
            entry_value = self._parse_expr()
            entry_keys.append(entry_key)
            entry_values.append(entry_value)
            more = self._match(TokenKind.COMMA)
        closer = self._expect(TokenKind.RBRACE, "`}` to close map literal")
        stop = closer.span.end if closer else self._current_span().end

    return ast.MapLiteral(
        keys=entry_keys,
        values=entry_values,
        span=SourceSpan(start=opener.span.start, end=stop),
    )
1993
+
1994
def _parse_array_literal(self) -> ast.Expr:
    """Parse an inline array literal of the form `[elem, elem, ...]`.

    Reached from `_parse_primary` when the current token is `[`.
    `[]` is a legal empty array. A trailing comma is not permitted,
    matching the rest of the expression grammar. If the closing `]`
    is missing, the literal's span ends at the parser's current span.
    """
    opener = self._advance()  # consume [
    items: List[ast.Expr] = []

    if self._check(TokenKind.RBRACKET):
        # Empty literal.
        stop = self._advance().span.end
    else:
        items.append(self._parse_expr())
        while self._match(TokenKind.COMMA):
            items.append(self._parse_expr())
        closer = self._expect(TokenKind.RBRACKET, "`]` to close array literal")
        stop = closer.span.end if closer else self._current_span().end

    return ast.ArrayLiteral(
        elements=items,
        span=SourceSpan(start=opener.span.start, end=stop),
    )
2020
+
2021
def _parse_paren_or_cast(self) -> ast.Expr:
    """Parse either a cast `(Type) operand` or a parenthesised expression.

    After the `(`, a type is parsed speculatively. The construct is
    treated as a cast only if that type is immediately followed by
    `)` and the token after the `)` can start an expression and is
    not itself `(`. Otherwise the parser rewinds to the `(` and reads
    an ordinary parenthesised expression.
    """
    # Remember where we started so a failed cast attempt can be undone.
    rewind_to = self._pos
    open_tok = self._advance()  # (

    candidate = self._try_parse_type_for_cast()
    looks_like_cast = False
    if candidate is not None and self._check(TokenKind.RPAREN):
        follower = self._peek(1).kind
        # A following `(` is excluded to avoid ambiguity with a call.
        looks_like_cast = (
            follower is not TokenKind.LPAREN
            and follower in _EXPR_START_TOKENS
        )

    if looks_like_cast:
        self._advance()  # )
        target = self._parse_unary()
        return ast.CastExpr(
            target_type=candidate,
            expr=target,
            span=SourceSpan(start=open_tok.span.start, end=target.span.end),
        )

    # Not a cast — rewind and parse as a parenthesized expression.
    # NOTE(review): only the token position is restored here; if the
    # speculative type parse recorded any diagnostics they would
    # remain — confirm `_parse_type` stays silent in that case.
    self._pos = rewind_to
    self._advance()  # (
    body = self._parse_expr()
    closer = self._expect(TokenKind.RPAREN, "`)` to close parenthesized expression")
    stop = closer.span.end if closer else body.span.end
    return ast.Parenthesized(
        inner=body,
        span=SourceSpan(start=open_tok.span.start, end=stop),
    )
2053
+
2054
def _try_parse_type_for_cast(self) -> Optional[ast.TypeRef]:
    """Speculatively parse a type for a possible cast.

    Returns None without consuming anything when the current token is
    not an identifier. If `_parse_type` returns None, the token
    position is restored before returning None. On success the parsed
    type is returned with the position left after it; whether to
    commit to the cast is the caller's decision, based on what
    follows.
    """
    if self._peek().kind is not TokenKind.IDENT:
        return None
    checkpoint = self._pos
    parsed = self._parse_type()
    if parsed is None:
        self._pos = checkpoint
    return parsed
2070
+
2071
def _parse_special_call(self, name_tok: Token) -> ast.Expr:
    """Parse a call to one of the syntactically special built-ins.

    Shape: `name ("<" type_args ">")? "(" expr ")"`. The result is a
    dedicated AST node for `unsafeCast`, `unsafeOverride`, `defuzzy`
    or `embed`.

    * If the `(` is missing, a bare identifier for the name is
      returned instead.
    * `unsafeCast` with no type argument records SUT0105 and uses a
      placeholder `<missing>` target type. Only the first type
      argument is used as the cast target.
    * If the closing `)` is missing, the span ends at the end of the
      argument expression.
    """
    callee_name = name_tok.lexeme
    self._advance()  # name

    generics: List[ast.TypeRef] = []
    if self._check(TokenKind.LT):
        generics = self._parse_type_arg_list()

    if not self._expect(TokenKind.LPAREN, f"`(` after `{callee_name}`"):
        return ast.Identifier(name=callee_name, span=name_tok.span)

    argument = self._parse_expr()
    closer = self._expect(TokenKind.RPAREN, f"`)` to close `{callee_name}` call")
    stop = closer.span.end if closer else argument.span.end
    whole = SourceSpan(start=name_tok.span.start, end=stop)

    if callee_name == "unsafeCast":
        if generics:
            target = generics[0]
        else:
            self.diagnostics.error(
                "`unsafeCast` requires a type argument: `unsafeCast<Type>(value)`",
                whole,
                code="SUT0105",
            )
            target = ast.TypeRef(name="<missing>", type_args=[], span=whole)
        return ast.UnsafeCastExpr(target_type=target, expr=argument, span=whole)

    # The remaining built-ins all wrap exactly one expression.
    single_arg_nodes = {
        "unsafeOverride": ast.UnsafeOverrideExpr,
        "defuzzy": ast.DefuzzyExpr,
        "embed": ast.EmbedExpr,
    }
    node_type = single_arg_nodes.get(callee_name)
    if node_type is not None:
        return node_type(expr=argument, span=whole)

    # Shouldn't get here because we checked _SPECIAL_CALL_NAMES.
    return ast.Call(
        callee=ast.Identifier(name=callee_name, span=name_tok.span),
        type_args=generics,
        args=[argument],
        span=whole,
    )