snail-lang 0.6.1__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {snail_lang-0.6.1 → snail_lang-0.6.2}/Cargo.lock +7 -7
  2. {snail_lang-0.6.1 → snail_lang-0.6.2}/PKG-INFO +15 -3
  3. {snail_lang-0.6.1 → snail_lang-0.6.2}/README.md +14 -2
  4. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/Cargo.toml +1 -1
  5. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/Cargo.toml +1 -1
  6. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/src/lib.rs +22 -10
  7. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/Cargo.toml +1 -1
  8. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/Cargo.toml +1 -1
  9. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/Cargo.toml +1 -1
  10. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/lib.rs +146 -20
  11. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/snail.pest +12 -1
  12. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/errors.rs +39 -14
  13. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/Cargo.toml +1 -1
  14. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/src/lib.rs +22 -15
  15. {snail_lang-0.6.1 → snail_lang-0.6.2}/pyproject.toml +1 -1
  16. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/cli.py +7 -7
  17. {snail_lang-0.6.1 → snail_lang-0.6.2}/Cargo.toml +0 -0
  18. {snail_lang-0.6.1 → snail_lang-0.6.2}/LICENSE +0 -0
  19. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/README.md +0 -0
  20. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/ast.rs +0 -0
  21. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/awk.rs +0 -0
  22. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/lib.rs +0 -0
  23. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/README.md +0 -0
  24. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/README.md +0 -0
  25. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/src/lib.rs +0 -0
  26. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/README.md +0 -0
  27. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/awk.rs +0 -0
  28. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/constants.rs +0 -0
  29. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/expr.rs +0 -0
  30. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/helpers.rs +0 -0
  31. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/lib.rs +0 -0
  32. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/map.rs +0 -0
  33. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/operators.rs +0 -0
  34. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/program.rs +0 -0
  35. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/py_ast.rs +0 -0
  36. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/stmt.rs +0 -0
  37. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/README.md +0 -0
  38. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/awk.rs +0 -0
  39. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/expr.rs +0 -0
  40. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/literal.rs +0 -0
  41. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/stmt.rs +0 -0
  42. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/string.rs +0 -0
  43. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/util.rs +0 -0
  44. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/common.rs +0 -0
  45. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/parser.rs +0 -0
  46. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/statements.rs +0 -0
  47. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/syntax_expressions.rs +0 -0
  48. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/syntax_strings.rs +0 -0
  49. {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/build.rs +0 -0
  50. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/__init__.py +0 -0
  51. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/__init__.py +0 -0
  52. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/augmented.py +0 -0
  53. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/compact_try.py +0 -0
  54. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/lazy_text.py +0 -0
  55. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/regex.py +0 -0
  56. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/structured_accessor.py +0 -0
  57. {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/subprocess.py +0 -0
@@ -485,11 +485,11 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
485
485
 
486
486
  [[package]]
487
487
  name = "snail-ast"
488
- version = "0.6.1"
488
+ version = "0.6.2"
489
489
 
490
490
  [[package]]
491
491
  name = "snail-core"
492
- version = "0.6.1"
492
+ version = "0.6.2"
493
493
  dependencies = [
494
494
  "pyo3",
495
495
  "snail-ast",
@@ -500,14 +500,14 @@ dependencies = [
500
500
 
501
501
  [[package]]
502
502
  name = "snail-error"
503
- version = "0.6.1"
503
+ version = "0.6.2"
504
504
  dependencies = [
505
505
  "snail-ast",
506
506
  ]
507
507
 
508
508
  [[package]]
509
509
  name = "snail-lower"
510
- version = "0.6.1"
510
+ version = "0.6.2"
511
511
  dependencies = [
512
512
  "pyo3",
513
513
  "snail-ast",
@@ -516,7 +516,7 @@ dependencies = [
516
516
 
517
517
  [[package]]
518
518
  name = "snail-parser"
519
- version = "0.6.1"
519
+ version = "0.6.2"
520
520
  dependencies = [
521
521
  "pest",
522
522
  "pest_derive",
@@ -526,7 +526,7 @@ dependencies = [
526
526
 
527
527
  [[package]]
528
528
  name = "snail-proptest"
529
- version = "0.6.1"
529
+ version = "0.6.2"
530
530
  dependencies = [
531
531
  "proptest",
532
532
  "pyo3",
@@ -540,7 +540,7 @@ dependencies = [
540
540
 
541
541
  [[package]]
542
542
  name = "snail-python"
543
- version = "0.6.1"
543
+ version = "0.6.2"
544
544
  dependencies = [
545
545
  "pyo3",
546
546
  "snail-core",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snail-lang
3
- Version: 0.6.1
3
+ Version: 0.6.2
4
4
  Requires-Dist: jmespath>=1.0.1
5
5
  Requires-Dist: maturin>=1.5 ; extra == 'dev'
6
6
  Requires-Dist: pytest ; extra == 'dev'
@@ -57,8 +57,10 @@ semicolons are optional. You can separate statements with newlines.
57
57
  Process files line-by-line with familiar awk semantics:
58
58
 
59
59
  ```snail-awk("hello world\nfoo bar\n")
60
+ BEGIN { print("start") }
60
61
  /hello/ { print("matched:", $0) }
61
62
  { print($1, "->", $2) }
63
+ END { print("done") }
62
64
  ```
63
65
 
64
66
  **Built-in variables:**
@@ -73,7 +75,11 @@ Process files line-by-line with familiar awk semantics:
73
75
  | `$p` | Current file path |
74
76
  | `$m` | Last regex match object |
75
77
 
76
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
78
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
79
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
80
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
81
+ `BEGIN` and `END` are reserved keywords in all modes.
82
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
77
83
  ```bash
78
84
  echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
79
85
  ```
@@ -83,8 +89,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
83
89
  Process files one at a time instead of line-by-line:
84
90
 
85
91
  ```snail-map
92
+ BEGIN { print("start") }
86
93
  print("File:", $src)
87
94
  print("Size:", len($text), "bytes")
95
+ END { print("done") }
88
96
  ```
89
97
 
90
98
  **Built-in variables:**
@@ -95,7 +103,11 @@ print("Size:", len($text), "bytes")
95
103
  | `$fd` | Open file handle for the current file |
96
104
  | `$text` | Lazy text view of the current file contents |
97
105
 
98
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
106
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
107
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
108
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
109
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
110
+ `BEGIN` and `END` are reserved keywords in all modes.
99
111
  ```bash
100
112
  snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
101
113
  ```
@@ -45,8 +45,10 @@ semicolons are optional. You can separate statements with newlines.
45
45
  Process files line-by-line with familiar awk semantics:
46
46
 
47
47
  ```snail-awk("hello world\nfoo bar\n")
48
+ BEGIN { print("start") }
48
49
  /hello/ { print("matched:", $0) }
49
50
  { print($1, "->", $2) }
51
+ END { print("done") }
50
52
  ```
51
53
 
52
54
  **Built-in variables:**
@@ -61,7 +63,11 @@ Process files line-by-line with familiar awk semantics:
61
63
  | `$p` | Current file path |
62
64
  | `$m` | Last regex match object |
63
65
 
64
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
66
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
67
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
68
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
69
+ `BEGIN` and `END` are reserved keywords in all modes.
70
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
65
71
  ```bash
66
72
  echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
67
73
  ```
@@ -71,8 +77,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
71
77
  Process files one at a time instead of line-by-line:
72
78
 
73
79
  ```snail-map
80
+ BEGIN { print("start") }
74
81
  print("File:", $src)
75
82
  print("Size:", len($text), "bytes")
83
+ END { print("done") }
76
84
  ```
77
85
 
78
86
  **Built-in variables:**
@@ -83,7 +91,11 @@ print("Size:", len($text), "bytes")
83
91
  | `$fd` | Open file handle for the current file |
84
92
  | `$text` | Lazy text view of the current file contents |
85
93
 
86
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
94
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
95
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
96
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
97
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
98
+ `BEGIN` and `END` are reserved keywords in all modes.
87
99
  ```bash
88
100
  snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
89
101
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-ast"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-core"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition.workspace = true
5
5
  readme = "README.md"
6
6
 
@@ -33,15 +33,22 @@ pub fn compile_snail_source_with_auto_print(
33
33
  Ok(module)
34
34
  }
35
35
  CompileMode::Map => {
36
- let program = parse_map_program(source)?;
37
- let module = lower_map_program_with_auto_print(py, &program, auto_print_last)?;
36
+ let (program, begin_blocks, end_blocks) = parse_map_program_with_begin_end(source)?;
37
+ let module = lower_map_program_with_begin_end(
38
+ py,
39
+ &program,
40
+ &begin_blocks,
41
+ &end_blocks,
42
+ auto_print_last,
43
+ )?;
38
44
  Ok(module)
39
45
  }
40
46
  }
41
47
  }
42
48
 
43
49
  /// Compile an awk program with separate begin and end code blocks.
44
- /// Each begin/end source is parsed as a regular Snail program.
50
+ /// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
51
+ /// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
45
52
  pub fn compile_awk_source_with_begin_end(
46
53
  py: Python<'_>,
47
54
  main_source: &str,
@@ -55,7 +62,8 @@ pub fn compile_awk_source_with_begin_end(
55
62
  }
56
63
 
57
64
  /// Compile a map program with separate begin and end code blocks.
58
- /// Each begin/end source is parsed as a map program.
65
+ /// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
66
+ /// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
59
67
  pub fn compile_map_source_with_begin_end(
60
68
  py: Python<'_>,
61
69
  main_source: &str,
@@ -63,17 +71,21 @@ pub fn compile_map_source_with_begin_end(
63
71
  end_sources: &[&str],
64
72
  auto_print_last: bool,
65
73
  ) -> Result<PyObject, SnailError> {
66
- let program = parse_map_program(main_source)?;
67
- let mut begin_blocks = Vec::new();
74
+ let (program, mut begin_blocks, mut end_blocks) =
75
+ parse_map_program_with_begin_end(main_source)?;
76
+
77
+ let mut cli_begin_blocks = Vec::new();
68
78
  for source in begin_sources {
69
- let begin_program = parse_map_program(source)?;
79
+ let begin_program = parse_program(source)?;
70
80
  if !begin_program.stmts.is_empty() {
71
- begin_blocks.push(begin_program.stmts);
81
+ cli_begin_blocks.push(begin_program.stmts);
72
82
  }
73
83
  }
74
- let mut end_blocks = Vec::new();
84
+ cli_begin_blocks.extend(begin_blocks);
85
+ begin_blocks = cli_begin_blocks;
86
+
75
87
  for source in end_sources {
76
- let end_program = parse_map_program(source)?;
88
+ let end_program = parse_program(source)?;
77
89
  if !end_program.stmts.is_empty() {
78
90
  end_blocks.push(end_program.stmts);
79
91
  }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-error"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-lower"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-parser"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -12,13 +12,15 @@ mod string;
12
12
  mod util;
13
13
 
14
14
  use awk::parse_awk_rule;
15
- use stmt::parse_stmt_list;
16
- use util::{error_with_span, full_span, parse_error_from_pest};
15
+ use stmt::{parse_block, parse_stmt, parse_stmt_list};
16
+ use util::{error_with_span, full_span, parse_error_from_pest, span_from_offset, span_from_pair};
17
17
 
18
18
  #[derive(Parser)]
19
19
  #[grammar = "snail.pest"]
20
20
  pub struct SnailParser;
21
21
 
22
+ pub type MapProgramWithBeginEnd = (Program, Vec<Vec<Stmt>>, Vec<Vec<Stmt>>);
23
+
22
24
  pub fn parse_program(source: &str) -> Result<Program, ParseError> {
23
25
  let mut pairs = SnailParser::parse(Rule::program, source)
24
26
  .map_err(|err| parse_error_from_pest(err, source))?;
@@ -45,29 +47,44 @@ pub fn parse_awk_program(source: &str) -> Result<AwkProgram, ParseError> {
45
47
  .ok_or_else(|| ParseError::new("missing awk program root"))?;
46
48
  let span = full_span(source);
47
49
 
50
+ let mut begin_blocks = Vec::new();
48
51
  let mut rules = Vec::new();
52
+ let mut end_blocks = Vec::new();
49
53
 
50
54
  for inner in pair.into_inner() {
51
55
  if inner.as_rule() == Rule::awk_entry_list {
52
56
  for entry in inner.into_inner() {
53
- if entry.as_rule() == Rule::awk_rule {
54
- rules.push(parse_awk_rule(entry, source)?);
57
+ match entry.as_rule() {
58
+ Rule::awk_begin => {
59
+ let block = parse_begin_end_block(entry, source, "BEGIN")?;
60
+ if !block.is_empty() {
61
+ begin_blocks.push(block);
62
+ }
63
+ }
64
+ Rule::awk_end => {
65
+ let block = parse_begin_end_block(entry, source, "END")?;
66
+ if !block.is_empty() {
67
+ end_blocks.push(block);
68
+ }
69
+ }
70
+ Rule::awk_rule => rules.push(parse_awk_rule(entry, source)?),
71
+ _ => {}
55
72
  }
56
73
  }
57
74
  }
58
75
  }
59
76
 
60
77
  Ok(AwkProgram {
61
- begin_blocks: Vec::new(),
78
+ begin_blocks,
62
79
  rules,
63
- end_blocks: Vec::new(),
80
+ end_blocks,
64
81
  span,
65
82
  })
66
83
  }
67
84
 
68
85
  /// Parses an awk program with separate begin and end code sources.
69
- /// Each begin/end source is parsed as a regular Snail program and its statements
70
- /// are injected into the resulting AwkProgram.
86
+ /// Each begin/end source is parsed as a regular Snail program and merged so CLI BEGIN
87
+ /// blocks run before in-file BEGIN blocks, and CLI END blocks run after in-file END blocks.
71
88
  pub fn parse_awk_program_with_begin_end(
72
89
  main_source: &str,
73
90
  begin_sources: &[&str],
@@ -75,21 +92,24 @@ pub fn parse_awk_program_with_begin_end(
75
92
  ) -> Result<AwkProgram, ParseError> {
76
93
  let mut program = parse_awk_program(main_source)?;
77
94
 
78
- // Parse each begin source as a regular program and extract statements
95
+ let mut cli_begin_blocks = Vec::new();
79
96
  for source in begin_sources {
80
97
  let begin_program = parse_program(source)?;
81
98
  if !begin_program.stmts.is_empty() {
82
- program.begin_blocks.push(begin_program.stmts);
99
+ cli_begin_blocks.push(begin_program.stmts);
83
100
  }
84
101
  }
102
+ cli_begin_blocks.extend(program.begin_blocks);
103
+ program.begin_blocks = cli_begin_blocks;
85
104
 
86
- // Parse each end source as a regular program and extract statements
105
+ let mut end_blocks = program.end_blocks;
87
106
  for source in end_sources {
88
107
  let end_program = parse_program(source)?;
89
108
  if !end_program.stmts.is_empty() {
90
- program.end_blocks.push(end_program.stmts);
109
+ end_blocks.push(end_program.stmts);
91
110
  }
92
111
  }
112
+ program.end_blocks = end_blocks;
93
113
 
94
114
  Ok(program)
95
115
  }
@@ -102,27 +122,133 @@ const MAP_ONLY_MESSAGE: &str = "map variables are only valid in map mode; use --
102
122
 
103
123
  /// Parses a map program that processes files one at a time.
104
124
  /// Allows map variables ($src, $fd, $text) but rejects awk variables.
125
+ /// In-file BEGIN/END blocks are validated but not returned; use
126
+ /// `parse_map_program_with_begin_end` to access them.
105
127
  pub fn parse_map_program(source: &str) -> Result<Program, ParseError> {
106
- let mut pairs = SnailParser::parse(Rule::program, source)
128
+ let (program, _, _) = parse_map_program_with_begin_end(source)?;
129
+ Ok(program)
130
+ }
131
+
132
+ fn validate_no_awk_syntax_for_map(program: &Program, source: &str) -> Result<(), ParseError> {
133
+ for stmt in &program.stmts {
134
+ validate_stmt_for_map(stmt, source)?;
135
+ }
136
+ Ok(())
137
+ }
138
+
139
+ /// Parses a map program with in-file BEGIN/END blocks.
140
+ /// BEGIN/END blocks are parsed as regular Snail statement blocks (no map/awk vars).
141
+ pub fn parse_map_program_with_begin_end(
142
+ source: &str,
143
+ ) -> Result<MapProgramWithBeginEnd, ParseError> {
144
+ let mut pairs = SnailParser::parse(Rule::map_program, source)
107
145
  .map_err(|err| parse_error_from_pest(err, source))?;
108
146
  let pair = pairs
109
147
  .next()
110
- .ok_or_else(|| ParseError::new("missing program root"))?;
148
+ .ok_or_else(|| ParseError::new("missing map program root"))?;
111
149
  let span = full_span(source);
112
150
  let mut stmts = Vec::new();
151
+ let mut begin_blocks = Vec::new();
152
+ let mut end_blocks = Vec::new();
153
+ let mut entries = Vec::new();
154
+
113
155
  for inner in pair.into_inner() {
114
- if inner.as_rule() == Rule::stmt_list {
115
- stmts = parse_stmt_list(inner, source)?;
156
+ if inner.as_rule() == Rule::map_entry_list {
157
+ for entry in inner.into_inner() {
158
+ if entry.as_rule() != Rule::map_entry {
159
+ continue;
160
+ }
161
+ let entry_span = span_from_pair(&entry, source);
162
+ let mut entry_inner = entry.into_inner();
163
+ let entry_pair = entry_inner.next().ok_or_else(|| {
164
+ error_with_span("missing map entry", entry_span.clone(), source)
165
+ })?;
166
+ match entry_pair.as_rule() {
167
+ Rule::map_begin => {
168
+ let block = parse_begin_end_block(entry_pair, source, "BEGIN")?;
169
+ if !block.is_empty() {
170
+ begin_blocks.push(block);
171
+ }
172
+ entries.push((entry_span, MapEntryKind::BeginEnd));
173
+ }
174
+ Rule::map_end => {
175
+ let block = parse_begin_end_block(entry_pair, source, "END")?;
176
+ if !block.is_empty() {
177
+ end_blocks.push(block);
178
+ }
179
+ entries.push((entry_span, MapEntryKind::BeginEnd));
180
+ }
181
+ _ => {
182
+ let stmt = parse_stmt(entry_pair, source)?;
183
+ entries.push((entry_span, map_entry_kind_for_stmt(&stmt)));
184
+ stmts.push(stmt);
185
+ }
186
+ }
187
+ }
116
188
  }
117
189
  }
190
+
191
+ validate_map_entry_separators(&entries, source)?;
192
+
118
193
  let program = Program { stmts, span };
119
194
  validate_no_awk_syntax_for_map(&program, source)?;
120
- Ok(program)
195
+ Ok((program, begin_blocks, end_blocks))
121
196
  }
122
197
 
123
- fn validate_no_awk_syntax_for_map(program: &Program, source: &str) -> Result<(), ParseError> {
124
- for stmt in &program.stmts {
125
- validate_stmt_for_map(stmt, source)?;
198
+ fn parse_begin_end_block(
199
+ pair: pest::iterators::Pair<'_, Rule>,
200
+ source: &str,
201
+ label: &str,
202
+ ) -> Result<Vec<Stmt>, ParseError> {
203
+ let span = span_from_pair(&pair, source);
204
+ let mut inner = pair.into_inner();
205
+ let block_pair = inner
206
+ .next()
207
+ .ok_or_else(|| error_with_span(format!("missing {label} block"), span.clone(), source))?;
208
+ let block = parse_block(block_pair, source)?;
209
+ validate_block(&block, source)?;
210
+ Ok(block)
211
+ }
212
+
213
+ #[derive(Clone, Copy)]
214
+ enum MapEntryKind {
215
+ BeginEnd,
216
+ Simple,
217
+ Compound,
218
+ }
219
+
220
+ fn map_entry_kind_for_stmt(stmt: &Stmt) -> MapEntryKind {
221
+ match stmt {
222
+ Stmt::If { .. }
223
+ | Stmt::While { .. }
224
+ | Stmt::For { .. }
225
+ | Stmt::Def { .. }
226
+ | Stmt::Class { .. }
227
+ | Stmt::Try { .. }
228
+ | Stmt::With { .. } => MapEntryKind::Compound,
229
+ _ => MapEntryKind::Simple,
230
+ }
231
+ }
232
+
233
+ fn validate_map_entry_separators(
234
+ entries: &[(SourceSpan, MapEntryKind)],
235
+ source: &str,
236
+ ) -> Result<(), ParseError> {
237
+ for window in entries.windows(2) {
238
+ let (prev_span, prev_kind) = &window[0];
239
+ let (next_span, next_kind) = &window[1];
240
+ let gap = &source[prev_span.end.offset..next_span.start.offset];
241
+ let has_sep = gap.contains('\n') || gap.contains(';');
242
+ if !has_sep
243
+ && matches!(prev_kind, MapEntryKind::Simple)
244
+ && !matches!(next_kind, MapEntryKind::BeginEnd)
245
+ {
246
+ return Err(error_with_span(
247
+ "expected statement separator",
248
+ span_from_offset(next_span.start.offset, next_span.start.offset, source),
249
+ source,
250
+ ));
251
+ }
126
252
  }
127
253
  Ok(())
128
254
  }
@@ -1,13 +1,23 @@
1
1
  // Top-level program entry points
2
2
  program = { SOI ~ stmt_sep* ~ stmt_list? ~ stmt_sep* ~ EOI }
3
3
  awk_program = { SOI ~ stmt_sep* ~ awk_entry_list? ~ stmt_sep* ~ EOI }
4
+ map_program = { SOI ~ stmt_sep* ~ map_entry_list? ~ stmt_sep* ~ EOI }
4
5
 
5
6
  // AWK mode: pattern-action rules
6
7
  awk_entry_list = { awk_entry ~ (stmt_sep* ~ awk_entry)* ~ stmt_sep* }
7
- awk_entry = _{ awk_rule }
8
+ awk_entry = _{ awk_begin | awk_end | awk_rule }
9
+ awk_begin = { "BEGIN" ~ block }
10
+ awk_end = { "END" ~ block }
8
11
  awk_rule = { block | awk_pattern ~ block? }
9
12
  awk_pattern = { expr }
10
13
 
14
+ // Map mode: program with optional BEGIN/END blocks
15
+ map_entry_list = { map_entry ~ (stmt_sep* ~ map_entry)* ~ stmt_sep* }
16
+ map_entry = { map_begin_end | stmt }
17
+ map_begin_end = _{ map_begin | map_end }
18
+ map_begin = { "BEGIN" ~ block }
19
+ map_end = { "END" ~ block }
20
+
11
21
  // Statements: separated by semicolons or newlines
12
22
  // Compound statements (ending with }) don't need trailing separators
13
23
  // Simple statements need separators unless they're the last statement
@@ -261,6 +271,7 @@ keyword = _{
261
271
  | "return" | "break" | "continue" | "pass" | "raise" | "try" | "except"
262
272
  | "finally" | "with" | "assert" | "del" | "and" | "or" | "not" | "import" | "from" | "as"
263
273
  | "let"
274
+ | "BEGIN" | "END"
264
275
  | "True" | "False" | "None") ~ !ident_continue
265
276
  }
266
277
 
@@ -1,7 +1,10 @@
1
1
  mod common;
2
2
 
3
3
  use common::*;
4
- use snail_parser::{parse_awk_program, parse_awk_program_with_begin_end, parse_program};
4
+ use snail_parser::{
5
+ parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
6
+ parse_program,
7
+ };
5
8
 
6
9
  #[test]
7
10
  fn reports_parse_error_with_location() {
@@ -203,20 +206,12 @@ fn parser_rejects_invalid_parameter_syntax() {
203
206
  // ========== AWK Mode Parser Tests ==========
204
207
 
205
208
  #[test]
206
- fn awk_begin_end_parsed_as_patterns() {
207
- // BEGIN and END are no longer special keywords; they parse as regular pattern names
208
- // Use -b/-e CLI flags for begin/end blocks instead
209
- let program = parse_awk_program("BEGIN { print(1) }").expect("should parse");
209
+ fn awk_begin_end_parsed_as_blocks() {
210
+ let program = parse_awk_program("BEGIN { print(1) } /foo/ { print($0) } END { print(2) }")
211
+ .expect("should parse");
210
212
  assert_eq!(program.rules.len(), 1);
211
- assert!(program.begin_blocks.is_empty());
212
- // BEGIN is parsed as pattern (identifier), not a special begin block
213
- assert!(program.rules[0].pattern.is_some());
214
-
215
- let program = parse_awk_program("END { print(1) }").expect("should parse");
216
- assert_eq!(program.rules.len(), 1);
217
- assert!(program.end_blocks.is_empty());
218
- // END is parsed as pattern (identifier), not a special end block
219
- assert!(program.rules[0].pattern.is_some());
213
+ assert_eq!(program.begin_blocks.len(), 1);
214
+ assert_eq!(program.end_blocks.len(), 1);
220
215
  }
221
216
 
222
217
  #[test]
@@ -246,6 +241,36 @@ fn awk_with_empty_begin_end() {
246
241
  assert!(program.end_blocks.is_empty());
247
242
  }
248
243
 
244
+ #[test]
245
+ fn awk_begin_end_rejects_awk_vars() {
246
+ let err = parse_awk_program("BEGIN { print($0) }").expect_err("should reject awk vars");
247
+ assert!(err.to_string().contains("$0"));
248
+ }
249
+
250
+ #[test]
251
+ fn map_begin_end_parsed_as_blocks() {
252
+ let (program, begin_blocks, end_blocks) =
253
+ parse_map_program_with_begin_end("BEGIN { print(1) } print($src) END { print(2) }")
254
+ .expect("should parse");
255
+ assert_eq!(program.stmts.len(), 1);
256
+ assert_eq!(begin_blocks.len(), 1);
257
+ assert_eq!(end_blocks.len(), 1);
258
+ }
259
+
260
+ #[test]
261
+ fn map_begin_end_rejects_map_vars() {
262
+ let err = parse_map_program_with_begin_end("BEGIN { print($src) }\nprint($src)")
263
+ .expect_err("should reject map vars in BEGIN/END");
264
+ assert!(err.to_string().contains("$src"));
265
+ }
266
+
267
+ #[test]
268
+ fn map_requires_separators_between_simple_statements() {
269
+ let err = parse_map_program_with_begin_end("print($src) print($src)")
270
+ .expect_err("should reject missing separators");
271
+ assert!(err.to_string().contains("expected statement separator"));
272
+ }
273
+
249
274
  // ========== F-String Interpolation Tests ==========
250
275
 
251
276
  #[test]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-python"
3
- version = "0.6.1"
3
+ version = "0.6.2"
4
4
  edition.workspace = true
5
5
  build = "build.rs"
6
6
 
@@ -5,9 +5,10 @@ use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PySystemExit};
5
5
  use pyo3::prelude::*;
6
6
  use pyo3::types::{PyDict, PyList, PyModule, PyTuple};
7
7
  use snail_core::{
8
- CompileMode, ParseError, Program, compile_awk_source_with_begin_end,
8
+ CompileMode, ParseError, Program, Stmt, compile_awk_source_with_begin_end,
9
9
  compile_map_source_with_begin_end, compile_snail_source_with_auto_print, format_snail_error,
10
- parse_awk_program, parse_awk_program_with_begin_end, parse_map_program, parse_program,
10
+ parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
11
+ parse_program,
11
12
  };
12
13
  use std::sync::OnceLock;
13
14
  use std::time::Instant;
@@ -320,8 +321,8 @@ fn exec_py(
320
321
  #[derive(Debug)]
321
322
  struct MapAst {
322
323
  program: Program,
323
- begin_blocks: Vec<Program>,
324
- end_blocks: Vec<Program>,
324
+ begin_blocks: Vec<Vec<Stmt>>,
325
+ end_blocks: Vec<Vec<Stmt>>,
325
326
  }
326
327
 
327
328
  #[pyfunction(name = "parse_ast")]
@@ -351,24 +352,30 @@ fn parse_ast_py(
351
352
  Ok(format!("{:#?}", program))
352
353
  }
353
354
  CompileMode::Map => {
354
- let program = parse_map_program(source).map_err(err_to_syntax)?;
355
- if begin_code.is_empty() && end_code.is_empty() {
356
- return Ok(format!("{:#?}", program));
357
- }
358
- let mut begin_blocks = Vec::new();
355
+ let (program, mut begin_blocks, mut end_blocks) =
356
+ parse_map_program_with_begin_end(source).map_err(err_to_syntax)?;
357
+
358
+ let mut cli_begin_blocks = Vec::new();
359
359
  for source in &begin_code {
360
- let begin_program = parse_map_program(source).map_err(err_to_syntax)?;
360
+ let begin_program = parse_program(source).map_err(err_to_syntax)?;
361
361
  if !begin_program.stmts.is_empty() {
362
- begin_blocks.push(begin_program);
362
+ cli_begin_blocks.push(begin_program.stmts);
363
363
  }
364
364
  }
365
- let mut end_blocks = Vec::new();
365
+ cli_begin_blocks.extend(begin_blocks);
366
+ begin_blocks = cli_begin_blocks;
367
+
366
368
  for source in &end_code {
367
- let end_program = parse_map_program(source).map_err(err_to_syntax)?;
369
+ let end_program = parse_program(source).map_err(err_to_syntax)?;
368
370
  if !end_program.stmts.is_empty() {
369
- end_blocks.push(end_program);
371
+ end_blocks.push(end_program.stmts);
370
372
  }
371
373
  }
374
+
375
+ if begin_blocks.is_empty() && end_blocks.is_empty() {
376
+ return Ok(format!("{:#?}", program));
377
+ }
378
+
372
379
  let map_ast = MapAst {
373
380
  program,
374
381
  begin_blocks,
@@ -389,7 +396,7 @@ fn parse_py(source: &str, mode: &str, filename: &str) -> PyResult<()> {
389
396
  CompileMode::Awk => parse_awk_program(source)
390
397
  .map(|_| ())
391
398
  .map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
392
- CompileMode::Map => parse_map_program(source)
399
+ CompileMode::Map => parse_map_program_with_begin_end(source)
393
400
  .map(|_| ())
394
401
  .map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
395
402
  }
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "snail-lang"
7
- version = "0.6.1"
7
+ version = "0.6.2"
8
8
  description = "Snail programming language interpreter"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -187,13 +187,12 @@ def _parse_args(argv: list[str]) -> _Args:
187
187
  args.args = argv[idx + 1 :]
188
188
  return args
189
189
  if token == "-" or not token.startswith("-"):
190
- if code_found:
191
- # Already found code, rest are args
192
- args.args.extend(argv[idx:])
193
- return args
194
- # This is the code, continue parsing for -b/--begin and -e/--end after
195
- args.args = [token]
196
- code_found = True
190
+ if not code_found:
191
+ # This is the code (or the first arg when -f is used)
192
+ args.args = [token]
193
+ code_found = True
194
+ else:
195
+ args.args.append(token)
197
196
  idx += 1
198
197
  continue
199
198
  if token in ("-h", "--help"):
@@ -231,6 +230,7 @@ def _parse_args(argv: list[str]) -> _Args:
231
230
  if idx + 1 >= len(argv):
232
231
  raise ValueError("option -f requires an argument")
233
232
  args.file = argv[idx + 1]
233
+ code_found = True
234
234
  idx += 2
235
235
  continue
236
236
  if token in ("-b", "--begin"):
File without changes
File without changes