snail-lang 0.6.1__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {snail_lang-0.6.1 → snail_lang-0.6.3}/Cargo.lock +7 -7
  2. {snail_lang-0.6.1 → snail_lang-0.6.3}/PKG-INFO +15 -3
  3. {snail_lang-0.6.1 → snail_lang-0.6.3}/README.md +14 -2
  4. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-ast/Cargo.toml +1 -1
  5. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-core/Cargo.toml +1 -1
  6. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-core/src/lib.rs +22 -10
  7. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-error/Cargo.toml +1 -1
  8. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/Cargo.toml +1 -1
  9. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/constants.rs +1 -0
  10. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/map.rs +6 -6
  11. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/Cargo.toml +1 -1
  12. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/lib.rs +146 -20
  13. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/snail.pest +12 -1
  14. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/errors.rs +39 -14
  15. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-python/Cargo.toml +1 -1
  16. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-python/src/lib.rs +22 -15
  17. {snail_lang-0.6.1 → snail_lang-0.6.3}/pyproject.toml +1 -1
  18. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/cli.py +7 -7
  19. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/__init__.py +11 -0
  20. snail_lang-0.6.3/python/snail/runtime/lazy_file.py +41 -0
  21. {snail_lang-0.6.1 → snail_lang-0.6.3}/Cargo.toml +0 -0
  22. {snail_lang-0.6.1 → snail_lang-0.6.3}/LICENSE +0 -0
  23. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-ast/README.md +0 -0
  24. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-ast/src/ast.rs +0 -0
  25. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-ast/src/awk.rs +0 -0
  26. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-ast/src/lib.rs +0 -0
  27. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-core/README.md +0 -0
  28. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-error/README.md +0 -0
  29. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-error/src/lib.rs +0 -0
  30. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/README.md +0 -0
  31. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/awk.rs +0 -0
  32. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/expr.rs +0 -0
  33. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/helpers.rs +0 -0
  34. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/lib.rs +0 -0
  35. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/operators.rs +0 -0
  36. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/program.rs +0 -0
  37. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/py_ast.rs +0 -0
  38. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-lower/src/stmt.rs +0 -0
  39. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/README.md +0 -0
  40. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/awk.rs +0 -0
  41. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/expr.rs +0 -0
  42. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/literal.rs +0 -0
  43. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/stmt.rs +0 -0
  44. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/string.rs +0 -0
  45. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/src/util.rs +0 -0
  46. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/common.rs +0 -0
  47. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/parser.rs +0 -0
  48. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/statements.rs +0 -0
  49. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/syntax_expressions.rs +0 -0
  50. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-parser/tests/syntax_strings.rs +0 -0
  51. {snail_lang-0.6.1 → snail_lang-0.6.3}/crates/snail-python/build.rs +0 -0
  52. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/__init__.py +0 -0
  53. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/augmented.py +0 -0
  54. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/compact_try.py +0 -0
  55. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/lazy_text.py +0 -0
  56. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/regex.py +0 -0
  57. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/structured_accessor.py +0 -0
  58. {snail_lang-0.6.1 → snail_lang-0.6.3}/python/snail/runtime/subprocess.py +0 -0
@@ -485,11 +485,11 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
485
485
 
486
486
  [[package]]
487
487
  name = "snail-ast"
488
- version = "0.6.1"
488
+ version = "0.6.3"
489
489
 
490
490
  [[package]]
491
491
  name = "snail-core"
492
- version = "0.6.1"
492
+ version = "0.6.3"
493
493
  dependencies = [
494
494
  "pyo3",
495
495
  "snail-ast",
@@ -500,14 +500,14 @@ dependencies = [
500
500
 
501
501
  [[package]]
502
502
  name = "snail-error"
503
- version = "0.6.1"
503
+ version = "0.6.3"
504
504
  dependencies = [
505
505
  "snail-ast",
506
506
  ]
507
507
 
508
508
  [[package]]
509
509
  name = "snail-lower"
510
- version = "0.6.1"
510
+ version = "0.6.3"
511
511
  dependencies = [
512
512
  "pyo3",
513
513
  "snail-ast",
@@ -516,7 +516,7 @@ dependencies = [
516
516
 
517
517
  [[package]]
518
518
  name = "snail-parser"
519
- version = "0.6.1"
519
+ version = "0.6.3"
520
520
  dependencies = [
521
521
  "pest",
522
522
  "pest_derive",
@@ -526,7 +526,7 @@ dependencies = [
526
526
 
527
527
  [[package]]
528
528
  name = "snail-proptest"
529
- version = "0.6.1"
529
+ version = "0.6.3"
530
530
  dependencies = [
531
531
  "proptest",
532
532
  "pyo3",
@@ -540,7 +540,7 @@ dependencies = [
540
540
 
541
541
  [[package]]
542
542
  name = "snail-python"
543
- version = "0.6.1"
543
+ version = "0.6.3"
544
544
  dependencies = [
545
545
  "pyo3",
546
546
  "snail-core",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snail-lang
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Requires-Dist: jmespath>=1.0.1
5
5
  Requires-Dist: maturin>=1.5 ; extra == 'dev'
6
6
  Requires-Dist: pytest ; extra == 'dev'
@@ -57,8 +57,10 @@ semicolons are optional. You can separate statements with newlines.
57
57
  Process files line-by-line with familiar awk semantics:
58
58
 
59
59
  ```snail-awk("hello world\nfoo bar\n")
60
+ BEGIN { print("start") }
60
61
  /hello/ { print("matched:", $0) }
61
62
  { print($1, "->", $2) }
63
+ END { print("done") }
62
64
  ```
63
65
 
64
66
  **Built-in variables:**
@@ -73,7 +75,11 @@ Process files line-by-line with familiar awk semantics:
73
75
  | `$p` | Current file path |
74
76
  | `$m` | Last regex match object |
75
77
 
76
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
78
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
79
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
80
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
81
+ `BEGIN` and `END` are reserved keywords in all modes.
82
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
77
83
  ```bash
78
84
  echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
79
85
  ```
@@ -83,8 +89,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
83
89
  Process files one at a time instead of line-by-line:
84
90
 
85
91
  ```snail-map
92
+ BEGIN { print("start") }
86
93
  print("File:", $src)
87
94
  print("Size:", len($text), "bytes")
95
+ END { print("done") }
88
96
  ```
89
97
 
90
98
  **Built-in variables:**
@@ -95,7 +103,11 @@ print("Size:", len($text), "bytes")
95
103
  | `$fd` | Open file handle for the current file |
96
104
  | `$text` | Lazy text view of the current file contents |
97
105
 
98
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
106
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
107
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
108
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
109
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
110
+ `BEGIN` and `END` are reserved keywords in all modes.
99
111
  ```bash
100
112
  snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
101
113
  ```
@@ -45,8 +45,10 @@ semicolons are optional. You can separate statements with newlines.
45
45
  Process files line-by-line with familiar awk semantics:
46
46
 
47
47
  ```snail-awk("hello world\nfoo bar\n")
48
+ BEGIN { print("start") }
48
49
  /hello/ { print("matched:", $0) }
49
50
  { print($1, "->", $2) }
51
+ END { print("done") }
50
52
  ```
51
53
 
52
54
  **Built-in variables:**
@@ -61,7 +63,11 @@ Process files line-by-line with familiar awk semantics:
61
63
  | `$p` | Current file path |
62
64
  | `$m` | Last regex match object |
63
65
 
64
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
66
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
67
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
68
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
69
+ `BEGIN` and `END` are reserved keywords in all modes.
70
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
65
71
  ```bash
66
72
  echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
67
73
  ```
@@ -71,8 +77,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
71
77
  Process files one at a time instead of line-by-line:
72
78
 
73
79
  ```snail-map
80
+ BEGIN { print("start") }
74
81
  print("File:", $src)
75
82
  print("Size:", len($text), "bytes")
83
+ END { print("done") }
76
84
  ```
77
85
 
78
86
  **Built-in variables:**
@@ -83,7 +91,11 @@ print("Size:", len($text), "bytes")
83
91
  | `$fd` | Open file handle for the current file |
84
92
  | `$text` | Lazy text view of the current file contents |
85
93
 
86
- Begin/end blocks use CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown:
94
+ Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
95
+ via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
96
+ before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
97
+ BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
98
+ `BEGIN` and `END` are reserved keywords in all modes.
87
99
  ```bash
88
100
  snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
89
101
  ```
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-ast"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-core"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition.workspace = true
5
5
  readme = "README.md"
6
6
 
@@ -33,15 +33,22 @@ pub fn compile_snail_source_with_auto_print(
33
33
  Ok(module)
34
34
  }
35
35
  CompileMode::Map => {
36
- let program = parse_map_program(source)?;
37
- let module = lower_map_program_with_auto_print(py, &program, auto_print_last)?;
36
+ let (program, begin_blocks, end_blocks) = parse_map_program_with_begin_end(source)?;
37
+ let module = lower_map_program_with_begin_end(
38
+ py,
39
+ &program,
40
+ &begin_blocks,
41
+ &end_blocks,
42
+ auto_print_last,
43
+ )?;
38
44
  Ok(module)
39
45
  }
40
46
  }
41
47
  }
42
48
 
43
49
  /// Compile an awk program with separate begin and end code blocks.
44
- /// Each begin/end source is parsed as a regular Snail program.
50
+ /// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
51
+ /// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
45
52
  pub fn compile_awk_source_with_begin_end(
46
53
  py: Python<'_>,
47
54
  main_source: &str,
@@ -55,7 +62,8 @@ pub fn compile_awk_source_with_begin_end(
55
62
  }
56
63
 
57
64
  /// Compile a map program with separate begin and end code blocks.
58
- /// Each begin/end source is parsed as a map program.
65
+ /// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
66
+ /// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
59
67
  pub fn compile_map_source_with_begin_end(
60
68
  py: Python<'_>,
61
69
  main_source: &str,
@@ -63,17 +71,21 @@ pub fn compile_map_source_with_begin_end(
63
71
  end_sources: &[&str],
64
72
  auto_print_last: bool,
65
73
  ) -> Result<PyObject, SnailError> {
66
- let program = parse_map_program(main_source)?;
67
- let mut begin_blocks = Vec::new();
74
+ let (program, mut begin_blocks, mut end_blocks) =
75
+ parse_map_program_with_begin_end(main_source)?;
76
+
77
+ let mut cli_begin_blocks = Vec::new();
68
78
  for source in begin_sources {
69
- let begin_program = parse_map_program(source)?;
79
+ let begin_program = parse_program(source)?;
70
80
  if !begin_program.stmts.is_empty() {
71
- begin_blocks.push(begin_program.stmts);
81
+ cli_begin_blocks.push(begin_program.stmts);
72
82
  }
73
83
  }
74
- let mut end_blocks = Vec::new();
84
+ cli_begin_blocks.extend(begin_blocks);
85
+ begin_blocks = cli_begin_blocks;
86
+
75
87
  for source in end_sources {
76
- let end_program = parse_map_program(source)?;
88
+ let end_program = parse_program(source)?;
77
89
  if !end_program.stmts.is_empty() {
78
90
  end_blocks.push(end_program.stmts);
79
91
  }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-error"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-lower"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -41,6 +41,7 @@ pub(crate) const SNAIL_MAP_SRC_PYVAR: &str = "__snail_src";
41
41
  pub(crate) const SNAIL_MAP_FD_PYVAR: &str = "__snail_fd";
42
42
  pub(crate) const SNAIL_MAP_TEXT_PYVAR: &str = "__snail_text";
43
43
  pub const SNAIL_LAZY_TEXT_CLASS: &str = "__SnailLazyText";
44
+ pub const SNAIL_LAZY_FILE_CLASS: &str = "__SnailLazyFile";
44
45
 
45
46
  pub(crate) fn injected_py_name(name: &str) -> Option<&'static str> {
46
47
  match name {
@@ -156,7 +156,7 @@ fn lower_map_file_loop(
156
156
  auto_print_last: bool,
157
157
  ) -> Result<PyObject, LowerError> {
158
158
  // for __snail_src in __snail_paths:
159
- // with open(__snail_src, 'r') as __snail_fd:
159
+ // with __SnailLazyFile(__snail_src, 'r') as __snail_fd:
160
160
  // __snail_text = __SnailLazyText(__snail_fd)
161
161
  // # user code
162
162
 
@@ -201,14 +201,14 @@ fn lower_map_file_loop(
201
201
  lower_block_with_auto_print(builder, &program.stmts, auto_print_last, &program.span)?;
202
202
  with_body.extend(user_code);
203
203
 
204
- // open(__snail_src, 'r')
205
- let open_call = builder
204
+ // __SnailLazyFile(__snail_src, 'r')
205
+ let lazy_file_call = builder
206
206
  .call_node(
207
207
  "Call",
208
208
  vec![
209
209
  name_expr(
210
210
  builder,
211
- "open",
211
+ SNAIL_LAZY_FILE_CLASS,
212
212
  span,
213
213
  builder.load_ctx().map_err(py_err_to_lower)?,
214
214
  )?,
@@ -231,12 +231,12 @@ fn lower_map_file_loop(
231
231
  )
232
232
  .map_err(py_err_to_lower)?;
233
233
 
234
- // with open(...) as __snail_fd:
234
+ // with __SnailLazyFile(...) as __snail_fd:
235
235
  let with_item = builder
236
236
  .call_node_no_loc(
237
237
  "withitem",
238
238
  vec![
239
- open_call,
239
+ lazy_file_call,
240
240
  name_expr(
241
241
  builder,
242
242
  SNAIL_MAP_FD_PYVAR,
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-parser"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -12,13 +12,15 @@ mod string;
12
12
  mod util;
13
13
 
14
14
  use awk::parse_awk_rule;
15
- use stmt::parse_stmt_list;
16
- use util::{error_with_span, full_span, parse_error_from_pest};
15
+ use stmt::{parse_block, parse_stmt, parse_stmt_list};
16
+ use util::{error_with_span, full_span, parse_error_from_pest, span_from_offset, span_from_pair};
17
17
 
18
18
  #[derive(Parser)]
19
19
  #[grammar = "snail.pest"]
20
20
  pub struct SnailParser;
21
21
 
22
+ pub type MapProgramWithBeginEnd = (Program, Vec<Vec<Stmt>>, Vec<Vec<Stmt>>);
23
+
22
24
  pub fn parse_program(source: &str) -> Result<Program, ParseError> {
23
25
  let mut pairs = SnailParser::parse(Rule::program, source)
24
26
  .map_err(|err| parse_error_from_pest(err, source))?;
@@ -45,29 +47,44 @@ pub fn parse_awk_program(source: &str) -> Result<AwkProgram, ParseError> {
45
47
  .ok_or_else(|| ParseError::new("missing awk program root"))?;
46
48
  let span = full_span(source);
47
49
 
50
+ let mut begin_blocks = Vec::new();
48
51
  let mut rules = Vec::new();
52
+ let mut end_blocks = Vec::new();
49
53
 
50
54
  for inner in pair.into_inner() {
51
55
  if inner.as_rule() == Rule::awk_entry_list {
52
56
  for entry in inner.into_inner() {
53
- if entry.as_rule() == Rule::awk_rule {
54
- rules.push(parse_awk_rule(entry, source)?);
57
+ match entry.as_rule() {
58
+ Rule::awk_begin => {
59
+ let block = parse_begin_end_block(entry, source, "BEGIN")?;
60
+ if !block.is_empty() {
61
+ begin_blocks.push(block);
62
+ }
63
+ }
64
+ Rule::awk_end => {
65
+ let block = parse_begin_end_block(entry, source, "END")?;
66
+ if !block.is_empty() {
67
+ end_blocks.push(block);
68
+ }
69
+ }
70
+ Rule::awk_rule => rules.push(parse_awk_rule(entry, source)?),
71
+ _ => {}
55
72
  }
56
73
  }
57
74
  }
58
75
  }
59
76
 
60
77
  Ok(AwkProgram {
61
- begin_blocks: Vec::new(),
78
+ begin_blocks,
62
79
  rules,
63
- end_blocks: Vec::new(),
80
+ end_blocks,
64
81
  span,
65
82
  })
66
83
  }
67
84
 
68
85
  /// Parses an awk program with separate begin and end code sources.
69
- /// Each begin/end source is parsed as a regular Snail program and its statements
70
- /// are injected into the resulting AwkProgram.
86
+ /// Each begin/end source is parsed as a regular Snail program and merged so CLI BEGIN
87
+ /// blocks run before in-file BEGIN blocks, and CLI END blocks run after in-file END blocks.
71
88
  pub fn parse_awk_program_with_begin_end(
72
89
  main_source: &str,
73
90
  begin_sources: &[&str],
@@ -75,21 +92,24 @@ pub fn parse_awk_program_with_begin_end(
75
92
  ) -> Result<AwkProgram, ParseError> {
76
93
  let mut program = parse_awk_program(main_source)?;
77
94
 
78
- // Parse each begin source as a regular program and extract statements
95
+ let mut cli_begin_blocks = Vec::new();
79
96
  for source in begin_sources {
80
97
  let begin_program = parse_program(source)?;
81
98
  if !begin_program.stmts.is_empty() {
82
- program.begin_blocks.push(begin_program.stmts);
99
+ cli_begin_blocks.push(begin_program.stmts);
83
100
  }
84
101
  }
102
+ cli_begin_blocks.extend(program.begin_blocks);
103
+ program.begin_blocks = cli_begin_blocks;
85
104
 
86
- // Parse each end source as a regular program and extract statements
105
+ let mut end_blocks = program.end_blocks;
87
106
  for source in end_sources {
88
107
  let end_program = parse_program(source)?;
89
108
  if !end_program.stmts.is_empty() {
90
- program.end_blocks.push(end_program.stmts);
109
+ end_blocks.push(end_program.stmts);
91
110
  }
92
111
  }
112
+ program.end_blocks = end_blocks;
93
113
 
94
114
  Ok(program)
95
115
  }
@@ -102,27 +122,133 @@ const MAP_ONLY_MESSAGE: &str = "map variables are only valid in map mode; use --
102
122
 
103
123
  /// Parses a map program that processes files one at a time.
104
124
  /// Allows map variables ($src, $fd, $text) but rejects awk variables.
125
+ /// In-file BEGIN/END blocks are validated but not returned; use
126
+ /// `parse_map_program_with_begin_end` to access them.
105
127
  pub fn parse_map_program(source: &str) -> Result<Program, ParseError> {
106
- let mut pairs = SnailParser::parse(Rule::program, source)
128
+ let (program, _, _) = parse_map_program_with_begin_end(source)?;
129
+ Ok(program)
130
+ }
131
+
132
+ fn validate_no_awk_syntax_for_map(program: &Program, source: &str) -> Result<(), ParseError> {
133
+ for stmt in &program.stmts {
134
+ validate_stmt_for_map(stmt, source)?;
135
+ }
136
+ Ok(())
137
+ }
138
+
139
+ /// Parses a map program with in-file BEGIN/END blocks.
140
+ /// BEGIN/END blocks are parsed as regular Snail statement blocks (no map/awk vars).
141
+ pub fn parse_map_program_with_begin_end(
142
+ source: &str,
143
+ ) -> Result<MapProgramWithBeginEnd, ParseError> {
144
+ let mut pairs = SnailParser::parse(Rule::map_program, source)
107
145
  .map_err(|err| parse_error_from_pest(err, source))?;
108
146
  let pair = pairs
109
147
  .next()
110
- .ok_or_else(|| ParseError::new("missing program root"))?;
148
+ .ok_or_else(|| ParseError::new("missing map program root"))?;
111
149
  let span = full_span(source);
112
150
  let mut stmts = Vec::new();
151
+ let mut begin_blocks = Vec::new();
152
+ let mut end_blocks = Vec::new();
153
+ let mut entries = Vec::new();
154
+
113
155
  for inner in pair.into_inner() {
114
- if inner.as_rule() == Rule::stmt_list {
115
- stmts = parse_stmt_list(inner, source)?;
156
+ if inner.as_rule() == Rule::map_entry_list {
157
+ for entry in inner.into_inner() {
158
+ if entry.as_rule() != Rule::map_entry {
159
+ continue;
160
+ }
161
+ let entry_span = span_from_pair(&entry, source);
162
+ let mut entry_inner = entry.into_inner();
163
+ let entry_pair = entry_inner.next().ok_or_else(|| {
164
+ error_with_span("missing map entry", entry_span.clone(), source)
165
+ })?;
166
+ match entry_pair.as_rule() {
167
+ Rule::map_begin => {
168
+ let block = parse_begin_end_block(entry_pair, source, "BEGIN")?;
169
+ if !block.is_empty() {
170
+ begin_blocks.push(block);
171
+ }
172
+ entries.push((entry_span, MapEntryKind::BeginEnd));
173
+ }
174
+ Rule::map_end => {
175
+ let block = parse_begin_end_block(entry_pair, source, "END")?;
176
+ if !block.is_empty() {
177
+ end_blocks.push(block);
178
+ }
179
+ entries.push((entry_span, MapEntryKind::BeginEnd));
180
+ }
181
+ _ => {
182
+ let stmt = parse_stmt(entry_pair, source)?;
183
+ entries.push((entry_span, map_entry_kind_for_stmt(&stmt)));
184
+ stmts.push(stmt);
185
+ }
186
+ }
187
+ }
116
188
  }
117
189
  }
190
+
191
+ validate_map_entry_separators(&entries, source)?;
192
+
118
193
  let program = Program { stmts, span };
119
194
  validate_no_awk_syntax_for_map(&program, source)?;
120
- Ok(program)
195
+ Ok((program, begin_blocks, end_blocks))
121
196
  }
122
197
 
123
- fn validate_no_awk_syntax_for_map(program: &Program, source: &str) -> Result<(), ParseError> {
124
- for stmt in &program.stmts {
125
- validate_stmt_for_map(stmt, source)?;
198
+ fn parse_begin_end_block(
199
+ pair: pest::iterators::Pair<'_, Rule>,
200
+ source: &str,
201
+ label: &str,
202
+ ) -> Result<Vec<Stmt>, ParseError> {
203
+ let span = span_from_pair(&pair, source);
204
+ let mut inner = pair.into_inner();
205
+ let block_pair = inner
206
+ .next()
207
+ .ok_or_else(|| error_with_span(format!("missing {label} block"), span.clone(), source))?;
208
+ let block = parse_block(block_pair, source)?;
209
+ validate_block(&block, source)?;
210
+ Ok(block)
211
+ }
212
+
213
+ #[derive(Clone, Copy)]
214
+ enum MapEntryKind {
215
+ BeginEnd,
216
+ Simple,
217
+ Compound,
218
+ }
219
+
220
+ fn map_entry_kind_for_stmt(stmt: &Stmt) -> MapEntryKind {
221
+ match stmt {
222
+ Stmt::If { .. }
223
+ | Stmt::While { .. }
224
+ | Stmt::For { .. }
225
+ | Stmt::Def { .. }
226
+ | Stmt::Class { .. }
227
+ | Stmt::Try { .. }
228
+ | Stmt::With { .. } => MapEntryKind::Compound,
229
+ _ => MapEntryKind::Simple,
230
+ }
231
+ }
232
+
233
+ fn validate_map_entry_separators(
234
+ entries: &[(SourceSpan, MapEntryKind)],
235
+ source: &str,
236
+ ) -> Result<(), ParseError> {
237
+ for window in entries.windows(2) {
238
+ let (prev_span, prev_kind) = &window[0];
239
+ let (next_span, next_kind) = &window[1];
240
+ let gap = &source[prev_span.end.offset..next_span.start.offset];
241
+ let has_sep = gap.contains('\n') || gap.contains(';');
242
+ if !has_sep
243
+ && matches!(prev_kind, MapEntryKind::Simple)
244
+ && !matches!(next_kind, MapEntryKind::BeginEnd)
245
+ {
246
+ return Err(error_with_span(
247
+ "expected statement separator",
248
+ span_from_offset(next_span.start.offset, next_span.start.offset, source),
249
+ source,
250
+ ));
251
+ }
126
252
  }
127
253
  Ok(())
128
254
  }
@@ -1,13 +1,23 @@
1
1
  // Top-level program entry points
2
2
  program = { SOI ~ stmt_sep* ~ stmt_list? ~ stmt_sep* ~ EOI }
3
3
  awk_program = { SOI ~ stmt_sep* ~ awk_entry_list? ~ stmt_sep* ~ EOI }
4
+ map_program = { SOI ~ stmt_sep* ~ map_entry_list? ~ stmt_sep* ~ EOI }
4
5
 
5
6
  // AWK mode: pattern-action rules
6
7
  awk_entry_list = { awk_entry ~ (stmt_sep* ~ awk_entry)* ~ stmt_sep* }
7
- awk_entry = _{ awk_rule }
8
+ awk_entry = _{ awk_begin | awk_end | awk_rule }
9
+ awk_begin = { "BEGIN" ~ block }
10
+ awk_end = { "END" ~ block }
8
11
  awk_rule = { block | awk_pattern ~ block? }
9
12
  awk_pattern = { expr }
10
13
 
14
+ // Map mode: program with optional BEGIN/END blocks
15
+ map_entry_list = { map_entry ~ (stmt_sep* ~ map_entry)* ~ stmt_sep* }
16
+ map_entry = { map_begin_end | stmt }
17
+ map_begin_end = _{ map_begin | map_end }
18
+ map_begin = { "BEGIN" ~ block }
19
+ map_end = { "END" ~ block }
20
+
11
21
  // Statements: separated by semicolons or newlines
12
22
  // Compound statements (ending with }) don't need trailing separators
13
23
  // Simple statements need separators unless they're the last statement
@@ -261,6 +271,7 @@ keyword = _{
261
271
  | "return" | "break" | "continue" | "pass" | "raise" | "try" | "except"
262
272
  | "finally" | "with" | "assert" | "del" | "and" | "or" | "not" | "import" | "from" | "as"
263
273
  | "let"
274
+ | "BEGIN" | "END"
264
275
  | "True" | "False" | "None") ~ !ident_continue
265
276
  }
266
277
 
@@ -1,7 +1,10 @@
1
1
  mod common;
2
2
 
3
3
  use common::*;
4
- use snail_parser::{parse_awk_program, parse_awk_program_with_begin_end, parse_program};
4
+ use snail_parser::{
5
+ parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
6
+ parse_program,
7
+ };
5
8
 
6
9
  #[test]
7
10
  fn reports_parse_error_with_location() {
@@ -203,20 +206,12 @@ fn parser_rejects_invalid_parameter_syntax() {
203
206
  // ========== AWK Mode Parser Tests ==========
204
207
 
205
208
  #[test]
206
- fn awk_begin_end_parsed_as_patterns() {
207
- // BEGIN and END are no longer special keywords; they parse as regular pattern names
208
- // Use -b/-e CLI flags for begin/end blocks instead
209
- let program = parse_awk_program("BEGIN { print(1) }").expect("should parse");
209
+ fn awk_begin_end_parsed_as_blocks() {
210
+ let program = parse_awk_program("BEGIN { print(1) } /foo/ { print($0) } END { print(2) }")
211
+ .expect("should parse");
210
212
  assert_eq!(program.rules.len(), 1);
211
- assert!(program.begin_blocks.is_empty());
212
- // BEGIN is parsed as pattern (identifier), not a special begin block
213
- assert!(program.rules[0].pattern.is_some());
214
-
215
- let program = parse_awk_program("END { print(1) }").expect("should parse");
216
- assert_eq!(program.rules.len(), 1);
217
- assert!(program.end_blocks.is_empty());
218
- // END is parsed as pattern (identifier), not a special end block
219
- assert!(program.rules[0].pattern.is_some());
213
+ assert_eq!(program.begin_blocks.len(), 1);
214
+ assert_eq!(program.end_blocks.len(), 1);
220
215
  }
221
216
 
222
217
  #[test]
@@ -246,6 +241,36 @@ fn awk_with_empty_begin_end() {
246
241
  assert!(program.end_blocks.is_empty());
247
242
  }
248
243
 
244
+ #[test]
245
+ fn awk_begin_end_rejects_awk_vars() {
246
+ let err = parse_awk_program("BEGIN { print($0) }").expect_err("should reject awk vars");
247
+ assert!(err.to_string().contains("$0"));
248
+ }
249
+
250
+ #[test]
251
+ fn map_begin_end_parsed_as_blocks() {
252
+ let (program, begin_blocks, end_blocks) =
253
+ parse_map_program_with_begin_end("BEGIN { print(1) } print($src) END { print(2) }")
254
+ .expect("should parse");
255
+ assert_eq!(program.stmts.len(), 1);
256
+ assert_eq!(begin_blocks.len(), 1);
257
+ assert_eq!(end_blocks.len(), 1);
258
+ }
259
+
260
+ #[test]
261
+ fn map_begin_end_rejects_map_vars() {
262
+ let err = parse_map_program_with_begin_end("BEGIN { print($src) }\nprint($src)")
263
+ .expect_err("should reject map vars in BEGIN/END");
264
+ assert!(err.to_string().contains("$src"));
265
+ }
266
+
267
+ #[test]
268
+ fn map_requires_separators_between_simple_statements() {
269
+ let err = parse_map_program_with_begin_end("print($src) print($src)")
270
+ .expect_err("should reject missing separators");
271
+ assert!(err.to_string().contains("expected statement separator"));
272
+ }
273
+
249
274
  // ========== F-String Interpolation Tests ==========
250
275
 
251
276
  #[test]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-python"
3
- version = "0.6.1"
3
+ version = "0.6.3"
4
4
  edition.workspace = true
5
5
  build = "build.rs"
6
6
 
@@ -5,9 +5,10 @@ use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PySystemExit};
5
5
  use pyo3::prelude::*;
6
6
  use pyo3::types::{PyDict, PyList, PyModule, PyTuple};
7
7
  use snail_core::{
8
- CompileMode, ParseError, Program, compile_awk_source_with_begin_end,
8
+ CompileMode, ParseError, Program, Stmt, compile_awk_source_with_begin_end,
9
9
  compile_map_source_with_begin_end, compile_snail_source_with_auto_print, format_snail_error,
10
- parse_awk_program, parse_awk_program_with_begin_end, parse_map_program, parse_program,
10
+ parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
11
+ parse_program,
11
12
  };
12
13
  use std::sync::OnceLock;
13
14
  use std::time::Instant;
@@ -320,8 +321,8 @@ fn exec_py(
320
321
  #[derive(Debug)]
321
322
  struct MapAst {
322
323
  program: Program,
323
- begin_blocks: Vec<Program>,
324
- end_blocks: Vec<Program>,
324
+ begin_blocks: Vec<Vec<Stmt>>,
325
+ end_blocks: Vec<Vec<Stmt>>,
325
326
  }
326
327
 
327
328
  #[pyfunction(name = "parse_ast")]
@@ -351,24 +352,30 @@ fn parse_ast_py(
351
352
  Ok(format!("{:#?}", program))
352
353
  }
353
354
  CompileMode::Map => {
354
- let program = parse_map_program(source).map_err(err_to_syntax)?;
355
- if begin_code.is_empty() && end_code.is_empty() {
356
- return Ok(format!("{:#?}", program));
357
- }
358
- let mut begin_blocks = Vec::new();
355
+ let (program, mut begin_blocks, mut end_blocks) =
356
+ parse_map_program_with_begin_end(source).map_err(err_to_syntax)?;
357
+
358
+ let mut cli_begin_blocks = Vec::new();
359
359
  for source in &begin_code {
360
- let begin_program = parse_map_program(source).map_err(err_to_syntax)?;
360
+ let begin_program = parse_program(source).map_err(err_to_syntax)?;
361
361
  if !begin_program.stmts.is_empty() {
362
- begin_blocks.push(begin_program);
362
+ cli_begin_blocks.push(begin_program.stmts);
363
363
  }
364
364
  }
365
- let mut end_blocks = Vec::new();
365
+ cli_begin_blocks.extend(begin_blocks);
366
+ begin_blocks = cli_begin_blocks;
367
+
366
368
  for source in &end_code {
367
- let end_program = parse_map_program(source).map_err(err_to_syntax)?;
369
+ let end_program = parse_program(source).map_err(err_to_syntax)?;
368
370
  if !end_program.stmts.is_empty() {
369
- end_blocks.push(end_program);
371
+ end_blocks.push(end_program.stmts);
370
372
  }
371
373
  }
374
+
375
+ if begin_blocks.is_empty() && end_blocks.is_empty() {
376
+ return Ok(format!("{:#?}", program));
377
+ }
378
+
372
379
  let map_ast = MapAst {
373
380
  program,
374
381
  begin_blocks,
@@ -389,7 +396,7 @@ fn parse_py(source: &str, mode: &str, filename: &str) -> PyResult<()> {
389
396
  CompileMode::Awk => parse_awk_program(source)
390
397
  .map(|_| ())
391
398
  .map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
392
- CompileMode::Map => parse_map_program(source)
399
+ CompileMode::Map => parse_map_program_with_begin_end(source)
393
400
  .map(|_| ())
394
401
  .map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
395
402
  }
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "snail-lang"
7
- version = "0.6.1"
7
+ version = "0.6.3"
8
8
  description = "Snail programming language interpreter"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -187,13 +187,12 @@ def _parse_args(argv: list[str]) -> _Args:
187
187
  args.args = argv[idx + 1 :]
188
188
  return args
189
189
  if token == "-" or not token.startswith("-"):
190
- if code_found:
191
- # Already found code, rest are args
192
- args.args.extend(argv[idx:])
193
- return args
194
- # This is the code, continue parsing for -b/--begin and -e/--end after
195
- args.args = [token]
196
- code_found = True
190
+ if not code_found:
191
+ # This is the code (or the first arg when -f is used)
192
+ args.args = [token]
193
+ code_found = True
194
+ else:
195
+ args.args.append(token)
197
196
  idx += 1
198
197
  continue
199
198
  if token in ("-h", "--help"):
@@ -231,6 +230,7 @@ def _parse_args(argv: list[str]) -> _Args:
231
230
  if idx + 1 >= len(argv):
232
231
  raise ValueError("option -f requires an argument")
233
232
  args.file = argv[idx + 1]
233
+ code_found = True
234
234
  idx += 2
235
235
  continue
236
236
  if token in ("-b", "--begin"):
@@ -42,6 +42,7 @@ _subprocess_status = None
42
42
  _jmespath_query = None
43
43
  _js = None
44
44
  _lazy_text_class = None
45
+ _lazy_file_class = None
45
46
  _incr_attr = None
46
47
  _incr_index = None
47
48
  _aug_attr = None
@@ -120,6 +121,15 @@ def _get_lazy_text_class():
120
121
  return _lazy_text_class
121
122
 
122
123
 
124
def _get_lazy_file_class():
    """Return the ``LazyFile`` class, importing it on first use.

    The class is cached in the module-level ``_lazy_file_class`` so the
    import of ``.lazy_file`` runs at most once.
    """
    global _lazy_file_class
    if _lazy_file_class is not None:
        # Already resolved by an earlier call.
        return _lazy_file_class

    from .lazy_file import LazyFile

    _lazy_file_class = LazyFile
    return _lazy_file_class
131
+
132
+
123
133
  def _get_incr_attr():
124
134
  global _incr_attr
125
135
  if _incr_attr is None:
@@ -236,3 +246,4 @@ def install_helpers(globals_dict: dict) -> None:
236
246
  globals_dict["__snail_aug_index"] = _lazy_aug_index
237
247
  globals_dict["js"] = _lazy_js
238
248
  globals_dict["__SnailLazyText"] = _get_lazy_text_class()
249
+ globals_dict["__SnailLazyFile"] = _get_lazy_file_class()
@@ -0,0 +1,41 @@
1
+ """Lazy file opener for map mode."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class LazyFile:
    """Context manager that opens the file on first access.

    The arguments are stored and passed to the builtin ``open()`` only when
    the object is first iterated or when an attribute that is not defined on
    this class is requested.  Leaving the ``with`` block (or calling
    ``close()``) marks the wrapper as closed and closes the underlying file
    if one was opened.
    """

    __slots__ = ("_path", "_mode", "_kwargs", "_fd", "_closed")

    def __init__(self, path, mode="r", **kwargs):
        """Remember how to open *path*; nothing is opened yet.

        Args:
            path: Forwarded as the first argument to ``open()``.
            mode: Forwarded as the ``mode`` argument to ``open()``.
            **kwargs: Extra keyword arguments forwarded to ``open()``.
        """
        self._path = path
        self._mode = mode
        self._kwargs = kwargs
        self._fd = None
        self._closed = False

    def _ensure_open(self):
        """Return the underlying file object, opening it if necessary.

        Raises:
            ValueError: If the wrapper has already been closed.
        """
        if self._closed:
            raise ValueError("I/O operation on closed file.")
        if self._fd is None:
            self._fd = open(self._path, self._mode, **self._kwargs)
        return self._fd

    def close(self):
        """Mark the wrapper closed and close the file if it was opened.

        Defined explicitly so that closing a never-opened wrapper does not
        open the file just to close it.
        """
        self._closed = True
        if self._fd is not None:
            self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        # Never suppress an exception raised inside the ``with`` block.
        return False

    def __getattr__(self, name):
        """Delegate unknown attributes to the (lazily opened) file object.

        Raises:
            AttributeError: If *name* is one of this class's own slots that
                has not been assigned yet.
        """
        # __getattr__ only runs when normal lookup fails.  For our own slots
        # that means the slot is unset (e.g. the instance was created without
        # __init__); delegating would re-enter _ensure_open and recurse
        # forever, so fail fast instead.
        if name in LazyFile.__slots__:
            raise AttributeError(name)
        return getattr(self._ensure_open(), name)

    def __iter__(self):
        return iter(self._ensure_open())

    def __next__(self):
        return next(self._ensure_open())
File without changes
File without changes