snail-lang 0.6.1__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snail_lang-0.6.1 → snail_lang-0.6.2}/Cargo.lock +7 -7
- {snail_lang-0.6.1 → snail_lang-0.6.2}/PKG-INFO +15 -3
- {snail_lang-0.6.1 → snail_lang-0.6.2}/README.md +14 -2
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/src/lib.rs +22 -10
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/lib.rs +146 -20
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/snail.pest +12 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/errors.rs +39 -14
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/Cargo.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/src/lib.rs +22 -15
- {snail_lang-0.6.1 → snail_lang-0.6.2}/pyproject.toml +1 -1
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/cli.py +7 -7
- {snail_lang-0.6.1 → snail_lang-0.6.2}/Cargo.toml +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/LICENSE +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/README.md +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/ast.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/awk.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-ast/src/lib.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-core/README.md +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/README.md +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-error/src/lib.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/README.md +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/awk.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/constants.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/expr.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/helpers.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/lib.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/map.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/operators.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/program.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/py_ast.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-lower/src/stmt.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/README.md +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/awk.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/expr.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/literal.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/stmt.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/string.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/src/util.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/common.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/parser.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/statements.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/syntax_expressions.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-parser/tests/syntax_strings.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/crates/snail-python/build.rs +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/__init__.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/__init__.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/augmented.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/compact_try.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/lazy_text.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/regex.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/structured_accessor.py +0 -0
- {snail_lang-0.6.1 → snail_lang-0.6.2}/python/snail/runtime/subprocess.py +0 -0
|
@@ -485,11 +485,11 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
|
|
485
485
|
|
|
486
486
|
[[package]]
|
|
487
487
|
name = "snail-ast"
|
|
488
|
-
version = "0.6.1"
|
|
488
|
+
version = "0.6.2"
|
|
489
489
|
|
|
490
490
|
[[package]]
|
|
491
491
|
name = "snail-core"
|
|
492
|
-
version = "0.6.1"
|
|
492
|
+
version = "0.6.2"
|
|
493
493
|
dependencies = [
|
|
494
494
|
"pyo3",
|
|
495
495
|
"snail-ast",
|
|
@@ -500,14 +500,14 @@ dependencies = [
|
|
|
500
500
|
|
|
501
501
|
[[package]]
|
|
502
502
|
name = "snail-error"
|
|
503
|
-
version = "0.6.1"
|
|
503
|
+
version = "0.6.2"
|
|
504
504
|
dependencies = [
|
|
505
505
|
"snail-ast",
|
|
506
506
|
]
|
|
507
507
|
|
|
508
508
|
[[package]]
|
|
509
509
|
name = "snail-lower"
|
|
510
|
-
version = "0.6.1"
|
|
510
|
+
version = "0.6.2"
|
|
511
511
|
dependencies = [
|
|
512
512
|
"pyo3",
|
|
513
513
|
"snail-ast",
|
|
@@ -516,7 +516,7 @@ dependencies = [
|
|
|
516
516
|
|
|
517
517
|
[[package]]
|
|
518
518
|
name = "snail-parser"
|
|
519
|
-
version = "0.6.1"
|
|
519
|
+
version = "0.6.2"
|
|
520
520
|
dependencies = [
|
|
521
521
|
"pest",
|
|
522
522
|
"pest_derive",
|
|
@@ -526,7 +526,7 @@ dependencies = [
|
|
|
526
526
|
|
|
527
527
|
[[package]]
|
|
528
528
|
name = "snail-proptest"
|
|
529
|
-
version = "0.6.1"
|
|
529
|
+
version = "0.6.2"
|
|
530
530
|
dependencies = [
|
|
531
531
|
"proptest",
|
|
532
532
|
"pyo3",
|
|
@@ -540,7 +540,7 @@ dependencies = [
|
|
|
540
540
|
|
|
541
541
|
[[package]]
|
|
542
542
|
name = "snail-python"
|
|
543
|
-
version = "0.6.1"
|
|
543
|
+
version = "0.6.2"
|
|
544
544
|
dependencies = [
|
|
545
545
|
"pyo3",
|
|
546
546
|
"snail-core",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: snail-lang
|
|
3
|
-
Version: 0.6.1
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Requires-Dist: jmespath>=1.0.1
|
|
5
5
|
Requires-Dist: maturin>=1.5 ; extra == 'dev'
|
|
6
6
|
Requires-Dist: pytest ; extra == 'dev'
|
|
@@ -57,8 +57,10 @@ semicolons are optional. You can separate statements with newlines.
|
|
|
57
57
|
Process files line-by-line with familiar awk semantics:
|
|
58
58
|
|
|
59
59
|
```snail-awk("hello world\nfoo bar\n")
|
|
60
|
+
BEGIN { print("start") }
|
|
60
61
|
/hello/ { print("matched:", $0) }
|
|
61
62
|
{ print($1, "->", $2) }
|
|
63
|
+
END { print("done") }
|
|
62
64
|
```
|
|
63
65
|
|
|
64
66
|
**Built-in variables:**
|
|
@@ -73,7 +75,11 @@ Process files line-by-line with familiar awk semantics:
|
|
|
73
75
|
| `$p` | Current file path |
|
|
74
76
|
| `$m` | Last regex match object |
|
|
75
77
|
|
|
76
|
-
Begin/end blocks
|
|
78
|
+
Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
|
|
79
|
+
via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
|
|
80
|
+
before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
|
|
81
|
+
`BEGIN` and `END` are reserved keywords in all modes.
|
|
82
|
+
BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
|
|
77
83
|
```bash
|
|
78
84
|
echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
|
|
79
85
|
```
|
|
@@ -83,8 +89,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
|
|
|
83
89
|
Process files one at a time instead of line-by-line:
|
|
84
90
|
|
|
85
91
|
```snail-map
|
|
92
|
+
BEGIN { print("start") }
|
|
86
93
|
print("File:", $src)
|
|
87
94
|
print("Size:", len($text), "bytes")
|
|
95
|
+
END { print("done") }
|
|
88
96
|
```
|
|
89
97
|
|
|
90
98
|
**Built-in variables:**
|
|
@@ -95,7 +103,11 @@ print("Size:", len($text), "bytes")
|
|
|
95
103
|
| `$fd` | Open file handle for the current file |
|
|
96
104
|
| `$text` | Lazy text view of the current file contents |
|
|
97
105
|
|
|
98
|
-
Begin/end blocks
|
|
106
|
+
Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
|
|
107
|
+
via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
|
|
108
|
+
before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
|
|
109
|
+
BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
|
|
110
|
+
`BEGIN` and `END` are reserved keywords in all modes.
|
|
99
111
|
```bash
|
|
100
112
|
snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
|
|
101
113
|
```
|
|
@@ -45,8 +45,10 @@ semicolons are optional. You can separate statements with newlines.
|
|
|
45
45
|
Process files line-by-line with familiar awk semantics:
|
|
46
46
|
|
|
47
47
|
```snail-awk("hello world\nfoo bar\n")
|
|
48
|
+
BEGIN { print("start") }
|
|
48
49
|
/hello/ { print("matched:", $0) }
|
|
49
50
|
{ print($1, "->", $2) }
|
|
51
|
+
END { print("done") }
|
|
50
52
|
```
|
|
51
53
|
|
|
52
54
|
**Built-in variables:**
|
|
@@ -61,7 +63,11 @@ Process files line-by-line with familiar awk semantics:
|
|
|
61
63
|
| `$p` | Current file path |
|
|
62
64
|
| `$m` | Last regex match object |
|
|
63
65
|
|
|
64
|
-
Begin/end blocks
|
|
66
|
+
Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
|
|
67
|
+
via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
|
|
68
|
+
before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
|
|
69
|
+
`BEGIN` and `END` are reserved keywords in all modes.
|
|
70
|
+
BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
|
|
65
71
|
```bash
|
|
66
72
|
echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
|
|
67
73
|
```
|
|
@@ -71,8 +77,10 @@ echo -e "5\n4\n3\n2\n1" | snail --awk --begin 'total = 0' --end 'print("Sum:", t
|
|
|
71
77
|
Process files one at a time instead of line-by-line:
|
|
72
78
|
|
|
73
79
|
```snail-map
|
|
80
|
+
BEGIN { print("start") }
|
|
74
81
|
print("File:", $src)
|
|
75
82
|
print("Size:", len($text), "bytes")
|
|
83
|
+
END { print("done") }
|
|
76
84
|
```
|
|
77
85
|
|
|
78
86
|
**Built-in variables:**
|
|
@@ -83,7 +91,11 @@ print("Size:", len($text), "bytes")
|
|
|
83
91
|
| `$fd` | Open file handle for the current file |
|
|
84
92
|
| `$text` | Lazy text view of the current file contents |
|
|
85
93
|
|
|
86
|
-
Begin/end blocks
|
|
94
|
+
Begin/end blocks can live in the source file (`BEGIN { ... }` / `END { ... }`) or be supplied
|
|
95
|
+
via CLI flags (`-b`/`--begin`, `-e`/`--end`) for setup and teardown. CLI BEGIN blocks run
|
|
96
|
+
before in-file BEGIN blocks; CLI END blocks run after in-file END blocks.
|
|
97
|
+
BEGIN/END blocks are regular Snail blocks, so awk/map-only `$` variables are not available inside them.
|
|
98
|
+
`BEGIN` and `END` are reserved keywords in all modes.
|
|
87
99
|
```bash
|
|
88
100
|
snail --map --begin "print('start')" --end "print('done')" "print($src)" *.txt
|
|
89
101
|
```
|
|
@@ -33,15 +33,22 @@ pub fn compile_snail_source_with_auto_print(
|
|
|
33
33
|
Ok(module)
|
|
34
34
|
}
|
|
35
35
|
CompileMode::Map => {
|
|
36
|
-
let program =
|
|
37
|
-
let module =
|
|
36
|
+
let (program, begin_blocks, end_blocks) = parse_map_program_with_begin_end(source)?;
|
|
37
|
+
let module = lower_map_program_with_begin_end(
|
|
38
|
+
py,
|
|
39
|
+
&program,
|
|
40
|
+
&begin_blocks,
|
|
41
|
+
&end_blocks,
|
|
42
|
+
auto_print_last,
|
|
43
|
+
)?;
|
|
38
44
|
Ok(module)
|
|
39
45
|
}
|
|
40
46
|
}
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
/// Compile an awk program with separate begin and end code blocks.
|
|
44
|
-
///
|
|
50
|
+
/// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
|
|
51
|
+
/// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
|
|
45
52
|
pub fn compile_awk_source_with_begin_end(
|
|
46
53
|
py: Python<'_>,
|
|
47
54
|
main_source: &str,
|
|
@@ -55,7 +62,8 @@ pub fn compile_awk_source_with_begin_end(
|
|
|
55
62
|
}
|
|
56
63
|
|
|
57
64
|
/// Compile a map program with separate begin and end code blocks.
|
|
58
|
-
///
|
|
65
|
+
/// In-file BEGIN/END blocks are merged with CLI blocks so CLI BEGIN runs first
|
|
66
|
+
/// and CLI END runs last. CLI blocks are parsed as regular Snail programs.
|
|
59
67
|
pub fn compile_map_source_with_begin_end(
|
|
60
68
|
py: Python<'_>,
|
|
61
69
|
main_source: &str,
|
|
@@ -63,17 +71,21 @@ pub fn compile_map_source_with_begin_end(
|
|
|
63
71
|
end_sources: &[&str],
|
|
64
72
|
auto_print_last: bool,
|
|
65
73
|
) -> Result<PyObject, SnailError> {
|
|
66
|
-
let program
|
|
67
|
-
|
|
74
|
+
let (program, mut begin_blocks, mut end_blocks) =
|
|
75
|
+
parse_map_program_with_begin_end(main_source)?;
|
|
76
|
+
|
|
77
|
+
let mut cli_begin_blocks = Vec::new();
|
|
68
78
|
for source in begin_sources {
|
|
69
|
-
let begin_program =
|
|
79
|
+
let begin_program = parse_program(source)?;
|
|
70
80
|
if !begin_program.stmts.is_empty() {
|
|
71
|
-
|
|
81
|
+
cli_begin_blocks.push(begin_program.stmts);
|
|
72
82
|
}
|
|
73
83
|
}
|
|
74
|
-
|
|
84
|
+
cli_begin_blocks.extend(begin_blocks);
|
|
85
|
+
begin_blocks = cli_begin_blocks;
|
|
86
|
+
|
|
75
87
|
for source in end_sources {
|
|
76
|
-
let end_program =
|
|
88
|
+
let end_program = parse_program(source)?;
|
|
77
89
|
if !end_program.stmts.is_empty() {
|
|
78
90
|
end_blocks.push(end_program.stmts);
|
|
79
91
|
}
|
|
@@ -12,13 +12,15 @@ mod string;
|
|
|
12
12
|
mod util;
|
|
13
13
|
|
|
14
14
|
use awk::parse_awk_rule;
|
|
15
|
-
use stmt::parse_stmt_list;
|
|
16
|
-
use util::{error_with_span, full_span, parse_error_from_pest};
|
|
15
|
+
use stmt::{parse_block, parse_stmt, parse_stmt_list};
|
|
16
|
+
use util::{error_with_span, full_span, parse_error_from_pest, span_from_offset, span_from_pair};
|
|
17
17
|
|
|
18
18
|
#[derive(Parser)]
|
|
19
19
|
#[grammar = "snail.pest"]
|
|
20
20
|
pub struct SnailParser;
|
|
21
21
|
|
|
22
|
+
pub type MapProgramWithBeginEnd = (Program, Vec<Vec<Stmt>>, Vec<Vec<Stmt>>);
|
|
23
|
+
|
|
22
24
|
pub fn parse_program(source: &str) -> Result<Program, ParseError> {
|
|
23
25
|
let mut pairs = SnailParser::parse(Rule::program, source)
|
|
24
26
|
.map_err(|err| parse_error_from_pest(err, source))?;
|
|
@@ -45,29 +47,44 @@ pub fn parse_awk_program(source: &str) -> Result<AwkProgram, ParseError> {
|
|
|
45
47
|
.ok_or_else(|| ParseError::new("missing awk program root"))?;
|
|
46
48
|
let span = full_span(source);
|
|
47
49
|
|
|
50
|
+
let mut begin_blocks = Vec::new();
|
|
48
51
|
let mut rules = Vec::new();
|
|
52
|
+
let mut end_blocks = Vec::new();
|
|
49
53
|
|
|
50
54
|
for inner in pair.into_inner() {
|
|
51
55
|
if inner.as_rule() == Rule::awk_entry_list {
|
|
52
56
|
for entry in inner.into_inner() {
|
|
53
|
-
|
|
54
|
-
|
|
57
|
+
match entry.as_rule() {
|
|
58
|
+
Rule::awk_begin => {
|
|
59
|
+
let block = parse_begin_end_block(entry, source, "BEGIN")?;
|
|
60
|
+
if !block.is_empty() {
|
|
61
|
+
begin_blocks.push(block);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
Rule::awk_end => {
|
|
65
|
+
let block = parse_begin_end_block(entry, source, "END")?;
|
|
66
|
+
if !block.is_empty() {
|
|
67
|
+
end_blocks.push(block);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
Rule::awk_rule => rules.push(parse_awk_rule(entry, source)?),
|
|
71
|
+
_ => {}
|
|
55
72
|
}
|
|
56
73
|
}
|
|
57
74
|
}
|
|
58
75
|
}
|
|
59
76
|
|
|
60
77
|
Ok(AwkProgram {
|
|
61
|
-
begin_blocks
|
|
78
|
+
begin_blocks,
|
|
62
79
|
rules,
|
|
63
|
-
end_blocks
|
|
80
|
+
end_blocks,
|
|
64
81
|
span,
|
|
65
82
|
})
|
|
66
83
|
}
|
|
67
84
|
|
|
68
85
|
/// Parses an awk program with separate begin and end code sources.
|
|
69
|
-
/// Each begin/end source is parsed as a regular Snail program and
|
|
70
|
-
///
|
|
86
|
+
/// Each begin/end source is parsed as a regular Snail program and merged so CLI BEGIN
|
|
87
|
+
/// blocks run before in-file BEGIN blocks, and CLI END blocks run after in-file END blocks.
|
|
71
88
|
pub fn parse_awk_program_with_begin_end(
|
|
72
89
|
main_source: &str,
|
|
73
90
|
begin_sources: &[&str],
|
|
@@ -75,21 +92,24 @@ pub fn parse_awk_program_with_begin_end(
|
|
|
75
92
|
) -> Result<AwkProgram, ParseError> {
|
|
76
93
|
let mut program = parse_awk_program(main_source)?;
|
|
77
94
|
|
|
78
|
-
|
|
95
|
+
let mut cli_begin_blocks = Vec::new();
|
|
79
96
|
for source in begin_sources {
|
|
80
97
|
let begin_program = parse_program(source)?;
|
|
81
98
|
if !begin_program.stmts.is_empty() {
|
|
82
|
-
|
|
99
|
+
cli_begin_blocks.push(begin_program.stmts);
|
|
83
100
|
}
|
|
84
101
|
}
|
|
102
|
+
cli_begin_blocks.extend(program.begin_blocks);
|
|
103
|
+
program.begin_blocks = cli_begin_blocks;
|
|
85
104
|
|
|
86
|
-
|
|
105
|
+
let mut end_blocks = program.end_blocks;
|
|
87
106
|
for source in end_sources {
|
|
88
107
|
let end_program = parse_program(source)?;
|
|
89
108
|
if !end_program.stmts.is_empty() {
|
|
90
|
-
|
|
109
|
+
end_blocks.push(end_program.stmts);
|
|
91
110
|
}
|
|
92
111
|
}
|
|
112
|
+
program.end_blocks = end_blocks;
|
|
93
113
|
|
|
94
114
|
Ok(program)
|
|
95
115
|
}
|
|
@@ -102,27 +122,133 @@ const MAP_ONLY_MESSAGE: &str = "map variables are only valid in map mode; use --
|
|
|
102
122
|
|
|
103
123
|
/// Parses a map program that processes files one at a time.
|
|
104
124
|
/// Allows map variables ($src, $fd, $text) but rejects awk variables.
|
|
125
|
+
/// In-file BEGIN/END blocks are validated but not returned; use
|
|
126
|
+
/// `parse_map_program_with_begin_end` to access them.
|
|
105
127
|
pub fn parse_map_program(source: &str) -> Result<Program, ParseError> {
|
|
106
|
-
let
|
|
128
|
+
let (program, _, _) = parse_map_program_with_begin_end(source)?;
|
|
129
|
+
Ok(program)
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
fn validate_no_awk_syntax_for_map(program: &Program, source: &str) -> Result<(), ParseError> {
|
|
133
|
+
for stmt in &program.stmts {
|
|
134
|
+
validate_stmt_for_map(stmt, source)?;
|
|
135
|
+
}
|
|
136
|
+
Ok(())
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/// Parses a map program with in-file BEGIN/END blocks.
|
|
140
|
+
/// BEGIN/END blocks are parsed as regular Snail statement blocks (no map/awk vars).
|
|
141
|
+
pub fn parse_map_program_with_begin_end(
|
|
142
|
+
source: &str,
|
|
143
|
+
) -> Result<MapProgramWithBeginEnd, ParseError> {
|
|
144
|
+
let mut pairs = SnailParser::parse(Rule::map_program, source)
|
|
107
145
|
.map_err(|err| parse_error_from_pest(err, source))?;
|
|
108
146
|
let pair = pairs
|
|
109
147
|
.next()
|
|
110
|
-
.ok_or_else(|| ParseError::new("missing program root"))?;
|
|
148
|
+
.ok_or_else(|| ParseError::new("missing map program root"))?;
|
|
111
149
|
let span = full_span(source);
|
|
112
150
|
let mut stmts = Vec::new();
|
|
151
|
+
let mut begin_blocks = Vec::new();
|
|
152
|
+
let mut end_blocks = Vec::new();
|
|
153
|
+
let mut entries = Vec::new();
|
|
154
|
+
|
|
113
155
|
for inner in pair.into_inner() {
|
|
114
|
-
if inner.as_rule() == Rule::
|
|
115
|
-
|
|
156
|
+
if inner.as_rule() == Rule::map_entry_list {
|
|
157
|
+
for entry in inner.into_inner() {
|
|
158
|
+
if entry.as_rule() != Rule::map_entry {
|
|
159
|
+
continue;
|
|
160
|
+
}
|
|
161
|
+
let entry_span = span_from_pair(&entry, source);
|
|
162
|
+
let mut entry_inner = entry.into_inner();
|
|
163
|
+
let entry_pair = entry_inner.next().ok_or_else(|| {
|
|
164
|
+
error_with_span("missing map entry", entry_span.clone(), source)
|
|
165
|
+
})?;
|
|
166
|
+
match entry_pair.as_rule() {
|
|
167
|
+
Rule::map_begin => {
|
|
168
|
+
let block = parse_begin_end_block(entry_pair, source, "BEGIN")?;
|
|
169
|
+
if !block.is_empty() {
|
|
170
|
+
begin_blocks.push(block);
|
|
171
|
+
}
|
|
172
|
+
entries.push((entry_span, MapEntryKind::BeginEnd));
|
|
173
|
+
}
|
|
174
|
+
Rule::map_end => {
|
|
175
|
+
let block = parse_begin_end_block(entry_pair, source, "END")?;
|
|
176
|
+
if !block.is_empty() {
|
|
177
|
+
end_blocks.push(block);
|
|
178
|
+
}
|
|
179
|
+
entries.push((entry_span, MapEntryKind::BeginEnd));
|
|
180
|
+
}
|
|
181
|
+
_ => {
|
|
182
|
+
let stmt = parse_stmt(entry_pair, source)?;
|
|
183
|
+
entries.push((entry_span, map_entry_kind_for_stmt(&stmt)));
|
|
184
|
+
stmts.push(stmt);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
116
188
|
}
|
|
117
189
|
}
|
|
190
|
+
|
|
191
|
+
validate_map_entry_separators(&entries, source)?;
|
|
192
|
+
|
|
118
193
|
let program = Program { stmts, span };
|
|
119
194
|
validate_no_awk_syntax_for_map(&program, source)?;
|
|
120
|
-
Ok(program)
|
|
195
|
+
Ok((program, begin_blocks, end_blocks))
|
|
121
196
|
}
|
|
122
197
|
|
|
123
|
-
fn
|
|
124
|
-
|
|
125
|
-
|
|
198
|
+
fn parse_begin_end_block(
|
|
199
|
+
pair: pest::iterators::Pair<'_, Rule>,
|
|
200
|
+
source: &str,
|
|
201
|
+
label: &str,
|
|
202
|
+
) -> Result<Vec<Stmt>, ParseError> {
|
|
203
|
+
let span = span_from_pair(&pair, source);
|
|
204
|
+
let mut inner = pair.into_inner();
|
|
205
|
+
let block_pair = inner
|
|
206
|
+
.next()
|
|
207
|
+
.ok_or_else(|| error_with_span(format!("missing {label} block"), span.clone(), source))?;
|
|
208
|
+
let block = parse_block(block_pair, source)?;
|
|
209
|
+
validate_block(&block, source)?;
|
|
210
|
+
Ok(block)
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
#[derive(Clone, Copy)]
|
|
214
|
+
enum MapEntryKind {
|
|
215
|
+
BeginEnd,
|
|
216
|
+
Simple,
|
|
217
|
+
Compound,
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
fn map_entry_kind_for_stmt(stmt: &Stmt) -> MapEntryKind {
|
|
221
|
+
match stmt {
|
|
222
|
+
Stmt::If { .. }
|
|
223
|
+
| Stmt::While { .. }
|
|
224
|
+
| Stmt::For { .. }
|
|
225
|
+
| Stmt::Def { .. }
|
|
226
|
+
| Stmt::Class { .. }
|
|
227
|
+
| Stmt::Try { .. }
|
|
228
|
+
| Stmt::With { .. } => MapEntryKind::Compound,
|
|
229
|
+
_ => MapEntryKind::Simple,
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
fn validate_map_entry_separators(
|
|
234
|
+
entries: &[(SourceSpan, MapEntryKind)],
|
|
235
|
+
source: &str,
|
|
236
|
+
) -> Result<(), ParseError> {
|
|
237
|
+
for window in entries.windows(2) {
|
|
238
|
+
let (prev_span, prev_kind) = &window[0];
|
|
239
|
+
let (next_span, next_kind) = &window[1];
|
|
240
|
+
let gap = &source[prev_span.end.offset..next_span.start.offset];
|
|
241
|
+
let has_sep = gap.contains('\n') || gap.contains(';');
|
|
242
|
+
if !has_sep
|
|
243
|
+
&& matches!(prev_kind, MapEntryKind::Simple)
|
|
244
|
+
&& !matches!(next_kind, MapEntryKind::BeginEnd)
|
|
245
|
+
{
|
|
246
|
+
return Err(error_with_span(
|
|
247
|
+
"expected statement separator",
|
|
248
|
+
span_from_offset(next_span.start.offset, next_span.start.offset, source),
|
|
249
|
+
source,
|
|
250
|
+
));
|
|
251
|
+
}
|
|
126
252
|
}
|
|
127
253
|
Ok(())
|
|
128
254
|
}
|
|
@@ -1,13 +1,23 @@
|
|
|
1
1
|
// Top-level program entry points
|
|
2
2
|
program = { SOI ~ stmt_sep* ~ stmt_list? ~ stmt_sep* ~ EOI }
|
|
3
3
|
awk_program = { SOI ~ stmt_sep* ~ awk_entry_list? ~ stmt_sep* ~ EOI }
|
|
4
|
+
map_program = { SOI ~ stmt_sep* ~ map_entry_list? ~ stmt_sep* ~ EOI }
|
|
4
5
|
|
|
5
6
|
// AWK mode: pattern-action rules
|
|
6
7
|
awk_entry_list = { awk_entry ~ (stmt_sep* ~ awk_entry)* ~ stmt_sep* }
|
|
7
|
-
awk_entry = _{ awk_rule }
|
|
8
|
+
awk_entry = _{ awk_begin | awk_end | awk_rule }
|
|
9
|
+
awk_begin = { "BEGIN" ~ block }
|
|
10
|
+
awk_end = { "END" ~ block }
|
|
8
11
|
awk_rule = { block | awk_pattern ~ block? }
|
|
9
12
|
awk_pattern = { expr }
|
|
10
13
|
|
|
14
|
+
// Map mode: program with optional BEGIN/END blocks
|
|
15
|
+
map_entry_list = { map_entry ~ (stmt_sep* ~ map_entry)* ~ stmt_sep* }
|
|
16
|
+
map_entry = { map_begin_end | stmt }
|
|
17
|
+
map_begin_end = _{ map_begin | map_end }
|
|
18
|
+
map_begin = { "BEGIN" ~ block }
|
|
19
|
+
map_end = { "END" ~ block }
|
|
20
|
+
|
|
11
21
|
// Statements: separated by semicolons or newlines
|
|
12
22
|
// Compound statements (ending with }) don't need trailing separators
|
|
13
23
|
// Simple statements need separators unless they're the last statement
|
|
@@ -261,6 +271,7 @@ keyword = _{
|
|
|
261
271
|
| "return" | "break" | "continue" | "pass" | "raise" | "try" | "except"
|
|
262
272
|
| "finally" | "with" | "assert" | "del" | "and" | "or" | "not" | "import" | "from" | "as"
|
|
263
273
|
| "let"
|
|
274
|
+
| "BEGIN" | "END"
|
|
264
275
|
| "True" | "False" | "None") ~ !ident_continue
|
|
265
276
|
}
|
|
266
277
|
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
mod common;
|
|
2
2
|
|
|
3
3
|
use common::*;
|
|
4
|
-
use snail_parser::{
|
|
4
|
+
use snail_parser::{
|
|
5
|
+
parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
|
|
6
|
+
parse_program,
|
|
7
|
+
};
|
|
5
8
|
|
|
6
9
|
#[test]
|
|
7
10
|
fn reports_parse_error_with_location() {
|
|
@@ -203,20 +206,12 @@ fn parser_rejects_invalid_parameter_syntax() {
|
|
|
203
206
|
// ========== AWK Mode Parser Tests ==========
|
|
204
207
|
|
|
205
208
|
#[test]
|
|
206
|
-
fn
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
let program = parse_awk_program("BEGIN { print(1) }").expect("should parse");
|
|
209
|
+
fn awk_begin_end_parsed_as_blocks() {
|
|
210
|
+
let program = parse_awk_program("BEGIN { print(1) } /foo/ { print($0) } END { print(2) }")
|
|
211
|
+
.expect("should parse");
|
|
210
212
|
assert_eq!(program.rules.len(), 1);
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
assert!(program.rules[0].pattern.is_some());
|
|
214
|
-
|
|
215
|
-
let program = parse_awk_program("END { print(1) }").expect("should parse");
|
|
216
|
-
assert_eq!(program.rules.len(), 1);
|
|
217
|
-
assert!(program.end_blocks.is_empty());
|
|
218
|
-
// END is parsed as pattern (identifier), not a special end block
|
|
219
|
-
assert!(program.rules[0].pattern.is_some());
|
|
213
|
+
assert_eq!(program.begin_blocks.len(), 1);
|
|
214
|
+
assert_eq!(program.end_blocks.len(), 1);
|
|
220
215
|
}
|
|
221
216
|
|
|
222
217
|
#[test]
|
|
@@ -246,6 +241,36 @@ fn awk_with_empty_begin_end() {
|
|
|
246
241
|
assert!(program.end_blocks.is_empty());
|
|
247
242
|
}
|
|
248
243
|
|
|
244
|
+
#[test]
|
|
245
|
+
fn awk_begin_end_rejects_awk_vars() {
|
|
246
|
+
let err = parse_awk_program("BEGIN { print($0) }").expect_err("should reject awk vars");
|
|
247
|
+
assert!(err.to_string().contains("$0"));
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
#[test]
|
|
251
|
+
fn map_begin_end_parsed_as_blocks() {
|
|
252
|
+
let (program, begin_blocks, end_blocks) =
|
|
253
|
+
parse_map_program_with_begin_end("BEGIN { print(1) } print($src) END { print(2) }")
|
|
254
|
+
.expect("should parse");
|
|
255
|
+
assert_eq!(program.stmts.len(), 1);
|
|
256
|
+
assert_eq!(begin_blocks.len(), 1);
|
|
257
|
+
assert_eq!(end_blocks.len(), 1);
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
#[test]
|
|
261
|
+
fn map_begin_end_rejects_map_vars() {
|
|
262
|
+
let err = parse_map_program_with_begin_end("BEGIN { print($src) }\nprint($src)")
|
|
263
|
+
.expect_err("should reject map vars in BEGIN/END");
|
|
264
|
+
assert!(err.to_string().contains("$src"));
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
#[test]
|
|
268
|
+
fn map_requires_separators_between_simple_statements() {
|
|
269
|
+
let err = parse_map_program_with_begin_end("print($src) print($src)")
|
|
270
|
+
.expect_err("should reject missing separators");
|
|
271
|
+
assert!(err.to_string().contains("expected statement separator"));
|
|
272
|
+
}
|
|
273
|
+
|
|
249
274
|
// ========== F-String Interpolation Tests ==========
|
|
250
275
|
|
|
251
276
|
#[test]
|
|
@@ -5,9 +5,10 @@ use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PySystemExit};
|
|
|
5
5
|
use pyo3::prelude::*;
|
|
6
6
|
use pyo3::types::{PyDict, PyList, PyModule, PyTuple};
|
|
7
7
|
use snail_core::{
|
|
8
|
-
CompileMode, ParseError, Program, compile_awk_source_with_begin_end,
|
|
8
|
+
CompileMode, ParseError, Program, Stmt, compile_awk_source_with_begin_end,
|
|
9
9
|
compile_map_source_with_begin_end, compile_snail_source_with_auto_print, format_snail_error,
|
|
10
|
-
parse_awk_program, parse_awk_program_with_begin_end,
|
|
10
|
+
parse_awk_program, parse_awk_program_with_begin_end, parse_map_program_with_begin_end,
|
|
11
|
+
parse_program,
|
|
11
12
|
};
|
|
12
13
|
use std::sync::OnceLock;
|
|
13
14
|
use std::time::Instant;
|
|
@@ -320,8 +321,8 @@ fn exec_py(
|
|
|
320
321
|
#[derive(Debug)]
|
|
321
322
|
struct MapAst {
|
|
322
323
|
program: Program,
|
|
323
|
-
begin_blocks: Vec<
|
|
324
|
-
end_blocks: Vec<
|
|
324
|
+
begin_blocks: Vec<Vec<Stmt>>,
|
|
325
|
+
end_blocks: Vec<Vec<Stmt>>,
|
|
325
326
|
}
|
|
326
327
|
|
|
327
328
|
#[pyfunction(name = "parse_ast")]
|
|
@@ -351,24 +352,30 @@ fn parse_ast_py(
|
|
|
351
352
|
Ok(format!("{:#?}", program))
|
|
352
353
|
}
|
|
353
354
|
CompileMode::Map => {
|
|
354
|
-
let program
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
let mut begin_blocks = Vec::new();
|
|
355
|
+
let (program, mut begin_blocks, mut end_blocks) =
|
|
356
|
+
parse_map_program_with_begin_end(source).map_err(err_to_syntax)?;
|
|
357
|
+
|
|
358
|
+
let mut cli_begin_blocks = Vec::new();
|
|
359
359
|
for source in &begin_code {
|
|
360
|
-
let begin_program =
|
|
360
|
+
let begin_program = parse_program(source).map_err(err_to_syntax)?;
|
|
361
361
|
if !begin_program.stmts.is_empty() {
|
|
362
|
-
|
|
362
|
+
cli_begin_blocks.push(begin_program.stmts);
|
|
363
363
|
}
|
|
364
364
|
}
|
|
365
|
-
|
|
365
|
+
cli_begin_blocks.extend(begin_blocks);
|
|
366
|
+
begin_blocks = cli_begin_blocks;
|
|
367
|
+
|
|
366
368
|
for source in &end_code {
|
|
367
|
-
let end_program =
|
|
369
|
+
let end_program = parse_program(source).map_err(err_to_syntax)?;
|
|
368
370
|
if !end_program.stmts.is_empty() {
|
|
369
|
-
end_blocks.push(end_program);
|
|
371
|
+
end_blocks.push(end_program.stmts);
|
|
370
372
|
}
|
|
371
373
|
}
|
|
374
|
+
|
|
375
|
+
if begin_blocks.is_empty() && end_blocks.is_empty() {
|
|
376
|
+
return Ok(format!("{:#?}", program));
|
|
377
|
+
}
|
|
378
|
+
|
|
372
379
|
let map_ast = MapAst {
|
|
373
380
|
program,
|
|
374
381
|
begin_blocks,
|
|
@@ -389,7 +396,7 @@ fn parse_py(source: &str, mode: &str, filename: &str) -> PyResult<()> {
|
|
|
389
396
|
CompileMode::Awk => parse_awk_program(source)
|
|
390
397
|
.map(|_| ())
|
|
391
398
|
.map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
|
|
392
|
-
CompileMode::Map =>
|
|
399
|
+
CompileMode::Map => parse_map_program_with_begin_end(source)
|
|
393
400
|
.map(|_| ())
|
|
394
401
|
.map_err(|err| PySyntaxError::new_err(format_snail_error(&err.into(), filename))),
|
|
395
402
|
}
|
|
@@ -187,13 +187,12 @@ def _parse_args(argv: list[str]) -> _Args:
|
|
|
187
187
|
args.args = argv[idx + 1 :]
|
|
188
188
|
return args
|
|
189
189
|
if token == "-" or not token.startswith("-"):
|
|
190
|
-
if code_found:
|
|
191
|
-
#
|
|
192
|
-
args.args
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
code_found = True
|
|
190
|
+
if not code_found:
|
|
191
|
+
# This is the code (or the first arg when -f is used)
|
|
192
|
+
args.args = [token]
|
|
193
|
+
code_found = True
|
|
194
|
+
else:
|
|
195
|
+
args.args.append(token)
|
|
197
196
|
idx += 1
|
|
198
197
|
continue
|
|
199
198
|
if token in ("-h", "--help"):
|
|
@@ -231,6 +230,7 @@ def _parse_args(argv: list[str]) -> _Args:
|
|
|
231
230
|
if idx + 1 >= len(argv):
|
|
232
231
|
raise ValueError("option -f requires an argument")
|
|
233
232
|
args.file = argv[idx + 1]
|
|
233
|
+
code_found = True
|
|
234
234
|
idx += 2
|
|
235
235
|
continue
|
|
236
236
|
if token in ("-b", "--begin"):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|