snail-lang 0.5.1__tar.gz → 0.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snail_lang-0.5.1 → snail_lang-0.5.3}/Cargo.lock +7 -7
- {snail_lang-0.5.1 → snail_lang-0.5.3}/PKG-INFO +22 -5
- {snail_lang-0.5.1 → snail_lang-0.5.3}/README.md +21 -4
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/ast.rs +2 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/src/lib.rs +14 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/expr.rs +42 -6
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/helpers.rs +31 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/lib.rs +34 -26
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/snail.pest +7 -5
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/string.rs +51 -15
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/common.rs +27 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/errors.rs +47 -1
- snail_lang-0.5.3/crates/snail-parser/tests/syntax_strings.rs +190 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/Cargo.toml +1 -1
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/src/lib.rs +127 -18
- {snail_lang-0.5.1 → snail_lang-0.5.3}/pyproject.toml +1 -1
- snail_lang-0.5.3/python/snail/__init__.py +25 -0
- snail_lang-0.5.3/python/snail/cli.py +280 -0
- snail_lang-0.5.3/python/snail/runtime/__init__.py +167 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/structured_accessor.py +3 -4
- snail_lang-0.5.1/crates/snail-parser/tests/syntax_strings.rs +0 -92
- snail_lang-0.5.1/python/snail/__init__.py +0 -10
- snail_lang-0.5.1/python/snail/cli.py +0 -177
- snail_lang-0.5.1/python/snail/runtime/__init__.py +0 -75
- {snail_lang-0.5.1 → snail_lang-0.5.3}/Cargo.toml +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/LICENSE +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/README.md +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/awk.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/lib.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/README.md +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/README.md +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/src/lib.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/README.md +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/awk.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/constants.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/lib.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/operators.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/program.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/py_ast.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/stmt.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/README.md +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/awk.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/expr.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/literal.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/stmt.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/util.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/parser.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/statements.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/syntax_expressions.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/build.rs +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/compact_try.py +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/regex.py +0 -0
- {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/subprocess.py +0 -0
|
@@ -485,11 +485,11 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
|
|
|
485
485
|
|
|
486
486
|
[[package]]
|
|
487
487
|
name = "snail-ast"
|
|
488
|
-
version = "0.5.1"
|
|
488
|
+
version = "0.5.3"
|
|
489
489
|
|
|
490
490
|
[[package]]
|
|
491
491
|
name = "snail-core"
|
|
492
|
-
version = "0.5.1"
|
|
492
|
+
version = "0.5.3"
|
|
493
493
|
dependencies = [
|
|
494
494
|
"pyo3",
|
|
495
495
|
"snail-ast",
|
|
@@ -500,14 +500,14 @@ dependencies = [
|
|
|
500
500
|
|
|
501
501
|
[[package]]
|
|
502
502
|
name = "snail-error"
|
|
503
|
-
version = "0.5.1"
|
|
503
|
+
version = "0.5.3"
|
|
504
504
|
dependencies = [
|
|
505
505
|
"snail-ast",
|
|
506
506
|
]
|
|
507
507
|
|
|
508
508
|
[[package]]
|
|
509
509
|
name = "snail-lower"
|
|
510
|
-
version = "0.5.1"
|
|
510
|
+
version = "0.5.3"
|
|
511
511
|
dependencies = [
|
|
512
512
|
"pyo3",
|
|
513
513
|
"snail-ast",
|
|
@@ -516,7 +516,7 @@ dependencies = [
|
|
|
516
516
|
|
|
517
517
|
[[package]]
|
|
518
518
|
name = "snail-parser"
|
|
519
|
-
version = "0.5.1"
|
|
519
|
+
version = "0.5.3"
|
|
520
520
|
dependencies = [
|
|
521
521
|
"pest",
|
|
522
522
|
"pest_derive",
|
|
@@ -526,7 +526,7 @@ dependencies = [
|
|
|
526
526
|
|
|
527
527
|
[[package]]
|
|
528
528
|
name = "snail-proptest"
|
|
529
|
-
version = "0.5.1"
|
|
529
|
+
version = "0.5.3"
|
|
530
530
|
dependencies = [
|
|
531
531
|
"proptest",
|
|
532
532
|
"pyo3",
|
|
@@ -540,7 +540,7 @@ dependencies = [
|
|
|
540
540
|
|
|
541
541
|
[[package]]
|
|
542
542
|
name = "snail-python"
|
|
543
|
-
version = "0.5.1"
|
|
543
|
+
version = "0.5.3"
|
|
544
544
|
dependencies = [
|
|
545
545
|
"pyo3",
|
|
546
546
|
"snail-core",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: snail-lang
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.3
|
|
4
4
|
Requires-Dist: jmespath>=1.0.1
|
|
5
5
|
Requires-Dist: maturin>=1.5 ; extra == 'dev'
|
|
6
6
|
Requires-Dist: pytest ; extra == 'dev'
|
|
@@ -19,6 +19,12 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
19
19
|
|
|
20
20
|
**Snail** is a programming language that compiles to Python, combining Python's familiarity and extensive libraries with Perl/awk-inspired syntax for quick scripts and one-liners.
|
|
21
21
|
|
|
22
|
+
## AI Slop!
|
|
23
|
+
|
|
24
|
+
Snail is me learning how to develop code using LLMs. I think it's neat, and
|
|
25
|
+
maybe useful. I don't think this is high quality. I am going to try and LLM my
|
|
26
|
+
way into something good, but it's certainly not there yet.
|
|
27
|
+
|
|
22
28
|
## Installing Snail
|
|
23
29
|
|
|
24
30
|
Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and then run:
|
|
@@ -51,14 +57,18 @@ semicolons are optional. You can separate statements with newlines.
|
|
|
51
57
|
|
|
52
58
|
Process files line-by-line with familiar awk semantics:
|
|
53
59
|
|
|
54
|
-
```snail-awk("
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
END { print("Sum:", total); assert total == 15}
|
|
60
|
+
```snail-awk("hello world\nfoo bar\n")
|
|
61
|
+
/hello/ { print("matched:", $0) }
|
|
62
|
+
{ print($1, "->", $2) }
|
|
58
63
|
```
|
|
59
64
|
|
|
60
65
|
Built-in variables: `$0` (line), `$1`, `$2` etc (access fields), `$n` (line number), `$fn` (per-file line number), `$p` (file path), `$m` (last match).
|
|
61
66
|
|
|
67
|
+
Begin/end blocks use CLI flags for setup and teardown:
|
|
68
|
+
```bash
|
|
69
|
+
echo -e "5\n4\n3\n2\n1" | snail --awk -b 'total = 0' -e 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
|
|
70
|
+
```
|
|
71
|
+
|
|
62
72
|
### Compact Error Handling
|
|
63
73
|
|
|
64
74
|
The `?` operator makes error handling terse yet expressive:
|
|
@@ -229,6 +239,13 @@ Plug 'sudonym1/snail', { 'rtp': 'extras/vim' }
|
|
|
229
239
|
|
|
230
240
|
See [extras/vim/README.md](extras/vim/README.md) for details. Tree-sitter grammar available in `extras/tree-sitter-snail/`.
|
|
231
241
|
|
|
242
|
+
## Performance
|
|
243
|
+
|
|
244
|
+
Section is WIP
|
|
245
|
+
|
|
246
|
+
Startup performance is benchmarked with `./benchmarks/startup.py`. On my
|
|
247
|
+
machine snail adds 5 ms of overhead above the regular python3 interpreter.
|
|
248
|
+
|
|
232
249
|
## 🛠️ Building from Source
|
|
233
250
|
|
|
234
251
|
### Prerequisites
|
|
@@ -7,6 +7,12 @@
|
|
|
7
7
|
|
|
8
8
|
**Snail** is a programming language that compiles to Python, combining Python's familiarity and extensive libraries with Perl/awk-inspired syntax for quick scripts and one-liners.
|
|
9
9
|
|
|
10
|
+
## AI Slop!
|
|
11
|
+
|
|
12
|
+
Snail is me learning how to develop code using LLMs. I think it's neat, and
|
|
13
|
+
maybe useful. I don't think this is high quality. I am going to try and LLM my
|
|
14
|
+
way into something good, but it's certainly not there yet.
|
|
15
|
+
|
|
10
16
|
## Installing Snail
|
|
11
17
|
|
|
12
18
|
Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and then run:
|
|
@@ -39,14 +45,18 @@ semicolons are optional. You can separate statements with newlines.
|
|
|
39
45
|
|
|
40
46
|
Process files line-by-line with familiar awk semantics:
|
|
41
47
|
|
|
42
|
-
```snail-awk("
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
END { print("Sum:", total); assert total == 15}
|
|
48
|
+
```snail-awk("hello world\nfoo bar\n")
|
|
49
|
+
/hello/ { print("matched:", $0) }
|
|
50
|
+
{ print($1, "->", $2) }
|
|
46
51
|
```
|
|
47
52
|
|
|
48
53
|
Built-in variables: `$0` (line), `$1`, `$2` etc (access fields), `$n` (line number), `$fn` (per-file line number), `$p` (file path), `$m` (last match).
|
|
49
54
|
|
|
55
|
+
Begin/end blocks use CLI flags for setup and teardown:
|
|
56
|
+
```bash
|
|
57
|
+
echo -e "5\n4\n3\n2\n1" | snail --awk -b 'total = 0' -e 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
|
|
58
|
+
```
|
|
59
|
+
|
|
50
60
|
### Compact Error Handling
|
|
51
61
|
|
|
52
62
|
The `?` operator makes error handling terse yet expressive:
|
|
@@ -217,6 +227,13 @@ Plug 'sudonym1/snail', { 'rtp': 'extras/vim' }
|
|
|
217
227
|
|
|
218
228
|
See [extras/vim/README.md](extras/vim/README.md) for details. Tree-sitter grammar available in `extras/tree-sitter-snail/`.
|
|
219
229
|
|
|
230
|
+
## Performance
|
|
231
|
+
|
|
232
|
+
Section is WIP
|
|
233
|
+
|
|
234
|
+
Startup performance is benchmarked with `./benchmarks/startup.py`. On my
|
|
235
|
+
machine snail adds 5 ms of overhead above the regular python3 interpreter.
|
|
236
|
+
|
|
220
237
|
## 🛠️ Building from Source
|
|
221
238
|
|
|
222
239
|
### Prerequisites
|
|
@@ -189,11 +189,13 @@ pub enum Expr {
|
|
|
189
189
|
String {
|
|
190
190
|
value: String,
|
|
191
191
|
raw: bool,
|
|
192
|
+
bytes: bool,
|
|
192
193
|
delimiter: StringDelimiter,
|
|
193
194
|
span: SourceSpan,
|
|
194
195
|
},
|
|
195
196
|
FString {
|
|
196
197
|
parts: Vec<FStringPart>,
|
|
198
|
+
bytes: bool,
|
|
197
199
|
span: SourceSpan,
|
|
198
200
|
},
|
|
199
201
|
Bool {
|
|
@@ -34,3 +34,17 @@ pub fn compile_snail_source_with_auto_print(
|
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
+
|
|
38
|
+
/// Compile an awk program with separate begin and end code blocks.
|
|
39
|
+
/// Each begin/end source is parsed as a regular Snail program.
|
|
40
|
+
pub fn compile_awk_source_with_begin_end(
|
|
41
|
+
py: Python<'_>,
|
|
42
|
+
main_source: &str,
|
|
43
|
+
begin_sources: &[&str],
|
|
44
|
+
end_sources: &[&str],
|
|
45
|
+
auto_print_last: bool,
|
|
46
|
+
) -> Result<PyObject, SnailError> {
|
|
47
|
+
let program = parse_awk_program_with_begin_end(main_source, begin_sources, end_sources)?;
|
|
48
|
+
let module = lower_awk_program_with_auto_print(py, &program, auto_print_last)?;
|
|
49
|
+
Ok(module)
|
|
50
|
+
}
|
|
@@ -4,7 +4,7 @@ use snail_ast::*;
|
|
|
4
4
|
use snail_error::LowerError;
|
|
5
5
|
|
|
6
6
|
use crate::constants::*;
|
|
7
|
-
use crate::helpers::{name_expr, number_expr, regex_pattern_expr, string_expr};
|
|
7
|
+
use crate::helpers::{byte_string_expr, name_expr, number_expr, regex_pattern_expr, string_expr};
|
|
8
8
|
use crate::operators::{lower_binary_op, lower_bool_op, lower_compare_op, lower_unary_op};
|
|
9
9
|
use crate::py_ast::{AstBuilder, py_err_to_lower};
|
|
10
10
|
|
|
@@ -441,18 +441,53 @@ pub(crate) fn lower_expr_with_exception(
|
|
|
441
441
|
Expr::String {
|
|
442
442
|
value,
|
|
443
443
|
raw,
|
|
444
|
+
bytes,
|
|
444
445
|
delimiter,
|
|
445
446
|
span,
|
|
446
|
-
} =>
|
|
447
|
-
|
|
447
|
+
} => {
|
|
448
|
+
if *bytes {
|
|
449
|
+
byte_string_expr(builder, value, *raw, *delimiter, span)
|
|
450
|
+
} else {
|
|
451
|
+
string_expr(builder, value, *raw, *delimiter, span)
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
Expr::FString { parts, bytes, span } => {
|
|
448
455
|
let values = lower_fstring_parts(builder, parts, exception_name)?;
|
|
449
|
-
builder
|
|
456
|
+
let joined = builder
|
|
450
457
|
.call_node(
|
|
451
458
|
"JoinedStr",
|
|
452
459
|
vec![PyList::new_bound(builder.py(), values).into_py(builder.py())],
|
|
453
460
|
span,
|
|
454
461
|
)
|
|
455
|
-
.map_err(py_err_to_lower)
|
|
462
|
+
.map_err(py_err_to_lower)?;
|
|
463
|
+
|
|
464
|
+
if *bytes {
|
|
465
|
+
// Wrap in .encode() call: f"...".encode()
|
|
466
|
+
let encode_attr = builder
|
|
467
|
+
.call_node(
|
|
468
|
+
"Attribute",
|
|
469
|
+
vec![
|
|
470
|
+
joined,
|
|
471
|
+
"encode".to_string().into_py(builder.py()),
|
|
472
|
+
builder.load_ctx().map_err(py_err_to_lower)?,
|
|
473
|
+
],
|
|
474
|
+
span,
|
|
475
|
+
)
|
|
476
|
+
.map_err(py_err_to_lower)?;
|
|
477
|
+
builder
|
|
478
|
+
.call_node(
|
|
479
|
+
"Call",
|
|
480
|
+
vec![
|
|
481
|
+
encode_attr,
|
|
482
|
+
PyList::empty_bound(builder.py()).into_py(builder.py()),
|
|
483
|
+
PyList::empty_bound(builder.py()).into_py(builder.py()),
|
|
484
|
+
],
|
|
485
|
+
span,
|
|
486
|
+
)
|
|
487
|
+
.map_err(py_err_to_lower)
|
|
488
|
+
} else {
|
|
489
|
+
Ok(joined)
|
|
490
|
+
}
|
|
456
491
|
}
|
|
457
492
|
Expr::Bool { value, span } => builder
|
|
458
493
|
.call_node("Constant", vec![value.into_py(builder.py())], span)
|
|
@@ -1324,7 +1359,7 @@ fn substitute_placeholder(expr: &Expr, replacement: &Expr) -> Expr {
|
|
|
1324
1359
|
| Expr::None { .. }
|
|
1325
1360
|
| Expr::StructuredAccessor { .. }
|
|
1326
1361
|
| Expr::FieldIndex { .. } => expr.clone(),
|
|
1327
|
-
Expr::FString { parts, span } => Expr::FString {
|
|
1362
|
+
Expr::FString { parts, bytes, span } => Expr::FString {
|
|
1328
1363
|
parts: parts
|
|
1329
1364
|
.iter()
|
|
1330
1365
|
.map(|part| match part {
|
|
@@ -1334,6 +1369,7 @@ fn substitute_placeholder(expr: &Expr, replacement: &Expr) -> Expr {
|
|
|
1334
1369
|
}
|
|
1335
1370
|
})
|
|
1336
1371
|
.collect(),
|
|
1372
|
+
bytes: *bytes,
|
|
1337
1373
|
span: span.clone(),
|
|
1338
1374
|
},
|
|
1339
1375
|
Expr::Unary { op, expr, span } => Expr::Unary {
|
|
@@ -75,6 +75,37 @@ pub(crate) fn string_expr(
|
|
|
75
75
|
Ok(expr.into_py(builder.py()))
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
+
pub(crate) fn byte_string_expr(
|
|
79
|
+
builder: &AstBuilder<'_>,
|
|
80
|
+
value: &str,
|
|
81
|
+
raw: bool,
|
|
82
|
+
delimiter: StringDelimiter,
|
|
83
|
+
span: &SourceSpan,
|
|
84
|
+
) -> Result<PyObject, LowerError> {
|
|
85
|
+
let rendered = match (raw, delimiter) {
|
|
86
|
+
(true, StringDelimiter::Single) => format!("rb'{}'", value),
|
|
87
|
+
(true, StringDelimiter::Double) => format!("rb\"{}\"", value),
|
|
88
|
+
(true, StringDelimiter::TripleSingle) => format!("rb'''{}'''", value),
|
|
89
|
+
(true, StringDelimiter::TripleDouble) => format!("rb\"\"\"{}\"\"\"", value),
|
|
90
|
+
(false, StringDelimiter::Single) => format!("b'{}'", value),
|
|
91
|
+
(false, StringDelimiter::Double) => format!("b\"{}\"", value),
|
|
92
|
+
(false, StringDelimiter::TripleSingle) => format!("b'''{}'''", value),
|
|
93
|
+
(false, StringDelimiter::TripleDouble) => format!("b\"\"\"{}\"\"\"", value),
|
|
94
|
+
};
|
|
95
|
+
let expr = builder
|
|
96
|
+
.py()
|
|
97
|
+
.import_bound("ast")
|
|
98
|
+
.and_then(|ast| ast.getattr("parse"))
|
|
99
|
+
.and_then(|parse| parse.call1((rendered,)))
|
|
100
|
+
.and_then(|module| module.getattr("body"))
|
|
101
|
+
.and_then(|body| body.get_item(0))
|
|
102
|
+
.and_then(|expr_stmt| expr_stmt.getattr("value"));
|
|
103
|
+
|
|
104
|
+
let expr = expr.map_err(py_err_to_lower)?;
|
|
105
|
+
set_location(&expr, span).map_err(py_err_to_lower)?;
|
|
106
|
+
Ok(expr.into_py(builder.py()))
|
|
107
|
+
}
|
|
108
|
+
|
|
78
109
|
pub(crate) fn number_expr(
|
|
79
110
|
builder: &AstBuilder<'_>,
|
|
80
111
|
value: &str,
|
|
@@ -12,7 +12,7 @@ mod string;
|
|
|
12
12
|
mod util;
|
|
13
13
|
|
|
14
14
|
use awk::parse_awk_rule;
|
|
15
|
-
use stmt::
|
|
15
|
+
use stmt::parse_stmt_list;
|
|
16
16
|
use util::{error_with_span, full_span, parse_error_from_pest};
|
|
17
17
|
|
|
18
18
|
#[derive(Parser)]
|
|
@@ -45,47 +45,55 @@ pub fn parse_awk_program(source: &str) -> Result<AwkProgram, ParseError> {
|
|
|
45
45
|
.ok_or_else(|| ParseError::new("missing awk program root"))?;
|
|
46
46
|
let span = full_span(source);
|
|
47
47
|
|
|
48
|
-
let mut begin_blocks = Vec::new();
|
|
49
48
|
let mut rules = Vec::new();
|
|
50
|
-
let mut end_blocks = Vec::new();
|
|
51
49
|
|
|
52
50
|
for inner in pair.into_inner() {
|
|
53
51
|
if inner.as_rule() == Rule::awk_entry_list {
|
|
54
52
|
for entry in inner.into_inner() {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
let block = entry
|
|
58
|
-
.into_inner()
|
|
59
|
-
.find(|pair| pair.as_rule() == Rule::block)
|
|
60
|
-
.ok_or_else(|| {
|
|
61
|
-
util::error_with_span("missing BEGIN block", span.clone(), source)
|
|
62
|
-
})?;
|
|
63
|
-
begin_blocks.push(parse_block(block, source)?);
|
|
64
|
-
}
|
|
65
|
-
Rule::awk_end => {
|
|
66
|
-
let block = entry
|
|
67
|
-
.into_inner()
|
|
68
|
-
.find(|pair| pair.as_rule() == Rule::block)
|
|
69
|
-
.ok_or_else(|| {
|
|
70
|
-
util::error_with_span("missing END block", span.clone(), source)
|
|
71
|
-
})?;
|
|
72
|
-
end_blocks.push(parse_block(block, source)?);
|
|
73
|
-
}
|
|
74
|
-
Rule::awk_rule => rules.push(parse_awk_rule(entry, source)?),
|
|
75
|
-
_ => {}
|
|
53
|
+
if entry.as_rule() == Rule::awk_rule {
|
|
54
|
+
rules.push(parse_awk_rule(entry, source)?);
|
|
76
55
|
}
|
|
77
56
|
}
|
|
78
57
|
}
|
|
79
58
|
}
|
|
80
59
|
|
|
81
60
|
Ok(AwkProgram {
|
|
82
|
-
begin_blocks,
|
|
61
|
+
begin_blocks: Vec::new(),
|
|
83
62
|
rules,
|
|
84
|
-
end_blocks,
|
|
63
|
+
end_blocks: Vec::new(),
|
|
85
64
|
span,
|
|
86
65
|
})
|
|
87
66
|
}
|
|
88
67
|
|
|
68
|
+
/// Parses an awk program with separate begin and end code sources.
|
|
69
|
+
/// Each begin/end source is parsed as a regular Snail program and its statements
|
|
70
|
+
/// are injected into the resulting AwkProgram.
|
|
71
|
+
pub fn parse_awk_program_with_begin_end(
|
|
72
|
+
main_source: &str,
|
|
73
|
+
begin_sources: &[&str],
|
|
74
|
+
end_sources: &[&str],
|
|
75
|
+
) -> Result<AwkProgram, ParseError> {
|
|
76
|
+
let mut program = parse_awk_program(main_source)?;
|
|
77
|
+
|
|
78
|
+
// Parse each begin source as a regular program and extract statements
|
|
79
|
+
for source in begin_sources {
|
|
80
|
+
let begin_program = parse_program(source)?;
|
|
81
|
+
if !begin_program.stmts.is_empty() {
|
|
82
|
+
program.begin_blocks.push(begin_program.stmts);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Parse each end source as a regular program and extract statements
|
|
87
|
+
for source in end_sources {
|
|
88
|
+
let end_program = parse_program(source)?;
|
|
89
|
+
if !end_program.stmts.is_empty() {
|
|
90
|
+
program.end_blocks.push(end_program.stmts);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
Ok(program)
|
|
95
|
+
}
|
|
96
|
+
|
|
89
97
|
const AWK_ONLY_NAMES: [&str; 4] = ["$n", "$fn", "$p", "$m"];
|
|
90
98
|
const AWK_ONLY_MESSAGE: &str = "awk variables are only valid in awk mode; use --awk";
|
|
91
99
|
|
|
@@ -2,11 +2,9 @@
|
|
|
2
2
|
program = { SOI ~ stmt_sep* ~ stmt_list? ~ stmt_sep* ~ EOI }
|
|
3
3
|
awk_program = { SOI ~ stmt_sep* ~ awk_entry_list? ~ stmt_sep* ~ EOI }
|
|
4
4
|
|
|
5
|
-
// AWK mode:
|
|
5
|
+
// AWK mode: pattern-action rules
|
|
6
6
|
awk_entry_list = { awk_entry ~ (stmt_sep* ~ awk_entry)* ~ stmt_sep* }
|
|
7
|
-
awk_entry = _{
|
|
8
|
-
awk_begin = { "BEGIN" ~ block }
|
|
9
|
-
awk_end = { "END" ~ block }
|
|
7
|
+
awk_entry = _{ awk_rule }
|
|
10
8
|
awk_rule = { block | awk_pattern ~ block? }
|
|
11
9
|
awk_pattern = { expr }
|
|
12
10
|
|
|
@@ -223,7 +221,11 @@ injected_var = { "$fn" | "$n" | "$p" | "$m" }
|
|
|
223
221
|
|
|
224
222
|
// Number, string, and regex literals
|
|
225
223
|
number = @{ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? }
|
|
226
|
-
string = @{
|
|
224
|
+
string = @{ string_prefix? ~ (triple_double | triple_single | double_string | single_string) }
|
|
225
|
+
string_prefix = { byte_raw_prefix | raw_byte_prefix | byte_prefix | raw_prefix }
|
|
226
|
+
byte_raw_prefix = { "br" }
|
|
227
|
+
raw_byte_prefix = { "rb" }
|
|
228
|
+
byte_prefix = { "b" }
|
|
227
229
|
raw_prefix = { "r" }
|
|
228
230
|
triple_double = { "\"\"\"" ~ (!"\"\"\"" ~ ANY)* ~ "\"\"\"" }
|
|
229
231
|
triple_single = { "'''" ~ (!"'''" ~ ANY)* ~ "'''" }
|
|
@@ -17,6 +17,7 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
|
|
|
17
17
|
return Ok(Expr::String {
|
|
18
18
|
value: parsed.content,
|
|
19
19
|
raw: true,
|
|
20
|
+
bytes: parsed.bytes,
|
|
20
21
|
delimiter: parsed.delimiter,
|
|
21
22
|
span,
|
|
22
23
|
});
|
|
@@ -28,12 +29,17 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
|
|
|
28
29
|
.any(|part| matches!(part, FStringPart::Expr(_)));
|
|
29
30
|
if has_expr {
|
|
30
31
|
let parts = normalize_string_parts(parts, parsed.raw)?;
|
|
31
|
-
Ok(Expr::FString {
|
|
32
|
+
Ok(Expr::FString {
|
|
33
|
+
parts,
|
|
34
|
+
bytes: parsed.bytes,
|
|
35
|
+
span,
|
|
36
|
+
})
|
|
32
37
|
} else {
|
|
33
38
|
let value = join_fstring_text(parts);
|
|
34
39
|
Ok(Expr::String {
|
|
35
40
|
value,
|
|
36
41
|
raw: parsed.raw,
|
|
42
|
+
bytes: parsed.bytes,
|
|
37
43
|
delimiter: parsed.delimiter,
|
|
38
44
|
span,
|
|
39
45
|
})
|
|
@@ -43,6 +49,7 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
|
|
|
43
49
|
pub struct ParsedStringLiteral {
|
|
44
50
|
pub content: String,
|
|
45
51
|
pub raw: bool,
|
|
52
|
+
pub bytes: bool,
|
|
46
53
|
pub delimiter: StringDelimiter,
|
|
47
54
|
pub content_offset: usize,
|
|
48
55
|
}
|
|
@@ -50,10 +57,17 @@ pub struct ParsedStringLiteral {
|
|
|
50
57
|
pub fn parse_string_literal(pair: Pair<'_, Rule>) -> Result<ParsedStringLiteral, ParseError> {
|
|
51
58
|
let value = pair.as_str();
|
|
52
59
|
let span = pair.as_span();
|
|
53
|
-
|
|
54
|
-
|
|
60
|
+
// Parse prefix - check longer prefixes first
|
|
61
|
+
let (raw, bytes, rest, prefix_len) = if let Some(stripped) = value.strip_prefix("br") {
|
|
62
|
+
(true, true, stripped, 2usize)
|
|
63
|
+
} else if let Some(stripped) = value.strip_prefix("rb") {
|
|
64
|
+
(true, true, stripped, 2usize)
|
|
65
|
+
} else if let Some(stripped) = value.strip_prefix('b') {
|
|
66
|
+
(false, true, stripped, 1usize)
|
|
67
|
+
} else if let Some(stripped) = value.strip_prefix('r') {
|
|
68
|
+
(true, false, stripped, 1usize)
|
|
55
69
|
} else {
|
|
56
|
-
(false, value, 0usize)
|
|
70
|
+
(false, false, value, 0usize)
|
|
57
71
|
};
|
|
58
72
|
let (delimiter, open, close) = if rest.starts_with("\"\"\"") {
|
|
59
73
|
(StringDelimiter::TripleDouble, "\"\"\"", "\"\"\"")
|
|
@@ -73,6 +87,7 @@ pub fn parse_string_literal(pair: Pair<'_, Rule>) -> Result<ParsedStringLiteral,
|
|
|
73
87
|
Ok(ParsedStringLiteral {
|
|
74
88
|
content: content.to_string(),
|
|
75
89
|
raw,
|
|
90
|
+
bytes,
|
|
76
91
|
delimiter,
|
|
77
92
|
content_offset,
|
|
78
93
|
})
|
|
@@ -166,15 +181,29 @@ pub fn find_fstring_expr_end(content: &str, start: usize) -> Option<usize> {
|
|
|
166
181
|
let mut brace = 0usize;
|
|
167
182
|
while i < bytes.len() {
|
|
168
183
|
match bytes[i] {
|
|
169
|
-
b'r' => {
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
184
|
+
b'r' | b'b' => {
|
|
185
|
+
// Check for string prefix combinations: r, b, rb, br
|
|
186
|
+
if let Some(next) = bytes.get(i + 1) {
|
|
187
|
+
if *next == b'\'' || *next == b'"' {
|
|
188
|
+
// r"..." or b"..."
|
|
189
|
+
if let Some(end) = skip_string_literal(bytes, i) {
|
|
190
|
+
i = end;
|
|
191
|
+
continue;
|
|
192
|
+
} else {
|
|
193
|
+
return None;
|
|
194
|
+
}
|
|
195
|
+
} else if (*next == b'r' || *next == b'b') && bytes[i] != *next {
|
|
196
|
+
// Could be rb"..." or br"..."
|
|
197
|
+
if let Some(third) = bytes.get(i + 2)
|
|
198
|
+
&& (*third == b'\'' || *third == b'"')
|
|
199
|
+
{
|
|
200
|
+
if let Some(end) = skip_string_literal(bytes, i) {
|
|
201
|
+
i = end;
|
|
202
|
+
continue;
|
|
203
|
+
} else {
|
|
204
|
+
return None;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
178
207
|
}
|
|
179
208
|
}
|
|
180
209
|
i += 1;
|
|
@@ -221,7 +250,14 @@ pub fn find_fstring_expr_end(content: &str, start: usize) -> Option<usize> {
|
|
|
221
250
|
|
|
222
251
|
pub fn skip_string_literal(bytes: &[u8], start: usize) -> Option<usize> {
|
|
223
252
|
let mut i = start;
|
|
224
|
-
|
|
253
|
+
// Handle prefixes: br, rb, b, r (check longer prefixes first)
|
|
254
|
+
let raw = if bytes.get(i..i + 2) == Some(b"br") || bytes.get(i..i + 2) == Some(b"rb") {
|
|
255
|
+
i += 2;
|
|
256
|
+
true
|
|
257
|
+
} else if bytes.get(i) == Some(&b'b') {
|
|
258
|
+
i += 1;
|
|
259
|
+
false
|
|
260
|
+
} else if bytes.get(i) == Some(&b'r') {
|
|
225
261
|
i += 1;
|
|
226
262
|
true
|
|
227
263
|
} else {
|
|
@@ -327,7 +363,7 @@ pub fn shift_expr_spans(expr: &mut Expr, offset: usize, source: &str) {
|
|
|
327
363
|
| Expr::Slice { span, .. } => {
|
|
328
364
|
*span = shift_span(span, offset, source);
|
|
329
365
|
}
|
|
330
|
-
Expr::FString { parts, span } => {
|
|
366
|
+
Expr::FString { parts, span, .. } => {
|
|
331
367
|
for part in parts {
|
|
332
368
|
if let FStringPart::Expr(expr) = part {
|
|
333
369
|
shift_expr_spans(expr, offset, source);
|
|
@@ -84,6 +84,33 @@ pub fn expect_string_contains(expr: &Expr, snippet: &str, raw: bool, delimiter:
|
|
|
84
84
|
}
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
+
pub fn expect_byte_string(expr: &Expr, expected: &str, raw: bool, delimiter: StringDelimiter) {
|
|
88
|
+
match expr {
|
|
89
|
+
Expr::String {
|
|
90
|
+
value,
|
|
91
|
+
raw: is_raw,
|
|
92
|
+
bytes,
|
|
93
|
+
delimiter: actual_delimiter,
|
|
94
|
+
..
|
|
95
|
+
} => {
|
|
96
|
+
assert_eq!(value, expected);
|
|
97
|
+
assert_eq!(*is_raw, raw);
|
|
98
|
+
assert!(*bytes, "Expected byte string (bytes=true)");
|
|
99
|
+
assert_eq!(*actual_delimiter, delimiter);
|
|
100
|
+
}
|
|
101
|
+
other => panic!("Expected byte string, got {other:?}"),
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
pub fn expect_byte_fstring(expr: &Expr) {
|
|
106
|
+
match expr {
|
|
107
|
+
Expr::FString { bytes, .. } => {
|
|
108
|
+
assert!(*bytes, "Expected byte f-string (bytes=true)");
|
|
109
|
+
}
|
|
110
|
+
other => panic!("Expected byte f-string, got {other:?}"),
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
87
114
|
pub fn expect_span_start(span: &SourceSpan, line: usize, column: usize) {
|
|
88
115
|
assert_eq!(span.start.line, line);
|
|
89
116
|
assert_eq!(span.start.column, column);
|