snail-lang 0.5.1__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {snail_lang-0.5.1 → snail_lang-0.5.3}/Cargo.lock +7 -7
  2. {snail_lang-0.5.1 → snail_lang-0.5.3}/PKG-INFO +22 -5
  3. {snail_lang-0.5.1 → snail_lang-0.5.3}/README.md +21 -4
  4. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/Cargo.toml +1 -1
  5. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/ast.rs +2 -0
  6. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/Cargo.toml +1 -1
  7. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/src/lib.rs +14 -0
  8. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/Cargo.toml +1 -1
  9. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/Cargo.toml +1 -1
  10. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/expr.rs +42 -6
  11. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/helpers.rs +31 -0
  12. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/Cargo.toml +1 -1
  13. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/lib.rs +34 -26
  14. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/snail.pest +7 -5
  15. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/string.rs +51 -15
  16. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/common.rs +27 -0
  17. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/errors.rs +47 -1
  18. snail_lang-0.5.3/crates/snail-parser/tests/syntax_strings.rs +190 -0
  19. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/Cargo.toml +1 -1
  20. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/src/lib.rs +127 -18
  21. {snail_lang-0.5.1 → snail_lang-0.5.3}/pyproject.toml +1 -1
  22. snail_lang-0.5.3/python/snail/__init__.py +25 -0
  23. snail_lang-0.5.3/python/snail/cli.py +280 -0
  24. snail_lang-0.5.3/python/snail/runtime/__init__.py +167 -0
  25. {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/structured_accessor.py +3 -4
  26. snail_lang-0.5.1/crates/snail-parser/tests/syntax_strings.rs +0 -92
  27. snail_lang-0.5.1/python/snail/__init__.py +0 -10
  28. snail_lang-0.5.1/python/snail/cli.py +0 -177
  29. snail_lang-0.5.1/python/snail/runtime/__init__.py +0 -75
  30. {snail_lang-0.5.1 → snail_lang-0.5.3}/Cargo.toml +0 -0
  31. {snail_lang-0.5.1 → snail_lang-0.5.3}/LICENSE +0 -0
  32. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/README.md +0 -0
  33. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/awk.rs +0 -0
  34. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-ast/src/lib.rs +0 -0
  35. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-core/README.md +0 -0
  36. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/README.md +0 -0
  37. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-error/src/lib.rs +0 -0
  38. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/README.md +0 -0
  39. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/awk.rs +0 -0
  40. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/constants.rs +0 -0
  41. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/lib.rs +0 -0
  42. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/operators.rs +0 -0
  43. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/program.rs +0 -0
  44. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/py_ast.rs +0 -0
  45. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-lower/src/stmt.rs +0 -0
  46. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/README.md +0 -0
  47. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/awk.rs +0 -0
  48. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/expr.rs +0 -0
  49. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/literal.rs +0 -0
  50. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/stmt.rs +0 -0
  51. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/src/util.rs +0 -0
  52. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/parser.rs +0 -0
  53. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/statements.rs +0 -0
  54. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-parser/tests/syntax_expressions.rs +0 -0
  55. {snail_lang-0.5.1 → snail_lang-0.5.3}/crates/snail-python/build.rs +0 -0
  56. {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/compact_try.py +0 -0
  57. {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/regex.py +0 -0
  58. {snail_lang-0.5.1 → snail_lang-0.5.3}/python/snail/runtime/subprocess.py +0 -0
@@ -485,11 +485,11 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
485
485
 
486
486
  [[package]]
487
487
  name = "snail-ast"
488
- version = "0.5.1"
488
+ version = "0.5.3"
489
489
 
490
490
  [[package]]
491
491
  name = "snail-core"
492
- version = "0.5.1"
492
+ version = "0.5.3"
493
493
  dependencies = [
494
494
  "pyo3",
495
495
  "snail-ast",
@@ -500,14 +500,14 @@ dependencies = [
500
500
 
501
501
  [[package]]
502
502
  name = "snail-error"
503
- version = "0.5.1"
503
+ version = "0.5.3"
504
504
  dependencies = [
505
505
  "snail-ast",
506
506
  ]
507
507
 
508
508
  [[package]]
509
509
  name = "snail-lower"
510
- version = "0.5.1"
510
+ version = "0.5.3"
511
511
  dependencies = [
512
512
  "pyo3",
513
513
  "snail-ast",
@@ -516,7 +516,7 @@ dependencies = [
516
516
 
517
517
  [[package]]
518
518
  name = "snail-parser"
519
- version = "0.5.1"
519
+ version = "0.5.3"
520
520
  dependencies = [
521
521
  "pest",
522
522
  "pest_derive",
@@ -526,7 +526,7 @@ dependencies = [
526
526
 
527
527
  [[package]]
528
528
  name = "snail-proptest"
529
- version = "0.5.1"
529
+ version = "0.5.3"
530
530
  dependencies = [
531
531
  "proptest",
532
532
  "pyo3",
@@ -540,7 +540,7 @@ dependencies = [
540
540
 
541
541
  [[package]]
542
542
  name = "snail-python"
543
- version = "0.5.1"
543
+ version = "0.5.3"
544
544
  dependencies = [
545
545
  "pyo3",
546
546
  "snail-core",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snail-lang
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Requires-Dist: jmespath>=1.0.1
5
5
  Requires-Dist: maturin>=1.5 ; extra == 'dev'
6
6
  Requires-Dist: pytest ; extra == 'dev'
@@ -19,6 +19,12 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
19
19
 
20
20
  **Snail** is a programming language that compiles to Python, combining Python's familiarity and extensive libraries with Perl/awk-inspired syntax for quick scripts and one-liners.
21
21
 
22
+ ## AI Slop!
23
+
24
+ Snail is me learning how to develop code using LLMs. I think it's neat, and
25
+ maybe useful. I don't think this is high quality. I am going to try and LLM my
26
+ way into something good, but it's certainly not there yet.
27
+
22
28
  ## Installing Snail
23
29
 
24
30
  Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and then run:
@@ -51,14 +57,18 @@ semicolons are optional. You can separate statements with newlines.
51
57
 
52
58
  Process files line-by-line with familiar awk semantics:
53
59
 
54
- ```snail-awk("5\n4\n3\n2\n1\nbanana\n")
55
- BEGIN { total = 0 }
56
- /^[0-9]+/ { total = total + int($1) }
57
- END { print("Sum:", total); assert total == 15}
60
+ ```snail-awk("hello world\nfoo bar\n")
61
+ /hello/ { print("matched:", $0) }
62
+ { print($1, "->", $2) }
58
63
  ```
59
64
 
60
65
  Built-in variables: `$0` (line), `$1`, `$2` etc (access fields), `$n` (line number), `$fn` (per-file line number), `$p` (file path), `$m` (last match).
61
66
 
67
+ Begin/end blocks use CLI flags for setup and teardown:
68
+ ```bash
69
+ echo -e "5\n4\n3\n2\n1" | snail --awk -b 'total = 0' -e 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
70
+ ```
71
+
62
72
  ### Compact Error Handling
63
73
 
64
74
  The `?` operator makes error handling terse yet expressive:
@@ -229,6 +239,13 @@ Plug 'sudonym1/snail', { 'rtp': 'extras/vim' }
229
239
 
230
240
  See [extras/vim/README.md](extras/vim/README.md) for details. Tree-sitter grammar available in `extras/tree-sitter-snail/`.
231
241
 
242
+ ## Performance
243
+
244
+ Section is WIP
245
+
246
+ Startup performance is benchmarked with `./benchmarks/startup.py`. On my
247
+ machine snail adds 5 ms of overhead above the regular python3 interpreter.
248
+
232
249
  ## 🛠️ Building from Source
233
250
 
234
251
  ### Prerequisites
@@ -7,6 +7,12 @@
7
7
 
8
8
  **Snail** is a programming language that compiles to Python, combining Python's familiarity and extensive libraries with Perl/awk-inspired syntax for quick scripts and one-liners.
9
9
 
10
+ ## AI Slop!
11
+
12
+ Snail is me learning how to develop code using LLMs. I think it's neat, and
13
+ maybe useful. I don't think this is high quality. I am going to try and LLM my
14
+ way into something good, but it's certainly not there yet.
15
+
10
16
  ## Installing Snail
11
17
 
12
18
  Install [uv](https://docs.astral.sh/uv/getting-started/installation/) and then run:
@@ -39,14 +45,18 @@ semicolons are optional. You can separate statements with newlines.
39
45
 
40
46
  Process files line-by-line with familiar awk semantics:
41
47
 
42
- ```snail-awk("5\n4\n3\n2\n1\nbanana\n")
43
- BEGIN { total = 0 }
44
- /^[0-9]+/ { total = total + int($1) }
45
- END { print("Sum:", total); assert total == 15}
48
+ ```snail-awk("hello world\nfoo bar\n")
49
+ /hello/ { print("matched:", $0) }
50
+ { print($1, "->", $2) }
46
51
  ```
47
52
 
48
53
  Built-in variables: `$0` (line), `$1`, `$2` etc (access fields), `$n` (line number), `$fn` (per-file line number), `$p` (file path), `$m` (last match).
49
54
 
55
+ Begin/end blocks use CLI flags for setup and teardown:
56
+ ```bash
57
+ echo -e "5\n4\n3\n2\n1" | snail --awk -b 'total = 0' -e 'print("Sum:", total)' '/^[0-9]+/ { total = total + int($1) }'
58
+ ```
59
+
50
60
  ### Compact Error Handling
51
61
 
52
62
  The `?` operator makes error handling terse yet expressive:
@@ -217,6 +227,13 @@ Plug 'sudonym1/snail', { 'rtp': 'extras/vim' }
217
227
 
218
228
  See [extras/vim/README.md](extras/vim/README.md) for details. Tree-sitter grammar available in `extras/tree-sitter-snail/`.
219
229
 
230
+ ## Performance
231
+
232
+ Section is WIP
233
+
234
+ Startup performance is benchmarked with `./benchmarks/startup.py`. On my
235
+ machine snail adds 5 ms of overhead above the regular python3 interpreter.
236
+
220
237
  ## 🛠️ Building from Source
221
238
 
222
239
  ### Prerequisites
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-ast"
3
- version = "0.5.1"
3
+ version = "0.5.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -189,11 +189,13 @@ pub enum Expr {
189
189
  String {
190
190
  value: String,
191
191
  raw: bool,
192
+ bytes: bool,
192
193
  delimiter: StringDelimiter,
193
194
  span: SourceSpan,
194
195
  },
195
196
  FString {
196
197
  parts: Vec<FStringPart>,
198
+ bytes: bool,
197
199
  span: SourceSpan,
198
200
  },
199
201
  Bool {
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-core"
3
- version = "0.5.1"
3
+ version = "0.5.3"
4
4
  edition.workspace = true
5
5
  readme = "README.md"
6
6
 
@@ -34,3 +34,17 @@ pub fn compile_snail_source_with_auto_print(
34
34
  }
35
35
  }
36
36
  }
37
+
38
+ /// Compile an awk program with separate begin and end code blocks.
39
+ /// Each begin/end source is parsed as a regular Snail program.
40
+ pub fn compile_awk_source_with_begin_end(
41
+ py: Python<'_>,
42
+ main_source: &str,
43
+ begin_sources: &[&str],
44
+ end_sources: &[&str],
45
+ auto_print_last: bool,
46
+ ) -> Result<PyObject, SnailError> {
47
+ let program = parse_awk_program_with_begin_end(main_source, begin_sources, end_sources)?;
48
+ let module = lower_awk_program_with_auto_print(py, &program, auto_print_last)?;
49
+ Ok(module)
50
+ }
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-error"
3
- version = "0.5.1"
3
+ version = "0.5.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-lower"
3
- version = "0.5.1"
3
+ version = "0.5.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -4,7 +4,7 @@ use snail_ast::*;
4
4
  use snail_error::LowerError;
5
5
 
6
6
  use crate::constants::*;
7
- use crate::helpers::{name_expr, number_expr, regex_pattern_expr, string_expr};
7
+ use crate::helpers::{byte_string_expr, name_expr, number_expr, regex_pattern_expr, string_expr};
8
8
  use crate::operators::{lower_binary_op, lower_bool_op, lower_compare_op, lower_unary_op};
9
9
  use crate::py_ast::{AstBuilder, py_err_to_lower};
10
10
 
@@ -441,18 +441,53 @@ pub(crate) fn lower_expr_with_exception(
441
441
  Expr::String {
442
442
  value,
443
443
  raw,
444
+ bytes,
444
445
  delimiter,
445
446
  span,
446
- } => string_expr(builder, value, *raw, *delimiter, span),
447
- Expr::FString { parts, span } => {
447
+ } => {
448
+ if *bytes {
449
+ byte_string_expr(builder, value, *raw, *delimiter, span)
450
+ } else {
451
+ string_expr(builder, value, *raw, *delimiter, span)
452
+ }
453
+ }
454
+ Expr::FString { parts, bytes, span } => {
448
455
  let values = lower_fstring_parts(builder, parts, exception_name)?;
449
- builder
456
+ let joined = builder
450
457
  .call_node(
451
458
  "JoinedStr",
452
459
  vec![PyList::new_bound(builder.py(), values).into_py(builder.py())],
453
460
  span,
454
461
  )
455
- .map_err(py_err_to_lower)
462
+ .map_err(py_err_to_lower)?;
463
+
464
+ if *bytes {
465
+ // Wrap in .encode() call: f"...".encode()
466
+ let encode_attr = builder
467
+ .call_node(
468
+ "Attribute",
469
+ vec![
470
+ joined,
471
+ "encode".to_string().into_py(builder.py()),
472
+ builder.load_ctx().map_err(py_err_to_lower)?,
473
+ ],
474
+ span,
475
+ )
476
+ .map_err(py_err_to_lower)?;
477
+ builder
478
+ .call_node(
479
+ "Call",
480
+ vec![
481
+ encode_attr,
482
+ PyList::empty_bound(builder.py()).into_py(builder.py()),
483
+ PyList::empty_bound(builder.py()).into_py(builder.py()),
484
+ ],
485
+ span,
486
+ )
487
+ .map_err(py_err_to_lower)
488
+ } else {
489
+ Ok(joined)
490
+ }
456
491
  }
457
492
  Expr::Bool { value, span } => builder
458
493
  .call_node("Constant", vec![value.into_py(builder.py())], span)
@@ -1324,7 +1359,7 @@ fn substitute_placeholder(expr: &Expr, replacement: &Expr) -> Expr {
1324
1359
  | Expr::None { .. }
1325
1360
  | Expr::StructuredAccessor { .. }
1326
1361
  | Expr::FieldIndex { .. } => expr.clone(),
1327
- Expr::FString { parts, span } => Expr::FString {
1362
+ Expr::FString { parts, bytes, span } => Expr::FString {
1328
1363
  parts: parts
1329
1364
  .iter()
1330
1365
  .map(|part| match part {
@@ -1334,6 +1369,7 @@ fn substitute_placeholder(expr: &Expr, replacement: &Expr) -> Expr {
1334
1369
  }
1335
1370
  })
1336
1371
  .collect(),
1372
+ bytes: *bytes,
1337
1373
  span: span.clone(),
1338
1374
  },
1339
1375
  Expr::Unary { op, expr, span } => Expr::Unary {
@@ -75,6 +75,37 @@ pub(crate) fn string_expr(
75
75
  Ok(expr.into_py(builder.py()))
76
76
  }
77
77
 
78
+ pub(crate) fn byte_string_expr(
79
+ builder: &AstBuilder<'_>,
80
+ value: &str,
81
+ raw: bool,
82
+ delimiter: StringDelimiter,
83
+ span: &SourceSpan,
84
+ ) -> Result<PyObject, LowerError> {
85
+ let rendered = match (raw, delimiter) {
86
+ (true, StringDelimiter::Single) => format!("rb'{}'", value),
87
+ (true, StringDelimiter::Double) => format!("rb\"{}\"", value),
88
+ (true, StringDelimiter::TripleSingle) => format!("rb'''{}'''", value),
89
+ (true, StringDelimiter::TripleDouble) => format!("rb\"\"\"{}\"\"\"", value),
90
+ (false, StringDelimiter::Single) => format!("b'{}'", value),
91
+ (false, StringDelimiter::Double) => format!("b\"{}\"", value),
92
+ (false, StringDelimiter::TripleSingle) => format!("b'''{}'''", value),
93
+ (false, StringDelimiter::TripleDouble) => format!("b\"\"\"{}\"\"\"", value),
94
+ };
95
+ let expr = builder
96
+ .py()
97
+ .import_bound("ast")
98
+ .and_then(|ast| ast.getattr("parse"))
99
+ .and_then(|parse| parse.call1((rendered,)))
100
+ .and_then(|module| module.getattr("body"))
101
+ .and_then(|body| body.get_item(0))
102
+ .and_then(|expr_stmt| expr_stmt.getattr("value"));
103
+
104
+ let expr = expr.map_err(py_err_to_lower)?;
105
+ set_location(&expr, span).map_err(py_err_to_lower)?;
106
+ Ok(expr.into_py(builder.py()))
107
+ }
108
+
78
109
  pub(crate) fn number_expr(
79
110
  builder: &AstBuilder<'_>,
80
111
  value: &str,
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "snail-parser"
3
- version = "0.5.1"
3
+ version = "0.5.3"
4
4
  edition = "2024"
5
5
  readme = "README.md"
6
6
 
@@ -12,7 +12,7 @@ mod string;
12
12
  mod util;
13
13
 
14
14
  use awk::parse_awk_rule;
15
- use stmt::{parse_block, parse_stmt_list};
15
+ use stmt::parse_stmt_list;
16
16
  use util::{error_with_span, full_span, parse_error_from_pest};
17
17
 
18
18
  #[derive(Parser)]
@@ -45,47 +45,55 @@ pub fn parse_awk_program(source: &str) -> Result<AwkProgram, ParseError> {
45
45
  .ok_or_else(|| ParseError::new("missing awk program root"))?;
46
46
  let span = full_span(source);
47
47
 
48
- let mut begin_blocks = Vec::new();
49
48
  let mut rules = Vec::new();
50
- let mut end_blocks = Vec::new();
51
49
 
52
50
  for inner in pair.into_inner() {
53
51
  if inner.as_rule() == Rule::awk_entry_list {
54
52
  for entry in inner.into_inner() {
55
- match entry.as_rule() {
56
- Rule::awk_begin => {
57
- let block = entry
58
- .into_inner()
59
- .find(|pair| pair.as_rule() == Rule::block)
60
- .ok_or_else(|| {
61
- util::error_with_span("missing BEGIN block", span.clone(), source)
62
- })?;
63
- begin_blocks.push(parse_block(block, source)?);
64
- }
65
- Rule::awk_end => {
66
- let block = entry
67
- .into_inner()
68
- .find(|pair| pair.as_rule() == Rule::block)
69
- .ok_or_else(|| {
70
- util::error_with_span("missing END block", span.clone(), source)
71
- })?;
72
- end_blocks.push(parse_block(block, source)?);
73
- }
74
- Rule::awk_rule => rules.push(parse_awk_rule(entry, source)?),
75
- _ => {}
53
+ if entry.as_rule() == Rule::awk_rule {
54
+ rules.push(parse_awk_rule(entry, source)?);
76
55
  }
77
56
  }
78
57
  }
79
58
  }
80
59
 
81
60
  Ok(AwkProgram {
82
- begin_blocks,
61
+ begin_blocks: Vec::new(),
83
62
  rules,
84
- end_blocks,
63
+ end_blocks: Vec::new(),
85
64
  span,
86
65
  })
87
66
  }
88
67
 
68
+ /// Parses an awk program with separate begin and end code sources.
69
+ /// Each begin/end source is parsed as a regular Snail program and its statements
70
+ /// are injected into the resulting AwkProgram.
71
+ pub fn parse_awk_program_with_begin_end(
72
+ main_source: &str,
73
+ begin_sources: &[&str],
74
+ end_sources: &[&str],
75
+ ) -> Result<AwkProgram, ParseError> {
76
+ let mut program = parse_awk_program(main_source)?;
77
+
78
+ // Parse each begin source as a regular program and extract statements
79
+ for source in begin_sources {
80
+ let begin_program = parse_program(source)?;
81
+ if !begin_program.stmts.is_empty() {
82
+ program.begin_blocks.push(begin_program.stmts);
83
+ }
84
+ }
85
+
86
+ // Parse each end source as a regular program and extract statements
87
+ for source in end_sources {
88
+ let end_program = parse_program(source)?;
89
+ if !end_program.stmts.is_empty() {
90
+ program.end_blocks.push(end_program.stmts);
91
+ }
92
+ }
93
+
94
+ Ok(program)
95
+ }
96
+
89
97
  const AWK_ONLY_NAMES: [&str; 4] = ["$n", "$fn", "$p", "$m"];
90
98
  const AWK_ONLY_MESSAGE: &str = "awk variables are only valid in awk mode; use --awk";
91
99
 
@@ -2,11 +2,9 @@
2
2
  program = { SOI ~ stmt_sep* ~ stmt_list? ~ stmt_sep* ~ EOI }
3
3
  awk_program = { SOI ~ stmt_sep* ~ awk_entry_list? ~ stmt_sep* ~ EOI }
4
4
 
5
- // AWK mode: BEGIN blocks, END blocks, and pattern-action rules
5
+ // AWK mode: pattern-action rules
6
6
  awk_entry_list = { awk_entry ~ (stmt_sep* ~ awk_entry)* ~ stmt_sep* }
7
- awk_entry = _{ awk_begin | awk_end | awk_rule }
8
- awk_begin = { "BEGIN" ~ block }
9
- awk_end = { "END" ~ block }
7
+ awk_entry = _{ awk_rule }
10
8
  awk_rule = { block | awk_pattern ~ block? }
11
9
  awk_pattern = { expr }
12
10
 
@@ -223,7 +221,11 @@ injected_var = { "$fn" | "$n" | "$p" | "$m" }
223
221
 
224
222
  // Number, string, and regex literals
225
223
  number = @{ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? }
226
- string = @{ raw_prefix? ~ (triple_double | triple_single | double_string | single_string) }
224
+ string = @{ string_prefix? ~ (triple_double | triple_single | double_string | single_string) }
225
+ string_prefix = { byte_raw_prefix | raw_byte_prefix | byte_prefix | raw_prefix }
226
+ byte_raw_prefix = { "br" }
227
+ raw_byte_prefix = { "rb" }
228
+ byte_prefix = { "b" }
227
229
  raw_prefix = { "r" }
228
230
  triple_double = { "\"\"\"" ~ (!"\"\"\"" ~ ANY)* ~ "\"\"\"" }
229
231
  triple_single = { "'''" ~ (!"'''" ~ ANY)* ~ "'''" }
@@ -17,6 +17,7 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
17
17
  return Ok(Expr::String {
18
18
  value: parsed.content,
19
19
  raw: true,
20
+ bytes: parsed.bytes,
20
21
  delimiter: parsed.delimiter,
21
22
  span,
22
23
  });
@@ -28,12 +29,17 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
28
29
  .any(|part| matches!(part, FStringPart::Expr(_)));
29
30
  if has_expr {
30
31
  let parts = normalize_string_parts(parts, parsed.raw)?;
31
- Ok(Expr::FString { parts, span })
32
+ Ok(Expr::FString {
33
+ parts,
34
+ bytes: parsed.bytes,
35
+ span,
36
+ })
32
37
  } else {
33
38
  let value = join_fstring_text(parts);
34
39
  Ok(Expr::String {
35
40
  value,
36
41
  raw: parsed.raw,
42
+ bytes: parsed.bytes,
37
43
  delimiter: parsed.delimiter,
38
44
  span,
39
45
  })
@@ -43,6 +49,7 @@ pub fn parse_string_or_fstring(pair: Pair<'_, Rule>, source: &str) -> Result<Exp
43
49
  pub struct ParsedStringLiteral {
44
50
  pub content: String,
45
51
  pub raw: bool,
52
+ pub bytes: bool,
46
53
  pub delimiter: StringDelimiter,
47
54
  pub content_offset: usize,
48
55
  }
@@ -50,10 +57,17 @@ pub struct ParsedStringLiteral {
50
57
  pub fn parse_string_literal(pair: Pair<'_, Rule>) -> Result<ParsedStringLiteral, ParseError> {
51
58
  let value = pair.as_str();
52
59
  let span = pair.as_span();
53
- let (raw, rest, prefix_len) = if let Some(stripped) = value.strip_prefix('r') {
54
- (true, stripped, 1usize)
60
+ // Parse prefix - check longer prefixes first
61
+ let (raw, bytes, rest, prefix_len) = if let Some(stripped) = value.strip_prefix("br") {
62
+ (true, true, stripped, 2usize)
63
+ } else if let Some(stripped) = value.strip_prefix("rb") {
64
+ (true, true, stripped, 2usize)
65
+ } else if let Some(stripped) = value.strip_prefix('b') {
66
+ (false, true, stripped, 1usize)
67
+ } else if let Some(stripped) = value.strip_prefix('r') {
68
+ (true, false, stripped, 1usize)
55
69
  } else {
56
- (false, value, 0usize)
70
+ (false, false, value, 0usize)
57
71
  };
58
72
  let (delimiter, open, close) = if rest.starts_with("\"\"\"") {
59
73
  (StringDelimiter::TripleDouble, "\"\"\"", "\"\"\"")
@@ -73,6 +87,7 @@ pub fn parse_string_literal(pair: Pair<'_, Rule>) -> Result<ParsedStringLiteral,
73
87
  Ok(ParsedStringLiteral {
74
88
  content: content.to_string(),
75
89
  raw,
90
+ bytes,
76
91
  delimiter,
77
92
  content_offset,
78
93
  })
@@ -166,15 +181,29 @@ pub fn find_fstring_expr_end(content: &str, start: usize) -> Option<usize> {
166
181
  let mut brace = 0usize;
167
182
  while i < bytes.len() {
168
183
  match bytes[i] {
169
- b'r' => {
170
- if let Some(next) = bytes.get(i + 1)
171
- && (*next == b'\'' || *next == b'"')
172
- {
173
- if let Some(end) = skip_string_literal(bytes, i) {
174
- i = end;
175
- continue;
176
- } else {
177
- return None;
184
+ b'r' | b'b' => {
185
+ // Check for string prefix combinations: r, b, rb, br
186
+ if let Some(next) = bytes.get(i + 1) {
187
+ if *next == b'\'' || *next == b'"' {
188
+ // r"..." or b"..."
189
+ if let Some(end) = skip_string_literal(bytes, i) {
190
+ i = end;
191
+ continue;
192
+ } else {
193
+ return None;
194
+ }
195
+ } else if (*next == b'r' || *next == b'b') && bytes[i] != *next {
196
+ // Could be rb"..." or br"..."
197
+ if let Some(third) = bytes.get(i + 2)
198
+ && (*third == b'\'' || *third == b'"')
199
+ {
200
+ if let Some(end) = skip_string_literal(bytes, i) {
201
+ i = end;
202
+ continue;
203
+ } else {
204
+ return None;
205
+ }
206
+ }
178
207
  }
179
208
  }
180
209
  i += 1;
@@ -221,7 +250,14 @@ pub fn find_fstring_expr_end(content: &str, start: usize) -> Option<usize> {
221
250
 
222
251
  pub fn skip_string_literal(bytes: &[u8], start: usize) -> Option<usize> {
223
252
  let mut i = start;
224
- let raw = if bytes.get(i) == Some(&b'r') {
253
+ // Handle prefixes: br, rb, b, r (check longer prefixes first)
254
+ let raw = if bytes.get(i..i + 2) == Some(b"br") || bytes.get(i..i + 2) == Some(b"rb") {
255
+ i += 2;
256
+ true
257
+ } else if bytes.get(i) == Some(&b'b') {
258
+ i += 1;
259
+ false
260
+ } else if bytes.get(i) == Some(&b'r') {
225
261
  i += 1;
226
262
  true
227
263
  } else {
@@ -327,7 +363,7 @@ pub fn shift_expr_spans(expr: &mut Expr, offset: usize, source: &str) {
327
363
  | Expr::Slice { span, .. } => {
328
364
  *span = shift_span(span, offset, source);
329
365
  }
330
- Expr::FString { parts, span } => {
366
+ Expr::FString { parts, span, .. } => {
331
367
  for part in parts {
332
368
  if let FStringPart::Expr(expr) = part {
333
369
  shift_expr_spans(expr, offset, source);
@@ -84,6 +84,33 @@ pub fn expect_string_contains(expr: &Expr, snippet: &str, raw: bool, delimiter:
84
84
  }
85
85
  }
86
86
 
87
+ pub fn expect_byte_string(expr: &Expr, expected: &str, raw: bool, delimiter: StringDelimiter) {
88
+ match expr {
89
+ Expr::String {
90
+ value,
91
+ raw: is_raw,
92
+ bytes,
93
+ delimiter: actual_delimiter,
94
+ ..
95
+ } => {
96
+ assert_eq!(value, expected);
97
+ assert_eq!(*is_raw, raw);
98
+ assert!(*bytes, "Expected byte string (bytes=true)");
99
+ assert_eq!(*actual_delimiter, delimiter);
100
+ }
101
+ other => panic!("Expected byte string, got {other:?}"),
102
+ }
103
+ }
104
+
105
+ pub fn expect_byte_fstring(expr: &Expr) {
106
+ match expr {
107
+ Expr::FString { bytes, .. } => {
108
+ assert!(*bytes, "Expected byte f-string (bytes=true)");
109
+ }
110
+ other => panic!("Expected byte f-string, got {other:?}"),
111
+ }
112
+ }
113
+
87
114
  pub fn expect_span_start(span: &SourceSpan, line: usize, column: usize) {
88
115
  assert_eq!(span.start.line, line);
89
116
  assert_eq!(span.start.column, column);