tree-sitter-sas 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ix
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: tree-sitter-sas
3
+ Version: 0.3.8
4
+ Summary: SAS language grammar for tree-sitter
5
+ Author-email: Brandon Garate <bgarate@ix-infra.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ix-infrastructure/tree-sitter-sas
8
+ Keywords: incremental,parsing,tree-sitter,tree-sitter-sas
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Software Development :: Compilers
11
+ Classifier: Topic :: Text Processing :: Linguistic
12
+ Classifier: Typing :: Typed
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Provides-Extra: core
17
+ Requires-Dist: tree-sitter~=0.24; extra == "core"
18
+ Dynamic: license-file
19
+
20
+ # tree-sitter-sas
21
+
22
+ A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
23
+
24
+ [![npm](https://img.shields.io/npm/v/tree-sitter-sas)](https://www.npmjs.com/package/tree-sitter-sas)
25
+ [![CI](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml/badge.svg)](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
26
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
27
+
28
+ ## Node bindings
29
+
30
+ Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
31
+
32
+ ```bash
33
+ npm install tree-sitter-sas
34
+ ```
35
+
36
+ Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
37
+
38
+ ## Rust bindings
39
+
40
+ > **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
41
+ >
42
+ > ```toml
43
+ > [dependencies]
44
+ > tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
45
+ > ```
46
+
47
+ ## Python bindings
48
+
49
+ > Published on PyPI as `tree-sitter-sas` (`pip install tree-sitter-sas`). To install the latest source from GitHub instead:
50
+ >
51
+ > ```bash
52
+ > pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
53
+ > ```
54
+
55
+ ## Go bindings
56
+
57
+ ```go
58
+ import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
59
+ ```
60
+
61
+ ## Swift bindings
62
+
63
+ Add to `Package.swift`:
64
+
65
+ ```swift
66
+ .package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
67
+ ```
68
+
69
+ ## Architecture
70
+
71
+ ```mermaid
72
+ flowchart LR
73
+ subgraph source ["Grammar source"]
74
+ G["grammar.js\n(tree-sitter DSL)"]
75
+ SC["src/scanner.c\n(external scanner)"]
76
+ end
77
+
78
+ GEN(["tree-sitter generate"])
79
+
80
+ subgraph generated ["Generated parser"]
81
+ PC["src/parser.c"]
82
+ NT["src/node-types.json"]
83
+ end
84
+
85
+ subgraph queries ["Queries"]
86
+ HL["queries/highlights.scm\n(syntax highlighting)"]
87
+ TG["queries/tags.scm\n(symbol index)"]
88
+ end
89
+
90
+ subgraph bindings ["Language bindings"]
91
+ N["Node · npm"]
92
+ R["Rust · crates.io"]
93
+ PY["Python · PyPI"]
94
+ GO["Go"]
95
+ SW["Swift"]
96
+ end
97
+
98
+ G & SC --> GEN --> PC & NT
99
+ NT -.-> HL & TG
100
+ PC --> N & R & PY & GO & SW
101
+ ```
102
+
103
+ `grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
104
+ disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
105
+ calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
106
+ character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
107
+ Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
108
+ (`src/node-types.json`).
109
+
110
+ ## Language coverage
111
+
112
+ | Construct | Node type |
113
+ |---|---|
114
+ | `DATA` step | `data_step` |
115
+ | `PROC` step | `proc_step` |
116
+ | `%MACRO` / `%MEND` definition | `macro_definition` |
117
+ | `%name(args)` call statement | `macro_call_statement` |
118
+ | `%name(args)` inline call | `macro_call` |
119
+ | `%LET var = value` | `macro_variable_assignment` |
120
+ | `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
121
+ | `%INCLUDE 'path'` | `include_statement` |
122
+ | `LIBNAME libref ...` | `libname_statement` |
123
+ | `OPTIONS ...` | `options_statement` |
124
+ | `/* ... */` block comments | `block_comment` |
125
+ | `* ... ;` line comments | `line_comment` |
126
+ | `%* ... ;` macro comments | `percent_comment` |
127
+ | Everything else | `generic_statement` (flat fallback) |
128
+
129
+ Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
130
+ text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
131
+ breaks the parse.
132
+
133
+ ## Known deviations
134
+
135
+ ### `generic_statement` as a catch-all
136
+
137
+ SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
138
+ would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
139
+ fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
140
+ proc statement structure is not captured in the tree, but the tree is always well-formed and the
141
+ surrounding program structure is always intact.
142
+
143
+ ### Macro keyword disambiguation via external scanner
144
+
145
+ `%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
146
+ resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
147
+ followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
148
+ SAS's own behavior.
149
+
150
+ ### Bare `%` in non-macro contexts
151
+
152
+ A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
153
+ scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
154
+ without triggering a macro parse path.
155
+
156
+ ## References
157
+
158
+ - [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
159
+ - [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
160
+ - [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
@@ -0,0 +1,141 @@
1
+ # tree-sitter-sas
2
+
3
+ A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
4
+
5
+ [![npm](https://img.shields.io/npm/v/tree-sitter-sas)](https://www.npmjs.com/package/tree-sitter-sas)
6
+ [![CI](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml/badge.svg)](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
8
+
9
+ ## Node bindings
10
+
11
+ Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
12
+
13
+ ```bash
14
+ npm install tree-sitter-sas
15
+ ```
16
+
17
+ Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
18
+
19
+ ## Rust bindings
20
+
21
+ > **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
22
+ >
23
+ > ```toml
24
+ > [dependencies]
25
+ > tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
26
+ > ```
27
+
28
+ ## Python bindings
29
+
30
+ > Published on PyPI as `tree-sitter-sas` (`pip install tree-sitter-sas`). To install the latest source from GitHub instead:
31
+ >
32
+ > ```bash
33
+ > pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
34
+ > ```
35
+
36
+ ## Go bindings
37
+
38
+ ```go
39
+ import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
40
+ ```
41
+
42
+ ## Swift bindings
43
+
44
+ Add to `Package.swift`:
45
+
46
+ ```swift
47
+ .package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
48
+ ```
49
+
50
+ ## Architecture
51
+
52
+ ```mermaid
53
+ flowchart LR
54
+ subgraph source ["Grammar source"]
55
+ G["grammar.js\n(tree-sitter DSL)"]
56
+ SC["src/scanner.c\n(external scanner)"]
57
+ end
58
+
59
+ GEN(["tree-sitter generate"])
60
+
61
+ subgraph generated ["Generated parser"]
62
+ PC["src/parser.c"]
63
+ NT["src/node-types.json"]
64
+ end
65
+
66
+ subgraph queries ["Queries"]
67
+ HL["queries/highlights.scm\n(syntax highlighting)"]
68
+ TG["queries/tags.scm\n(symbol index)"]
69
+ end
70
+
71
+ subgraph bindings ["Language bindings"]
72
+ N["Node · npm"]
73
+ R["Rust · crates.io"]
74
+ PY["Python · PyPI"]
75
+ GO["Go"]
76
+ SW["Swift"]
77
+ end
78
+
79
+ G & SC --> GEN --> PC & NT
80
+ NT -.-> HL & TG
81
+ PC --> N & R & PY & GO & SW
82
+ ```
83
+
84
+ `grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
85
+ disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
86
+ calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
87
+ character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
88
+ Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
89
+ (`src/node-types.json`).
90
+
91
+ ## Language coverage
92
+
93
+ | Construct | Node type |
94
+ |---|---|
95
+ | `DATA` step | `data_step` |
96
+ | `PROC` step | `proc_step` |
97
+ | `%MACRO` / `%MEND` definition | `macro_definition` |
98
+ | `%name(args)` call statement | `macro_call_statement` |
99
+ | `%name(args)` inline call | `macro_call` |
100
+ | `%LET var = value` | `macro_variable_assignment` |
101
+ | `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
102
+ | `%INCLUDE 'path'` | `include_statement` |
103
+ | `LIBNAME libref ...` | `libname_statement` |
104
+ | `OPTIONS ...` | `options_statement` |
105
+ | `/* ... */` block comments | `block_comment` |
106
+ | `* ... ;` line comments | `line_comment` |
107
+ | `%* ... ;` macro comments | `percent_comment` |
108
+ | Everything else | `generic_statement` (flat fallback) |
109
+
110
+ Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
111
+ text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
112
+ breaks the parse.
113
+
114
+ ## Known deviations
115
+
116
+ ### `generic_statement` as a catch-all
117
+
118
+ SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
119
+ would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
120
+ fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
121
+ proc statement structure is not captured in the tree, but the tree is always well-formed and the
122
+ surrounding program structure is always intact.
123
+
124
+ ### Macro keyword disambiguation via external scanner
125
+
126
+ `%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
127
+ resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
128
+ followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
129
+ SAS's own behavior.
130
+
131
+ ### Bare `%` in non-macro contexts
132
+
133
+ A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
134
+ scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
135
+ without triggering a macro parse path.
136
+
137
+ ## References
138
+
139
+ - [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
140
+ - [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
141
+ - [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
@@ -0,0 +1,43 @@
1
+ """SAS language grammar for tree-sitter"""
2
+
3
+ from importlib.resources import files as _files
4
+
5
+ from ._binding import language
6
+
7
+
8
+ def _get_query(name, file):
9
+ try:
10
+ query = _files(f"{__package__}") / file
11
+ globals()[name] = query.read_text()
12
+ except FileNotFoundError:
13
+ globals()[name] = None
14
+ return globals()[name]
15
+
16
+
17
+ def __getattr__(name):  # Module-level attribute hook: maps each *_QUERY name to its query file via _get_query.
18
+ if name == "HIGHLIGHTS_QUERY":
19
+ return _get_query("HIGHLIGHTS_QUERY", "queries/highlights.scm")
20
+ if name == "INJECTIONS_QUERY":
21
+ return _get_query("INJECTIONS_QUERY", "queries/injections.scm")
22
+ if name == "LOCALS_QUERY":
23
+ return _get_query("LOCALS_QUERY", "queries/locals.scm")
24
+ if name == "TAGS_QUERY":
25
+ return _get_query("TAGS_QUERY", "queries/tags.scm")
26
+
27
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")  # any other name is not provided by this module
28
+
29
+
30
+ __all__ = [  # public names: the binding function plus the four query constants
31
+ "language",
32
+ "HIGHLIGHTS_QUERY",
33
+ "INJECTIONS_QUERY",
34
+ "LOCALS_QUERY",
35
+ "TAGS_QUERY",
36
+ ]
37
+
38
+
39
+ def __dir__():  # Return the sorted list of __all__ plus the listed module dunder names.
40
+ return sorted(__all__ + [
41
+ "__all__", "__builtins__", "__cached__", "__doc__", "__file__",
42
+ "__loader__", "__name__", "__package__", "__path__", "__spec__",
43
+ ])
@@ -0,0 +1,17 @@
1
+ from typing import Final
2
+ from typing_extensions import CapsuleType
3
+
4
+ HIGHLIGHTS_QUERY: Final[str] | None
5
+ """The syntax highlighting query for this grammar."""
6
+
7
+ INJECTIONS_QUERY: Final[str] | None
8
+ """The language injection query for this grammar."""
9
+
10
+ LOCALS_QUERY: Final[str] | None
11
+ """The local variable query for this grammar."""
12
+
13
+ TAGS_QUERY: Final[str] | None
14
+ """The symbol tagging query for this grammar."""
15
+
16
+ def language() -> CapsuleType:
17
+ """The tree-sitter language function for this grammar."""
@@ -0,0 +1,35 @@
1
+ #include <Python.h>
2
+
3
+ typedef struct TSLanguage TSLanguage;
4
+
5
+ TSLanguage *tree_sitter_sas(void);
6
+
7
+ static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) { /* implements the module's `language` method; both arguments are unused */
8
+ return PyCapsule_New(tree_sitter_sas(), "tree_sitter.Language", NULL); /* wrap the grammar pointer in a capsule named "tree_sitter.Language"; NULL = no destructor */
9
+ }
10
+
11
+ static struct PyModuleDef_Slot slots[] = { /* module slot table, terminated by the {0, NULL} sentinel */
12
+ #ifdef Py_GIL_DISABLED
13
+ {Py_mod_gil, Py_MOD_GIL_NOT_USED}, /* only compiled in when Py_GIL_DISABLED is defined */
14
+ #endif
15
+ {0, NULL}
16
+ };
17
+
18
+ static PyMethodDef methods[] = { /* method table, terminated by the all-NULL sentinel entry */
19
+ {"language", _binding_language, METH_NOARGS, /* exposed to Python as `language`, taking no arguments */
20
+ "Get the tree-sitter language for this grammar."},
21
+ {NULL, NULL, 0, NULL}
22
+ };
23
+
24
+ static struct PyModuleDef module = { /* definition of the `_binding` extension module */
25
+ .m_base = PyModuleDef_HEAD_INIT,
26
+ .m_name = "_binding",
27
+ .m_doc = NULL,
28
+ .m_size = 0, /* no per-module state memory is requested */
29
+ .m_methods = methods,
30
+ .m_slots = slots,
31
+ };
32
+
33
+ PyMODINIT_FUNC PyInit__binding(void) { /* init function for the `_binding` module */
34
+ return PyModuleDef_Init(&module); /* return the module definition (with its slots) to the interpreter */
35
+ }
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: tree-sitter-sas
3
+ Version: 0.3.8
4
+ Summary: SAS language grammar for tree-sitter
5
+ Author-email: Brandon Garate <bgarate@ix-infra.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ix-infrastructure/tree-sitter-sas
8
+ Keywords: incremental,parsing,tree-sitter,tree-sitter-sas
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Software Development :: Compilers
11
+ Classifier: Topic :: Text Processing :: Linguistic
12
+ Classifier: Typing :: Typed
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Provides-Extra: core
17
+ Requires-Dist: tree-sitter~=0.24; extra == "core"
18
+ Dynamic: license-file
19
+
20
+ # tree-sitter-sas
21
+
22
+ A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
23
+
24
+ [![npm](https://img.shields.io/npm/v/tree-sitter-sas)](https://www.npmjs.com/package/tree-sitter-sas)
25
+ [![CI](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml/badge.svg)](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
26
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
27
+
28
+ ## Node bindings
29
+
30
+ Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
31
+
32
+ ```bash
33
+ npm install tree-sitter-sas
34
+ ```
35
+
36
+ Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
37
+
38
+ ## Rust bindings
39
+
40
+ > **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
41
+ >
42
+ > ```toml
43
+ > [dependencies]
44
+ > tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
45
+ > ```
46
+
47
+ ## Python bindings
48
+
49
+ > Published on PyPI as `tree-sitter-sas` (`pip install tree-sitter-sas`). To install the latest source from GitHub instead:
50
+ >
51
+ > ```bash
52
+ > pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
53
+ > ```
54
+
55
+ ## Go bindings
56
+
57
+ ```go
58
+ import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
59
+ ```
60
+
61
+ ## Swift bindings
62
+
63
+ Add to `Package.swift`:
64
+
65
+ ```swift
66
+ .package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
67
+ ```
68
+
69
+ ## Architecture
70
+
71
+ ```mermaid
72
+ flowchart LR
73
+ subgraph source ["Grammar source"]
74
+ G["grammar.js\n(tree-sitter DSL)"]
75
+ SC["src/scanner.c\n(external scanner)"]
76
+ end
77
+
78
+ GEN(["tree-sitter generate"])
79
+
80
+ subgraph generated ["Generated parser"]
81
+ PC["src/parser.c"]
82
+ NT["src/node-types.json"]
83
+ end
84
+
85
+ subgraph queries ["Queries"]
86
+ HL["queries/highlights.scm\n(syntax highlighting)"]
87
+ TG["queries/tags.scm\n(symbol index)"]
88
+ end
89
+
90
+ subgraph bindings ["Language bindings"]
91
+ N["Node · npm"]
92
+ R["Rust · crates.io"]
93
+ PY["Python · PyPI"]
94
+ GO["Go"]
95
+ SW["Swift"]
96
+ end
97
+
98
+ G & SC --> GEN --> PC & NT
99
+ NT -.-> HL & TG
100
+ PC --> N & R & PY & GO & SW
101
+ ```
102
+
103
+ `grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
104
+ disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
105
+ calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
106
+ character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
107
+ Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
108
+ (`src/node-types.json`).
109
+
110
+ ## Language coverage
111
+
112
+ | Construct | Node type |
113
+ |---|---|
114
+ | `DATA` step | `data_step` |
115
+ | `PROC` step | `proc_step` |
116
+ | `%MACRO` / `%MEND` definition | `macro_definition` |
117
+ | `%name(args)` call statement | `macro_call_statement` |
118
+ | `%name(args)` inline call | `macro_call` |
119
+ | `%LET var = value` | `macro_variable_assignment` |
120
+ | `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
121
+ | `%INCLUDE 'path'` | `include_statement` |
122
+ | `LIBNAME libref ...` | `libname_statement` |
123
+ | `OPTIONS ...` | `options_statement` |
124
+ | `/* ... */` block comments | `block_comment` |
125
+ | `* ... ;` line comments | `line_comment` |
126
+ | `%* ... ;` macro comments | `percent_comment` |
127
+ | Everything else | `generic_statement` (flat fallback) |
128
+
129
+ Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
130
+ text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
131
+ breaks the parse.
132
+
133
+ ## Known deviations
134
+
135
+ ### `generic_statement` as a catch-all
136
+
137
+ SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
138
+ would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
139
+ fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
140
+ proc statement structure is not captured in the tree, but the tree is always well-formed and the
141
+ surrounding program structure is always intact.
142
+
143
+ ### Macro keyword disambiguation via external scanner
144
+
145
+ `%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
146
+ resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
147
+ followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
148
+ SAS's own behavior.
149
+
150
+ ### Bare `%` in non-macro contexts
151
+
152
+ A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
153
+ scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
154
+ without triggering a macro parse path.
155
+
156
+ ## References
157
+
158
+ - [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
159
+ - [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
160
+ - [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
@@ -0,0 +1,15 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ bindings/python/tree_sitter_sas/__init__.py
6
+ bindings/python/tree_sitter_sas/__init__.pyi
7
+ bindings/python/tree_sitter_sas/binding.c
8
+ bindings/python/tree_sitter_sas/py.typed
9
+ bindings/python/tree_sitter_sas.egg-info/PKG-INFO
10
+ bindings/python/tree_sitter_sas.egg-info/SOURCES.txt
11
+ bindings/python/tree_sitter_sas.egg-info/dependency_links.txt
12
+ bindings/python/tree_sitter_sas.egg-info/not-zip-safe
13
+ bindings/python/tree_sitter_sas.egg-info/requires.txt
14
+ bindings/python/tree_sitter_sas.egg-info/top_level.txt
15
+ src/parser.c
@@ -0,0 +1,3 @@
1
+
2
+ [core]
3
+ tree-sitter~=0.24
@@ -0,0 +1,2 @@
1
+ _binding
2
+ tree_sitter_sas
@@ -0,0 +1,29 @@
1
+ [build-system]
2
+ requires = ["setuptools>=62.4.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tree-sitter-sas"
7
+ description = "SAS language grammar for tree-sitter"
8
+ version = "0.3.8"
9
+ keywords = ["incremental", "parsing", "tree-sitter", "tree-sitter-sas"]
10
+ classifiers = [
11
+ "Intended Audience :: Developers",
12
+ "Topic :: Software Development :: Compilers",
13
+ "Topic :: Text Processing :: Linguistic",
14
+ "Typing :: Typed",
15
+ ]
16
+ authors = [{ name = "Brandon Garate", email = "bgarate@ix-infra.com" }]
17
+ requires-python = ">=3.10"
18
+ license.text = "MIT"
19
+ readme = "README.md"
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/ix-infrastructure/tree-sitter-sas"
23
+
24
+ [project.optional-dependencies]
25
+ core = ["tree-sitter~=0.24"]
26
+
27
+ [tool.cibuildwheel]
28
+ build = "cp310-*"
29
+ build-frontend = "build"
@@ -0,0 +1,20 @@
1
+ (block_comment) @comment.block
2
+ (line_comment) @comment.line
3
+
4
+ (macro_definition) @function
5
+ (macro_name) @function.method
6
+
7
+ (macro_variable_assignment) @keyword
8
+ (macro_variable_ref) @variable
9
+
10
+ (string_literal) @string
11
+
12
+ (data_step_header) @keyword
13
+ (proc_step_header) @keyword
14
+ (run_statement) @keyword
15
+ (run_or_quit_statement) @keyword
16
+ (macro_end) @keyword
17
+
18
+ (libname_statement) @keyword
19
+ (include_statement) @keyword
20
+ (options_statement) @keyword