tree-sitter-sas 0.3.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_sas-0.3.8/LICENSE +21 -0
- tree_sitter_sas-0.3.8/PKG-INFO +160 -0
- tree_sitter_sas-0.3.8/README.md +141 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas/__init__.py +43 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas/__init__.pyi +17 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas/binding.c +35 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas/py.typed +0 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/PKG-INFO +160 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/SOURCES.txt +15 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/dependency_links.txt +1 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/not-zip-safe +1 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/requires.txt +3 -0
- tree_sitter_sas-0.3.8/bindings/python/tree_sitter_sas.egg-info/top_level.txt +2 -0
- tree_sitter_sas-0.3.8/pyproject.toml +29 -0
- tree_sitter_sas-0.3.8/queries/highlights.scm +20 -0
- tree_sitter_sas-0.3.8/queries/tags.scm +30 -0
- tree_sitter_sas-0.3.8/setup.cfg +4 -0
- tree_sitter_sas-0.3.8/setup.py +77 -0
- tree_sitter_sas-0.3.8/src/parser.c +11489 -0
- tree_sitter_sas-0.3.8/src/tree_sitter/alloc.h +54 -0
- tree_sitter_sas-0.3.8/src/tree_sitter/array.h +291 -0
- tree_sitter_sas-0.3.8/src/tree_sitter/parser.h +266 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ix
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tree-sitter-sas
|
|
3
|
+
Version: 0.3.8
|
|
4
|
+
Summary: SAS language grammar for tree-sitter
|
|
5
|
+
Author-email: Brandon Garate <bgarate@ix-infra.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ix-infrastructure/tree-sitter-sas
|
|
8
|
+
Keywords: incremental,parsing,tree-sitter,tree-sitter-sas
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
11
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
12
|
+
Classifier: Typing :: Typed
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Provides-Extra: core
|
|
17
|
+
Requires-Dist: tree-sitter~=0.24; extra == "core"
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# tree-sitter-sas
|
|
21
|
+
|
|
22
|
+
A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
|
|
23
|
+
|
|
24
|
+
[npm package](https://www.npmjs.com/package/tree-sitter-sas)
|
|
25
|
+
[CI status](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
|
|
26
|
+
[License: MIT](LICENSE)
|
|
27
|
+
|
|
28
|
+
## Node bindings
|
|
29
|
+
|
|
30
|
+
Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install tree-sitter-sas
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
|
|
37
|
+
|
|
38
|
+
## Rust bindings
|
|
39
|
+
|
|
40
|
+
> **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
|
|
41
|
+
>
|
|
42
|
+
> ```toml
|
|
43
|
+
> [dependencies]
|
|
44
|
+
> tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
|
|
45
|
+
> ```
|
|
46
|
+
|
|
47
|
+
## Python bindings
|
|
48
|
+
|
|
49
|
+
> **Coming soon** — a PyPI release is planned. In the meantime, build from source:
|
|
50
|
+
>
|
|
51
|
+
> ```bash
|
|
52
|
+
> pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
|
|
53
|
+
> ```
|
|
54
|
+
|
|
55
|
+
## Go bindings
|
|
56
|
+
|
|
57
|
+
```go
|
|
58
|
+
import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Swift bindings
|
|
62
|
+
|
|
63
|
+
Add to `Package.swift`:
|
|
64
|
+
|
|
65
|
+
```swift
|
|
66
|
+
.package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Architecture
|
|
70
|
+
|
|
71
|
+
```mermaid
|
|
72
|
+
flowchart LR
|
|
73
|
+
subgraph source ["Grammar source"]
|
|
74
|
+
G["grammar.js\n(tree-sitter DSL)"]
|
|
75
|
+
SC["src/scanner.c\n(external scanner)"]
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
GEN(["tree-sitter generate"])
|
|
79
|
+
|
|
80
|
+
subgraph generated ["Generated parser"]
|
|
81
|
+
PC["src/parser.c"]
|
|
82
|
+
NT["src/node-types.json"]
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
subgraph queries ["Queries"]
|
|
86
|
+
HL["queries/highlights.scm\n(syntax highlighting)"]
|
|
87
|
+
TG["queries/tags.scm\n(symbol index)"]
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
subgraph bindings ["Language bindings"]
|
|
91
|
+
N["Node · npm"]
|
|
92
|
+
R["Rust · crates.io"]
|
|
93
|
+
PY["Python · PyPI"]
|
|
94
|
+
GO["Go"]
|
|
95
|
+
SW["Swift"]
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
G & SC --> GEN --> PC & NT
|
|
99
|
+
NT -.-> HL & TG
|
|
100
|
+
PC --> N & R & PY & GO & SW
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
`grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
|
|
104
|
+
disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
|
|
105
|
+
calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
|
|
106
|
+
character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
|
|
107
|
+
Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
|
|
108
|
+
(`src/node-types.json`).
|
|
109
|
+
|
|
110
|
+
## Language coverage
|
|
111
|
+
|
|
112
|
+
| Construct | Node type |
|
|
113
|
+
|---|---|
|
|
114
|
+
| `DATA` step | `data_step` |
|
|
115
|
+
| `PROC` step | `proc_step` |
|
|
116
|
+
| `%MACRO` / `%MEND` definition | `macro_definition` |
|
|
117
|
+
| `%name(args)` call statement | `macro_call_statement` |
|
|
118
|
+
| `%name(args)` inline call | `macro_call` |
|
|
119
|
+
| `%LET var = value` | `macro_variable_assignment` |
|
|
120
|
+
| `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
|
|
121
|
+
| `%INCLUDE 'path'` | `include_statement` |
|
|
122
|
+
| `LIBNAME libref ...` | `libname_statement` |
|
|
123
|
+
| `OPTIONS ...` | `options_statement` |
|
|
124
|
+
| `/* ... */` block comments | `block_comment` |
|
|
125
|
+
| `* ... ;` line comments | `line_comment` |
|
|
126
|
+
| `%* ... ;` macro comments | `percent_comment` |
|
|
127
|
+
| Everything else | `generic_statement` (flat fallback) |
|
|
128
|
+
|
|
129
|
+
Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
|
|
130
|
+
text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
|
|
131
|
+
breaks the parse.
|
|
132
|
+
|
|
133
|
+
## Known deviations
|
|
134
|
+
|
|
135
|
+
### `generic_statement` as a catch-all
|
|
136
|
+
|
|
137
|
+
SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
|
|
138
|
+
would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
|
|
139
|
+
fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
|
|
140
|
+
proc statement structure is not captured in the tree, but the tree is always well-formed and the
|
|
141
|
+
surrounding program structure is always intact.
|
|
142
|
+
|
|
143
|
+
### Macro keyword disambiguation via external scanner
|
|
144
|
+
|
|
145
|
+
`%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
|
|
146
|
+
resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
|
|
147
|
+
followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
|
|
148
|
+
SAS's own behavior.
|
|
149
|
+
|
|
150
|
+
### Bare `%` in non-macro contexts
|
|
151
|
+
|
|
152
|
+
A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
|
|
153
|
+
scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
|
|
154
|
+
without triggering a macro parse path.
|
|
155
|
+
|
|
156
|
+
## References
|
|
157
|
+
|
|
158
|
+
- [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
|
|
159
|
+
- [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
|
|
160
|
+
- [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# tree-sitter-sas
|
|
2
|
+
|
|
3
|
+
A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/tree-sitter-sas)
|
|
6
|
+
[CI status](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
|
|
7
|
+
[License: MIT](LICENSE)
|
|
8
|
+
|
|
9
|
+
## Node bindings
|
|
10
|
+
|
|
11
|
+
Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install tree-sitter-sas
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
|
|
18
|
+
|
|
19
|
+
## Rust bindings
|
|
20
|
+
|
|
21
|
+
> **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
|
|
22
|
+
>
|
|
23
|
+
> ```toml
|
|
24
|
+
> [dependencies]
|
|
25
|
+
> tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
|
|
26
|
+
> ```
|
|
27
|
+
|
|
28
|
+
## Python bindings
|
|
29
|
+
|
|
30
|
+
> **Coming soon** — a PyPI release is planned. In the meantime, build from source:
|
|
31
|
+
>
|
|
32
|
+
> ```bash
|
|
33
|
+
> pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
|
|
34
|
+
> ```
|
|
35
|
+
|
|
36
|
+
## Go bindings
|
|
37
|
+
|
|
38
|
+
```go
|
|
39
|
+
import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Swift bindings
|
|
43
|
+
|
|
44
|
+
Add to `Package.swift`:
|
|
45
|
+
|
|
46
|
+
```swift
|
|
47
|
+
.package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Architecture
|
|
51
|
+
|
|
52
|
+
```mermaid
|
|
53
|
+
flowchart LR
|
|
54
|
+
subgraph source ["Grammar source"]
|
|
55
|
+
G["grammar.js\n(tree-sitter DSL)"]
|
|
56
|
+
SC["src/scanner.c\n(external scanner)"]
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
GEN(["tree-sitter generate"])
|
|
60
|
+
|
|
61
|
+
subgraph generated ["Generated parser"]
|
|
62
|
+
PC["src/parser.c"]
|
|
63
|
+
NT["src/node-types.json"]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
subgraph queries ["Queries"]
|
|
67
|
+
HL["queries/highlights.scm\n(syntax highlighting)"]
|
|
68
|
+
TG["queries/tags.scm\n(symbol index)"]
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
subgraph bindings ["Language bindings"]
|
|
72
|
+
N["Node · npm"]
|
|
73
|
+
R["Rust · crates.io"]
|
|
74
|
+
PY["Python · PyPI"]
|
|
75
|
+
GO["Go"]
|
|
76
|
+
SW["Swift"]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
G & SC --> GEN --> PC & NT
|
|
80
|
+
NT -.-> HL & TG
|
|
81
|
+
PC --> N & R & PY & GO & SW
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
`grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
|
|
85
|
+
disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
|
|
86
|
+
calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
|
|
87
|
+
character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
|
|
88
|
+
Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
|
|
89
|
+
(`src/node-types.json`).
|
|
90
|
+
|
|
91
|
+
## Language coverage
|
|
92
|
+
|
|
93
|
+
| Construct | Node type |
|
|
94
|
+
|---|---|
|
|
95
|
+
| `DATA` step | `data_step` |
|
|
96
|
+
| `PROC` step | `proc_step` |
|
|
97
|
+
| `%MACRO` / `%MEND` definition | `macro_definition` |
|
|
98
|
+
| `%name(args)` call statement | `macro_call_statement` |
|
|
99
|
+
| `%name(args)` inline call | `macro_call` |
|
|
100
|
+
| `%LET var = value` | `macro_variable_assignment` |
|
|
101
|
+
| `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
|
|
102
|
+
| `%INCLUDE 'path'` | `include_statement` |
|
|
103
|
+
| `LIBNAME libref ...` | `libname_statement` |
|
|
104
|
+
| `OPTIONS ...` | `options_statement` |
|
|
105
|
+
| `/* ... */` block comments | `block_comment` |
|
|
106
|
+
| `* ... ;` line comments | `line_comment` |
|
|
107
|
+
| `%* ... ;` macro comments | `percent_comment` |
|
|
108
|
+
| Everything else | `generic_statement` (flat fallback) |
|
|
109
|
+
|
|
110
|
+
Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
|
|
111
|
+
text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
|
|
112
|
+
breaks the parse.
|
|
113
|
+
|
|
114
|
+
## Known deviations
|
|
115
|
+
|
|
116
|
+
### `generic_statement` as a catch-all
|
|
117
|
+
|
|
118
|
+
SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
|
|
119
|
+
would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
|
|
120
|
+
fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
|
|
121
|
+
proc statement structure is not captured in the tree, but the tree is always well-formed and the
|
|
122
|
+
surrounding program structure is always intact.
|
|
123
|
+
|
|
124
|
+
### Macro keyword disambiguation via external scanner
|
|
125
|
+
|
|
126
|
+
`%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
|
|
127
|
+
resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
|
|
128
|
+
followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
|
|
129
|
+
SAS's own behavior.
|
|
130
|
+
|
|
131
|
+
### Bare `%` in non-macro contexts
|
|
132
|
+
|
|
133
|
+
A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
|
|
134
|
+
scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
|
|
135
|
+
without triggering a macro parse path.
|
|
136
|
+
|
|
137
|
+
## References
|
|
138
|
+
|
|
139
|
+
- [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
|
|
140
|
+
- [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
|
|
141
|
+
- [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""SAS language grammar for tree-sitter"""
|
|
2
|
+
|
|
3
|
+
from importlib.resources import files as _files
|
|
4
|
+
|
|
5
|
+
from ._binding import language
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _get_query(name, file):
|
|
9
|
+
try:
|
|
10
|
+
query = _files(f"{__package__}") / file
|
|
11
|
+
globals()[name] = query.read_text()
|
|
12
|
+
except FileNotFoundError:
|
|
13
|
+
globals()[name] = None
|
|
14
|
+
return globals()[name]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def __getattr__(name):
|
|
18
|
+
if name == "HIGHLIGHTS_QUERY":
|
|
19
|
+
return _get_query("HIGHLIGHTS_QUERY", "queries/highlights.scm")
|
|
20
|
+
if name == "INJECTIONS_QUERY":
|
|
21
|
+
return _get_query("INJECTIONS_QUERY", "queries/injections.scm")
|
|
22
|
+
if name == "LOCALS_QUERY":
|
|
23
|
+
return _get_query("LOCALS_QUERY", "queries/locals.scm")
|
|
24
|
+
if name == "TAGS_QUERY":
|
|
25
|
+
return _get_query("TAGS_QUERY", "queries/tags.scm")
|
|
26
|
+
|
|
27
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"language",
|
|
32
|
+
"HIGHLIGHTS_QUERY",
|
|
33
|
+
"INJECTIONS_QUERY",
|
|
34
|
+
"LOCALS_QUERY",
|
|
35
|
+
"TAGS_QUERY",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def __dir__():
|
|
40
|
+
return sorted(__all__ + [
|
|
41
|
+
"__all__", "__builtins__", "__cached__", "__doc__", "__file__",
|
|
42
|
+
"__loader__", "__name__", "__package__", "__path__", "__spec__",
|
|
43
|
+
])
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Final
|
|
2
|
+
from typing_extensions import CapsuleType
|
|
3
|
+
|
|
4
|
+
HIGHLIGHTS_QUERY: Final[str] | None
|
|
5
|
+
"""The syntax highlighting query for this grammar."""
|
|
6
|
+
|
|
7
|
+
INJECTIONS_QUERY: Final[str] | None
|
|
8
|
+
"""The language injection query for this grammar."""
|
|
9
|
+
|
|
10
|
+
LOCALS_QUERY: Final[str] | None
|
|
11
|
+
"""The local variable query for this grammar."""
|
|
12
|
+
|
|
13
|
+
TAGS_QUERY: Final[str] | None
|
|
14
|
+
"""The symbol tagging query for this grammar."""
|
|
15
|
+
|
|
16
|
+
def language() -> CapsuleType:
|
|
17
|
+
"""The tree-sitter language function for this grammar."""
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#include <Python.h>
|
|
2
|
+
|
|
3
|
+
typedef struct TSLanguage TSLanguage;
|
|
4
|
+
|
|
5
|
+
TSLanguage *tree_sitter_sas(void);
|
|
6
|
+
|
|
7
|
+
static PyObject* _binding_language(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
|
|
8
|
+
return PyCapsule_New(tree_sitter_sas(), "tree_sitter.Language", NULL);
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
static struct PyModuleDef_Slot slots[] = {
|
|
12
|
+
#ifdef Py_GIL_DISABLED
|
|
13
|
+
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
|
|
14
|
+
#endif
|
|
15
|
+
{0, NULL}
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
static PyMethodDef methods[] = {
|
|
19
|
+
{"language", _binding_language, METH_NOARGS,
|
|
20
|
+
"Get the tree-sitter language for this grammar."},
|
|
21
|
+
{NULL, NULL, 0, NULL}
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
static struct PyModuleDef module = {
|
|
25
|
+
.m_base = PyModuleDef_HEAD_INIT,
|
|
26
|
+
.m_name = "_binding",
|
|
27
|
+
.m_doc = NULL,
|
|
28
|
+
.m_size = 0,
|
|
29
|
+
.m_methods = methods,
|
|
30
|
+
.m_slots = slots,
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
PyMODINIT_FUNC PyInit__binding(void) {
|
|
34
|
+
return PyModuleDef_Init(&module);
|
|
35
|
+
}
|
|
File without changes
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tree-sitter-sas
|
|
3
|
+
Version: 0.3.8
|
|
4
|
+
Summary: SAS language grammar for tree-sitter
|
|
5
|
+
Author-email: Brandon Garate <bgarate@ix-infra.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ix-infrastructure/tree-sitter-sas
|
|
8
|
+
Keywords: incremental,parsing,tree-sitter,tree-sitter-sas
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
11
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
12
|
+
Classifier: Typing :: Typed
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Provides-Extra: core
|
|
17
|
+
Requires-Dist: tree-sitter~=0.24; extra == "core"
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# tree-sitter-sas
|
|
21
|
+
|
|
22
|
+
A [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for the [SAS programming language](https://www.sas.com/).
|
|
23
|
+
|
|
24
|
+
[npm package](https://www.npmjs.com/package/tree-sitter-sas)
|
|
25
|
+
[CI status](https://github.com/ix-infrastructure/tree-sitter-sas/actions/workflows/ci.yml)
|
|
26
|
+
[License: MIT](LICENSE)
|
|
27
|
+
|
|
28
|
+
## Node bindings
|
|
29
|
+
|
|
30
|
+
Available on npm as [`tree-sitter-sas`](https://www.npmjs.com/package/tree-sitter-sas).
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install tree-sitter-sas
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Prebuilt native binaries ship for `linux-x64`, `darwin-x64`, and `darwin-arm64` — no compile step required.
|
|
37
|
+
|
|
38
|
+
## Rust bindings
|
|
39
|
+
|
|
40
|
+
> **Coming soon** — a crates.io release is planned. In the meantime, use the GitHub source directly:
|
|
41
|
+
>
|
|
42
|
+
> ```toml
|
|
43
|
+
> [dependencies]
|
|
44
|
+
> tree-sitter-sas = { git = "https://github.com/ix-infrastructure/tree-sitter-sas" }
|
|
45
|
+
> ```
|
|
46
|
+
|
|
47
|
+
## Python bindings
|
|
48
|
+
|
|
49
|
+
> **Coming soon** — a PyPI release is planned. In the meantime, build from source:
|
|
50
|
+
>
|
|
51
|
+
> ```bash
|
|
52
|
+
> pip install git+https://github.com/ix-infrastructure/tree-sitter-sas
|
|
53
|
+
> ```
|
|
54
|
+
|
|
55
|
+
## Go bindings
|
|
56
|
+
|
|
57
|
+
```go
|
|
58
|
+
import "github.com/ix-infrastructure/tree-sitter-sas/bindings/go"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Swift bindings
|
|
62
|
+
|
|
63
|
+
Add to `Package.swift`:
|
|
64
|
+
|
|
65
|
+
```swift
|
|
66
|
+
.package(url: "https://github.com/ix-infrastructure/tree-sitter-sas", from: "0.3.1")
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Architecture
|
|
70
|
+
|
|
71
|
+
```mermaid
|
|
72
|
+
flowchart LR
|
|
73
|
+
subgraph source ["Grammar source"]
|
|
74
|
+
G["grammar.js\n(tree-sitter DSL)"]
|
|
75
|
+
SC["src/scanner.c\n(external scanner)"]
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
GEN(["tree-sitter generate"])
|
|
79
|
+
|
|
80
|
+
subgraph generated ["Generated parser"]
|
|
81
|
+
PC["src/parser.c"]
|
|
82
|
+
NT["src/node-types.json"]
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
subgraph queries ["Queries"]
|
|
86
|
+
HL["queries/highlights.scm\n(syntax highlighting)"]
|
|
87
|
+
TG["queries/tags.scm\n(symbol index)"]
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
subgraph bindings ["Language bindings"]
|
|
91
|
+
N["Node · npm"]
|
|
92
|
+
R["Rust · crates.io"]
|
|
93
|
+
PY["Python · PyPI"]
|
|
94
|
+
GO["Go"]
|
|
95
|
+
SW["Swift"]
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
G & SC --> GEN --> PC & NT
|
|
99
|
+
NT -.-> HL & TG
|
|
100
|
+
PC --> N & R & PY & GO & SW
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
`grammar.js` defines the language using the tree-sitter DSL. `src/scanner.c` is an external lexer that
|
|
104
|
+
disambiguates `%`-prefixed macro keywords (`%let`, `%macro`, `%mend`, `%include`) from user-defined macro
|
|
105
|
+
calls — it emits a keyword token only when the keyword is **not** immediately followed by an identifier
|
|
106
|
+
character, so `%letput` and `%macroFoo` correctly fall through to `macro_call_statement` instead.
|
|
107
|
+
Running `tree-sitter generate` produces the LR parser (`src/parser.c`) and the node type schema
|
|
108
|
+
(`src/node-types.json`).
|
|
109
|
+
|
|
110
|
+
## Language coverage
|
|
111
|
+
|
|
112
|
+
| Construct | Node type |
|
|
113
|
+
|---|---|
|
|
114
|
+
| `DATA` step | `data_step` |
|
|
115
|
+
| `PROC` step | `proc_step` |
|
|
116
|
+
| `%MACRO` / `%MEND` definition | `macro_definition` |
|
|
117
|
+
| `%name(args)` call statement | `macro_call_statement` |
|
|
118
|
+
| `%name(args)` inline call | `macro_call` |
|
|
119
|
+
| `%LET var = value` | `macro_variable_assignment` |
|
|
120
|
+
| `&var`, `&&var`, `&var.` references | `macro_variable_ref` |
|
|
121
|
+
| `%INCLUDE 'path'` | `include_statement` |
|
|
122
|
+
| `LIBNAME libref ...` | `libname_statement` |
|
|
123
|
+
| `OPTIONS ...` | `options_statement` |
|
|
124
|
+
| `/* ... */` block comments | `block_comment` |
|
|
125
|
+
| `* ... ;` line comments | `line_comment` |
|
|
126
|
+
| `%* ... ;` macro comments | `percent_comment` |
|
|
127
|
+
| Everything else | `generic_statement` (flat fallback) |
|
|
128
|
+
|
|
129
|
+
Statements not matched by a specific rule are absorbed by `generic_statement`, which preserves the source
|
|
130
|
+
text without losing parse continuity. The tree is always complete — unknown or proc-specific syntax never
|
|
131
|
+
breaks the parse.
|
|
132
|
+
|
|
133
|
+
## Known deviations
|
|
134
|
+
|
|
135
|
+
### `generic_statement` as a catch-all
|
|
136
|
+
|
|
137
|
+
SAS has hundreds of proc-specific statements (`MODEL`, `CLASS`, `OUTPUT` inside `PROC REG`, etc.) that
|
|
138
|
+
would require individual rules to represent structurally. This grammar uses `generic_statement` as a flat
|
|
139
|
+
fallback for any semicolon-terminated statement not matched by a more specific rule. The tradeoff: internal
|
|
140
|
+
proc statement structure is not captured in the tree, but the tree is always well-formed and the
|
|
141
|
+
surrounding program structure is always intact.
|
|
142
|
+
|
|
143
|
+
### Macro keyword disambiguation via external scanner
|
|
144
|
+
|
|
145
|
+
`%KEYWORD` tokens share a prefix with user-defined macro calls. The external scanner in `src/scanner.c`
|
|
146
|
+
resolves the ambiguity: it emits a structured keyword token only when the keyword is **not** immediately
|
|
147
|
+
followed by `[A-Za-z0-9_]`. This means `%letput` is a macro call, not a `%let` statement — matching
|
|
148
|
+
SAS's own behavior.
|
|
149
|
+
|
|
150
|
+
### Bare `%` in non-macro contexts
|
|
151
|
+
|
|
152
|
+
A literal `%` can appear in non-macro positions (e.g. `width=20%` in ODS style attributes). The external
|
|
153
|
+
scanner emits a `_bare_pct` token for these cases, which is absorbed into `generic_statement` content
|
|
154
|
+
without triggering a macro parse path.
|
|
155
|
+
|
|
156
|
+
## References
|
|
157
|
+
|
|
158
|
+
- [SAS Macro Language Reference](https://documentation.sas.com/doc/en/mcrolref/9.4/titlepage.htm)
|
|
159
|
+
- [SAS Language Reference: Concepts](https://documentation.sas.com/doc/en/lrcon/9.4/titlepage.htm)
|
|
160
|
+
- [tree-sitter documentation](https://tree-sitter.github.io/tree-sitter/)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
bindings/python/tree_sitter_sas/__init__.py
|
|
6
|
+
bindings/python/tree_sitter_sas/__init__.pyi
|
|
7
|
+
bindings/python/tree_sitter_sas/binding.c
|
|
8
|
+
bindings/python/tree_sitter_sas/py.typed
|
|
9
|
+
bindings/python/tree_sitter_sas.egg-info/PKG-INFO
|
|
10
|
+
bindings/python/tree_sitter_sas.egg-info/SOURCES.txt
|
|
11
|
+
bindings/python/tree_sitter_sas.egg-info/dependency_links.txt
|
|
12
|
+
bindings/python/tree_sitter_sas.egg-info/not-zip-safe
|
|
13
|
+
bindings/python/tree_sitter_sas.egg-info/requires.txt
|
|
14
|
+
bindings/python/tree_sitter_sas.egg-info/top_level.txt
|
|
15
|
+
src/parser.c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=62.4.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tree-sitter-sas"
|
|
7
|
+
description = "SAS language grammar for tree-sitter"
|
|
8
|
+
version = "0.3.8"
|
|
9
|
+
keywords = ["incremental", "parsing", "tree-sitter", "tree-sitter-sas"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"Topic :: Software Development :: Compilers",
|
|
13
|
+
"Topic :: Text Processing :: Linguistic",
|
|
14
|
+
"Typing :: Typed",
|
|
15
|
+
]
|
|
16
|
+
authors = [{ name = "Brandon Garate", email = "bgarate@ix-infra.com" }]
|
|
17
|
+
requires-python = ">=3.10"
|
|
18
|
+
license.text = "MIT"
|
|
19
|
+
readme = "README.md"
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Homepage = "https://github.com/ix-infrastructure/tree-sitter-sas"
|
|
23
|
+
|
|
24
|
+
[project.optional-dependencies]
|
|
25
|
+
core = ["tree-sitter~=0.24"]
|
|
26
|
+
|
|
27
|
+
[tool.cibuildwheel]
|
|
28
|
+
build = "cp310-*"
|
|
29
|
+
build-frontend = "build"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
(block_comment) @comment.block
|
|
2
|
+
(line_comment) @comment.line
|
|
3
|
+
|
|
4
|
+
(macro_definition) @function
|
|
5
|
+
(macro_name) @function.method
|
|
6
|
+
|
|
7
|
+
(macro_variable_assignment) @keyword
|
|
8
|
+
(macro_variable_ref) @variable
|
|
9
|
+
|
|
10
|
+
(string_literal) @string
|
|
11
|
+
|
|
12
|
+
(data_step_header) @keyword
|
|
13
|
+
(proc_step_header) @keyword
|
|
14
|
+
(run_statement) @keyword
|
|
15
|
+
(run_or_quit_statement) @keyword
|
|
16
|
+
(macro_end) @keyword
|
|
17
|
+
|
|
18
|
+
(libname_statement) @keyword
|
|
19
|
+
(include_statement) @keyword
|
|
20
|
+
(options_statement) @keyword
|