tree-sitter-wikitext 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_wikitext-0.1.0/LICENSE.md +19 -0
- tree_sitter_wikitext-0.1.0/PKG-INFO +151 -0
- tree_sitter_wikitext-0.1.0/README.md +131 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tests/test_binding.py +12 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext/__init__.py +5 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext/__init__.pyi +1 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext/binding.c +27 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext/py.typed +0 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/PKG-INFO +151 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/SOURCES.txt +17 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/dependency_links.txt +1 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/not-zip-safe +1 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/requires.txt +3 -0
- tree_sitter_wikitext-0.1.0/bindings/python/tree_sitter_wikitext.egg-info/top_level.txt +3 -0
- tree_sitter_wikitext-0.1.0/pyproject.toml +41 -0
- tree_sitter_wikitext-0.1.0/setup.cfg +4 -0
- tree_sitter_wikitext-0.1.0/setup.py +61 -0
- tree_sitter_wikitext-0.1.0/src/parser.c +33648 -0
- tree_sitter_wikitext-0.1.0/src/scanner.c +1119 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright Santhosh Thottingal <santhosh.thottingal@gmail.com> © 2025
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the "Software"),
|
|
5
|
+
to deal in the Software without restriction, including without limitation
|
|
6
|
+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
7
|
+
and/or sell copies of the Software, and to permit persons to whom the
|
|
8
|
+
Software is furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included
|
|
11
|
+
in all copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
14
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
15
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
16
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
17
|
+
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
18
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
19
|
+
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tree-sitter-wikitext
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Wikitext grammar for tree-sitter
|
|
5
|
+
Author-email: Santhosh Thottingal <santhosh.thottingal@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/santhoshtr/tree-sitter-wikitext
|
|
8
|
+
Keywords: incremental,parsing,tree-sitter,wikitext
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
12
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE.md
|
|
17
|
+
Provides-Extra: core
|
|
18
|
+
Requires-Dist: tree-sitter~=0.25; extra == "core"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Tree-Sitter Wikitext Parser
|
|
22
|
+
|
|
23
|
+
This repository contains the implementation of a **Tree-Sitter** parser for **Wikitext**, a markup language used by MediaWiki.
|
|
24
|
+
|
|
25
|
+
Try the parser in the [playground](https://tree-sitter-wikitext.toolforge.org/).
|
|
26
|
+
|
|
27
|
+
## Overview
|
|
28
|
+
|
|
29
|
+
Tree-Sitter is a powerful parser generator tool and incremental parsing library. It is designed to build concrete syntax trees for source files and efficiently update them as the source changes. This project leverages Tree-Sitter to parse Wikitext, enabling structured analysis and manipulation of MediaWiki content.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Incremental Parsing**: Efficiently updates syntax trees as the source changes.
|
|
34
|
+
- **Language Agnostic**: Can be embedded in applications written in C, Python, Go, Rust, Node.js, and Swift.
|
|
35
|
+
- **Robust Parsing**: Handles syntax errors gracefully to provide useful results.
|
|
36
|
+
- **Custom Grammar**: Implements a grammar tailored for Wikitext.
|
|
37
|
+
|
|
38
|
+
## Repository Structure
|
|
39
|
+
|
|
40
|
+
- **`src/`**: Contains the core C implementation of the parser.
|
|
41
|
+
- **`bindings/`**: Language-specific bindings for Python, Go, Node.js, Rust, and Swift.
|
|
42
|
+
- **`grammar.js`**: Defines the grammar for Wikitext.
|
|
43
|
+
- **`queries/`**: Contains Tree-Sitter query files for extracting specific syntax patterns.
|
|
44
|
+
- **`tests/`**: Unit tests for validating the parser's functionality.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
### Prerequisites
|
|
49
|
+
|
|
50
|
+
- A C compiler (e.g., GCC or Clang)
|
|
51
|
+
- [Node.js](https://nodejs.org/) (for building the grammar)
|
|
52
|
+
- Python 3.10+ (optional, for Python bindings)
|
|
53
|
+
|
|
54
|
+
### Build Instructions
|
|
55
|
+
|
|
56
|
+
1. Clone the repository:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/santhoshtr/tree-sitter-wikitext.git
|
|
60
|
+
cd tree-sitter-wikitext
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
2. Build the parser:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
npm install
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
3. (Optional) Build language-specific bindings:
|
|
70
|
+
- **Python**: Run `python setup.py build`.
|
|
71
|
+
- **Rust**: Use `cargo build`.
|
|
72
|
+
- **Go**: Use `go build`.
|
|
73
|
+
|
|
74
|
+
## Usage
|
|
75
|
+
|
|
76
|
+
### Embedding in Applications
|
|
77
|
+
|
|
78
|
+
The parser can be embedded in applications written in various languages. For example:
|
|
79
|
+
|
|
80
|
+
- **Python**: Use the `tree-sitter` Python module to load and use the parser.
|
|
81
|
+
- **Node.js**: Import the parser as a Node.js module.
|
|
82
|
+
- **Rust**: Use the `tree-sitter` crate to integrate the parser.
|
|
83
|
+
|
|
84
|
+
### Example: Parsing Wikitext in Rust
|
|
85
|
+
|
|
86
|
+
```rust
|
|
87
|
+
use tree_sitter::{Parser, Language};
|
|
88
|
+
|
|
89
|
+
fn main() {
|
|
90
|
+
// Create a new parser
|
|
91
|
+
let mut parser = tree_sitter::Parser::new();
|
|
92
|
+
parser.set_language(&tree_sitter_wikitext::LANGUAGE.into()).expect("Error loading wikitext grammar");
|
|
93
|
+
|
|
94
|
+
// Parse a Wikitext string
|
|
95
|
+
let source_code = "== Heading ==\nThis is a paragraph.\n";
|
|
96
|
+
let tree = parser.parse(source_code, None).unwrap();
|
|
97
|
+
|
|
98
|
+
// Print the syntax tree
|
|
99
|
+
println!("{}", tree.root_node().to_sexp());
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Using with Neovim
|
|
104
|
+
|
|
105
|
+
Checkout the repo, add the following configuration to `init.lua` of your nvim installation.
|
|
106
|
+
|
|
107
|
+
```lua
|
|
108
|
+
--- Refer https://github.com/nvim-treesitter/nvim-treesitter
|
|
109
|
+
local parser_config = require("nvim-treesitter.parsers").get_parser_configs()
|
|
110
|
+
parser_config.wikitext = {
|
|
111
|
+
install_info = {
|
|
112
|
+
url = "~/path/to/tree-sitter-wikitext", -- local path or git repo
|
|
113
|
+
files = { "src/parser.c" }, -- note that some parsers also require src/scanner.c or src/scanner.cc
|
|
114
|
+
-- optional entries:
|
|
115
|
+
branch = "main", -- default branch in case of git repo if different from master
|
|
116
|
+
generate_requires_npm = false, -- if stand-alone parser without npm dependencies
|
|
117
|
+
requires_generate_from_grammar = false, -- if folder contains pre-generated src/parser.c
|
|
118
|
+
},
|
|
119
|
+
filetype = "wikitext", -- if filetype does not match the parser name
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
vim.filetype.add({
|
|
123
|
+
pattern = {
|
|
124
|
+
[".*/*.wikitext"] = "wikitext",
|
|
125
|
+
},
|
|
126
|
+
})
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Link the queries folder of `tree-sitter-wikitext` to `queries/wikitext` folder of nvim
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
cd ~/.config/nvim
|
|
133
|
+
mkdir -p queries
|
|
134
|
+
ln -s path/to/tree-sitter-wikitext/queries queries/wikitext
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Re-open nvim. Open any file with `.wikitext` extension. You should see syntax highlighting. You can also inspect the tree-sitter tree using `:InspectTree` command
|
|
138
|
+
|
|
139
|
+
To run queries against a buffer, run `:EditQuery wikitext`. A scratch buffer will be opened. Write your Tree-Sitter query there; then, in normal mode, move the cursor over the capture names. The corresponding text in the buffer will be highlighted.
|
|
140
|
+
|
|
141
|
+
## Contributing
|
|
142
|
+
|
|
143
|
+
Contributions are welcome! Please follow these steps:
|
|
144
|
+
|
|
145
|
+
1. Fork the repository.
|
|
146
|
+
2. Create a new branch for your feature or bug fix.
|
|
147
|
+
3. Submit a pull request with a detailed description of your changes.
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
This project is licensed under the MIT License. See the `LICENSE.md` file for details.
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Tree-Sitter Wikitext Parser
|
|
2
|
+
|
|
3
|
+
This repository contains the implementation of a **Tree-Sitter** parser for **Wikitext**, a markup language used by MediaWiki.
|
|
4
|
+
|
|
5
|
+
Try the parser in the [playground](https://tree-sitter-wikitext.toolforge.org/).
|
|
6
|
+
|
|
7
|
+
## Overview
|
|
8
|
+
|
|
9
|
+
Tree-Sitter is a powerful parser generator tool and incremental parsing library. It is designed to build concrete syntax trees for source files and efficiently update them as the source changes. This project leverages Tree-Sitter to parse Wikitext, enabling structured analysis and manipulation of MediaWiki content.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Incremental Parsing**: Efficiently updates syntax trees as the source changes.
|
|
14
|
+
- **Language Agnostic**: Can be embedded in applications written in C, Python, Go, Rust, Node.js, and Swift.
|
|
15
|
+
- **Robust Parsing**: Handles syntax errors gracefully to provide useful results.
|
|
16
|
+
- **Custom Grammar**: Implements a grammar tailored for Wikitext.
|
|
17
|
+
|
|
18
|
+
## Repository Structure
|
|
19
|
+
|
|
20
|
+
- **`src/`**: Contains the core C implementation of the parser.
|
|
21
|
+
- **`bindings/`**: Language-specific bindings for Python, Go, Node.js, Rust, and Swift.
|
|
22
|
+
- **`grammar.js`**: Defines the grammar for Wikitext.
|
|
23
|
+
- **`queries/`**: Contains Tree-Sitter query files for extracting specific syntax patterns.
|
|
24
|
+
- **`tests/`**: Unit tests for validating the parser's functionality.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
### Prerequisites
|
|
29
|
+
|
|
30
|
+
- A C compiler (e.g., GCC or Clang)
|
|
31
|
+
- [Node.js](https://nodejs.org/) (for building the grammar)
|
|
32
|
+
- Python 3.10+ (optional, for Python bindings)
|
|
33
|
+
|
|
34
|
+
### Build Instructions
|
|
35
|
+
|
|
36
|
+
1. Clone the repository:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
git clone https://github.com/santhoshtr/tree-sitter-wikitext.git
|
|
40
|
+
cd tree-sitter-wikitext
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
2. Build the parser:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
npm install
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
3. (Optional) Build language-specific bindings:
|
|
50
|
+
- **Python**: Run `python setup.py build`.
|
|
51
|
+
- **Rust**: Use `cargo build`.
|
|
52
|
+
- **Go**: Use `go build`.
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
### Embedding in Applications
|
|
57
|
+
|
|
58
|
+
The parser can be embedded in applications written in various languages. For example:
|
|
59
|
+
|
|
60
|
+
- **Python**: Use the `tree-sitter` Python module to load and use the parser.
|
|
61
|
+
- **Node.js**: Import the parser as a Node.js module.
|
|
62
|
+
- **Rust**: Use the `tree-sitter` crate to integrate the parser.
|
|
63
|
+
|
|
64
|
+
### Example: Parsing Wikitext in Rust
|
|
65
|
+
|
|
66
|
+
```rust
|
|
67
|
+
use tree_sitter::{Parser, Language};
|
|
68
|
+
|
|
69
|
+
fn main() {
|
|
70
|
+
// Create a new parser
|
|
71
|
+
let mut parser = tree_sitter::Parser::new();
|
|
72
|
+
parser.set_language(&tree_sitter_wikitext::LANGUAGE.into()).expect("Error loading wikitext grammar");
|
|
73
|
+
|
|
74
|
+
// Parse a Wikitext string
|
|
75
|
+
let source_code = "== Heading ==\nThis is a paragraph.\n";
|
|
76
|
+
let tree = parser.parse(source_code, None).unwrap();
|
|
77
|
+
|
|
78
|
+
// Print the syntax tree
|
|
79
|
+
println!("{}", tree.root_node().to_sexp());
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Using with Neovim
|
|
84
|
+
|
|
85
|
+
Check out the repo, then add the following configuration to the `init.lua` of your nvim installation.
|
|
86
|
+
|
|
87
|
+
```lua
|
|
88
|
+
--- Refer https://github.com/nvim-treesitter/nvim-treesitter
|
|
89
|
+
local parser_config = require("nvim-treesitter.parsers").get_parser_configs()
|
|
90
|
+
parser_config.wikitext = {
|
|
91
|
+
install_info = {
|
|
92
|
+
url = "~/path/to/tree-sitter-wikitext", -- local path or git repo
|
|
93
|
+
files = { "src/parser.c", "src/scanner.c" }, -- this grammar ships an external scanner, so src/scanner.c is required too
|
|
94
|
+
-- optional entries:
|
|
95
|
+
branch = "main", -- default branch in case of git repo if different from master
|
|
96
|
+
generate_requires_npm = false, -- if stand-alone parser without npm dependencies
|
|
97
|
+
requires_generate_from_grammar = false, -- if folder contains pre-generated src/parser.c
|
|
98
|
+
},
|
|
99
|
+
filetype = "wikitext", -- if filetype does not match the parser name
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
vim.filetype.add({
|
|
103
|
+
pattern = {
|
|
104
|
+
[".*/*.wikitext"] = "wikitext",
|
|
105
|
+
},
|
|
106
|
+
})
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Link the `queries` folder of `tree-sitter-wikitext` to the `queries/wikitext` folder of your nvim configuration:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
cd ~/.config/nvim
|
|
113
|
+
mkdir -p queries
|
|
114
|
+
ln -s path/to/tree-sitter-wikitext/queries queries/wikitext
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Re-open nvim and open any file with the `.wikitext` extension. You should see syntax highlighting. You can also inspect the tree-sitter tree using the `:InspectTree` command.
|
|
118
|
+
|
|
119
|
+
To run queries against a buffer, run `:EditQuery wikitext`. A scratch buffer will be opened. Write your Tree-Sitter query there; then, in normal mode, move the cursor over the capture names. The corresponding text in the buffer will be highlighted.
|
|
120
|
+
|
|
121
|
+
## Contributing
|
|
122
|
+
|
|
123
|
+
Contributions are welcome! Please follow these steps:
|
|
124
|
+
|
|
125
|
+
1. Fork the repository.
|
|
126
|
+
2. Create a new branch for your feature or bug fix.
|
|
127
|
+
3. Submit a pull request with a detailed description of your changes.
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
This project is licensed under the MIT License. See the `LICENSE.md` file for details.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from unittest import TestCase
|
|
2
|
+
|
|
3
|
+
import tree_sitter
|
|
4
|
+
import tree_sitter_wikitext
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TestLanguage(TestCase):
    """Smoke test for the Python binding of the Wikitext grammar."""

    def test_can_load_grammar(self):
        """Fail if ``tree_sitter.Language`` rejects the value from ``language()``."""
        try:
            grammar_handle = tree_sitter_wikitext.language()
            tree_sitter.Language(grammar_handle)
        except Exception:
            # Any error raised while obtaining or wrapping the grammar is
            # reported as a single test failure.
            self.fail("Error loading Wikitext grammar")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Type stub: language() takes no arguments and returns an int.
# NOTE(review): presumably this int is the TSLanguage pointer produced by the
# C binding's `language` method - confirm against the package's __init__.py.
def language() -> int: ...
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#include <Python.h>
|
|
2
|
+
|
|
3
|
+
typedef struct TSLanguage TSLanguage;
|
|
4
|
+
|
|
5
|
+
TSLanguage *tree_sitter_wikitext(void);
|
|
6
|
+
|
|
7
|
+
/*
 * Implementation of the module-level `language()` method.
 *
 * Calls tree_sitter_wikitext() and returns the resulting TSLanguage pointer
 * to Python as an integer object. Both parameters are unused.
 */
static PyObject* _binding_language(PyObject *self, PyObject *args) {
    TSLanguage *grammar = tree_sitter_wikitext();
    return PyLong_FromVoidPtr(grammar);
}
|
|
10
|
+
|
|
11
|
+
static PyMethodDef methods[] = {
|
|
12
|
+
{"language", _binding_language, METH_NOARGS,
|
|
13
|
+
"Get the tree-sitter language for this grammar."},
|
|
14
|
+
{NULL, NULL, 0, NULL}
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
static struct PyModuleDef module = {
|
|
18
|
+
.m_base = PyModuleDef_HEAD_INIT,
|
|
19
|
+
.m_name = "_binding",
|
|
20
|
+
.m_doc = NULL,
|
|
21
|
+
.m_size = -1,
|
|
22
|
+
.m_methods = methods
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
PyMODINIT_FUNC PyInit__binding(void) {
|
|
26
|
+
return PyModule_Create(&module);
|
|
27
|
+
}
|
|
File without changes
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tree-sitter-wikitext
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Wikitext grammar for tree-sitter
|
|
5
|
+
Author-email: Santhosh Thottingal <santhosh.thottingal@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/santhoshtr/tree-sitter-wikitext
|
|
8
|
+
Keywords: incremental,parsing,tree-sitter,wikitext
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
12
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE.md
|
|
17
|
+
Provides-Extra: core
|
|
18
|
+
Requires-Dist: tree-sitter~=0.25; extra == "core"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# Tree-Sitter Wikitext Parser
|
|
22
|
+
|
|
23
|
+
This repository contains the implementation of a **Tree-Sitter** parser for **Wikitext**, a markup language used by MediaWiki.
|
|
24
|
+
|
|
25
|
+
Try the parser in the [playground](https://tree-sitter-wikitext.toolforge.org/).
|
|
26
|
+
|
|
27
|
+
## Overview
|
|
28
|
+
|
|
29
|
+
Tree-Sitter is a powerful parser generator tool and incremental parsing library. It is designed to build concrete syntax trees for source files and efficiently update them as the source changes. This project leverages Tree-Sitter to parse Wikitext, enabling structured analysis and manipulation of MediaWiki content.
|
|
30
|
+
|
|
31
|
+
## Features
|
|
32
|
+
|
|
33
|
+
- **Incremental Parsing**: Efficiently updates syntax trees as the source changes.
|
|
34
|
+
- **Language Agnostic**: Can be embedded in applications written in C, Python, Go, Rust, Node.js, and Swift.
|
|
35
|
+
- **Robust Parsing**: Handles syntax errors gracefully to provide useful results.
|
|
36
|
+
- **Custom Grammar**: Implements a grammar tailored for Wikitext.
|
|
37
|
+
|
|
38
|
+
## Repository Structure
|
|
39
|
+
|
|
40
|
+
- **`src/`**: Contains the core C implementation of the parser.
|
|
41
|
+
- **`bindings/`**: Language-specific bindings for Python, Go, Node.js, Rust, and Swift.
|
|
42
|
+
- **`grammar.js`**: Defines the grammar for Wikitext.
|
|
43
|
+
- **`queries/`**: Contains Tree-Sitter query files for extracting specific syntax patterns.
|
|
44
|
+
- **`tests/`**: Unit tests for validating the parser's functionality.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
### Prerequisites
|
|
49
|
+
|
|
50
|
+
- A C compiler (e.g., GCC or Clang)
|
|
51
|
+
- [Node.js](https://nodejs.org/) (for building the grammar)
|
|
52
|
+
- Python 3.10+ (optional, for Python bindings)
|
|
53
|
+
|
|
54
|
+
### Build Instructions
|
|
55
|
+
|
|
56
|
+
1. Clone the repository:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git clone https://github.com/santhoshtr/tree-sitter-wikitext.git
|
|
60
|
+
cd tree-sitter-wikitext
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
2. Build the parser:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
npm install
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
3. (Optional) Build language-specific bindings:
|
|
70
|
+
- **Python**: Run `python setup.py build`.
|
|
71
|
+
- **Rust**: Use `cargo build`.
|
|
72
|
+
- **Go**: Use `go build`.
|
|
73
|
+
|
|
74
|
+
## Usage
|
|
75
|
+
|
|
76
|
+
### Embedding in Applications
|
|
77
|
+
|
|
78
|
+
The parser can be embedded in applications written in various languages. For example:
|
|
79
|
+
|
|
80
|
+
- **Python**: Use the `tree-sitter` Python module to load and use the parser.
|
|
81
|
+
- **Node.js**: Import the parser as a Node.js module.
|
|
82
|
+
- **Rust**: Use the `tree-sitter` crate to integrate the parser.
|
|
83
|
+
|
|
84
|
+
### Example: Parsing Wikitext in Rust
|
|
85
|
+
|
|
86
|
+
```rust
|
|
87
|
+
use tree_sitter::{Parser, Language};
|
|
88
|
+
|
|
89
|
+
fn main() {
|
|
90
|
+
// Create a new parser
|
|
91
|
+
let mut parser = tree_sitter::Parser::new();
|
|
92
|
+
parser.set_language(&tree_sitter_wikitext::LANGUAGE.into()).expect("Error loading wikitext grammar");
|
|
93
|
+
|
|
94
|
+
// Parse a Wikitext string
|
|
95
|
+
let source_code = "== Heading ==\nThis is a paragraph.\n";
|
|
96
|
+
let tree = parser.parse(source_code, None).unwrap();
|
|
97
|
+
|
|
98
|
+
// Print the syntax tree
|
|
99
|
+
println!("{}", tree.root_node().to_sexp());
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Using with Neovim
|
|
104
|
+
|
|
105
|
+
Checkout the repo, add the following configuration to `init.lua` of your nvim installation.
|
|
106
|
+
|
|
107
|
+
```lua
|
|
108
|
+
--- Refer https://github.com/nvim-treesitter/nvim-treesitter
|
|
109
|
+
local parser_config = require("nvim-treesitter.parsers").get_parser_configs()
|
|
110
|
+
parser_config.wikitext = {
|
|
111
|
+
install_info = {
|
|
112
|
+
url = "~/path/to/tree-sitter-wikitext", -- local path or git repo
|
|
113
|
+
files = { "src/parser.c" }, -- note that some parsers also require src/scanner.c or src/scanner.cc
|
|
114
|
+
-- optional entries:
|
|
115
|
+
branch = "main", -- default branch in case of git repo if different from master
|
|
116
|
+
generate_requires_npm = false, -- if stand-alone parser without npm dependencies
|
|
117
|
+
requires_generate_from_grammar = false, -- if folder contains pre-generated src/parser.c
|
|
118
|
+
},
|
|
119
|
+
filetype = "wikitext", -- if filetype does not match the parser name
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
vim.filetype.add({
|
|
123
|
+
pattern = {
|
|
124
|
+
[".*/*.wikitext"] = "wikitext",
|
|
125
|
+
},
|
|
126
|
+
})
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Link the queries folder of `tree-sitter-wikitext` to `queries/wikitext` folder of nvim
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
cd ~/.config/nvim
|
|
133
|
+
mkdir -p queries
|
|
134
|
+
ln -s path/to/tree-sitter-wikitext/queries queries/wikitext
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Re-open nvim. Open any file with `.wikitext` extension. You should see syntax highlighting. You can also inspect the tree-sitter tree using `:InspectTree` command
|
|
138
|
+
|
|
139
|
+
To run queries against a buffer, run `:EditQuery wikitext`. A scratch buffer will be opened. Write your Tree-Sitter query there; then, in normal mode, move the cursor over the capture names. The corresponding text in the buffer will be highlighted.
|
|
140
|
+
|
|
141
|
+
## Contributing
|
|
142
|
+
|
|
143
|
+
Contributions are welcome! Please follow these steps:
|
|
144
|
+
|
|
145
|
+
1. Fork the repository.
|
|
146
|
+
2. Create a new branch for your feature or bug fix.
|
|
147
|
+
3. Submit a pull request with a detailed description of your changes.
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
This project is licensed under the MIT License. See the `LICENSE.md` file for details.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE.md
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
bindings/python/tests/test_binding.py
|
|
6
|
+
bindings/python/tree_sitter_wikitext/__init__.py
|
|
7
|
+
bindings/python/tree_sitter_wikitext/__init__.pyi
|
|
8
|
+
bindings/python/tree_sitter_wikitext/binding.c
|
|
9
|
+
bindings/python/tree_sitter_wikitext/py.typed
|
|
10
|
+
bindings/python/tree_sitter_wikitext.egg-info/PKG-INFO
|
|
11
|
+
bindings/python/tree_sitter_wikitext.egg-info/SOURCES.txt
|
|
12
|
+
bindings/python/tree_sitter_wikitext.egg-info/dependency_links.txt
|
|
13
|
+
bindings/python/tree_sitter_wikitext.egg-info/not-zip-safe
|
|
14
|
+
bindings/python/tree_sitter_wikitext.egg-info/requires.txt
|
|
15
|
+
bindings/python/tree_sitter_wikitext.egg-info/top_level.txt
|
|
16
|
+
src/parser.c
|
|
17
|
+
src/scanner.c
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tree-sitter-wikitext"
|
|
7
|
+
description = "Wikitext grammar for tree-sitter"
|
|
8
|
+
version = "0.1.0"
|
|
9
|
+
keywords = ["incremental", "parsing", "tree-sitter", "wikitext"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Topic :: Software Development :: Compilers",
|
|
14
|
+
"Topic :: Text Processing :: Linguistic",
|
|
15
|
+
"Typing :: Typed",
|
|
16
|
+
]
|
|
17
|
+
requires-python = ">=3.10"
|
|
18
|
+
license.text = "MIT"
|
|
19
|
+
readme = "README.md"
|
|
20
|
+
authors = [
|
|
21
|
+
{ name = "Santhosh Thottingal", email = "santhosh.thottingal@gmail.com" },
|
|
22
|
+
]
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/santhoshtr/tree-sitter-wikitext"
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
core = ["tree-sitter~=0.25"]
|
|
28
|
+
|
|
29
|
+
[tool.cibuildwheel]
|
|
30
|
+
build = "cp310-*"
|
|
31
|
+
build-frontend = "build"
|
|
32
|
+
[tool.setuptools]
|
|
33
|
+
package-dir = {"" = "bindings/python"}
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["bindings/python"]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.package-data]
|
|
39
|
+
tree_sitter_wikitext = ["*.pyi", "py.typed"]
|
|
40
|
+
"tree_sitter_wikitext.queries" = ["*.scm"]
|
|
41
|
+
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
from os.path import isdir, join
|
|
2
|
+
from platform import system
|
|
3
|
+
|
|
4
|
+
from setuptools import Extension, find_packages, setup
|
|
5
|
+
from setuptools.command.build import build
|
|
6
|
+
from wheel.bdist_wheel import bdist_wheel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Build(build):
    """Build command that also places the grammar's query files in the build tree."""

    def run(self):
        """Copy ``queries/`` into the package build directory, then run the stock build.

        The copy is skipped when no ``queries`` directory exists in the
        current working directory; the parent ``run`` is always invoked.
        """
        queries_dir = "queries"
        if isdir(queries_dir):
            self.copy_tree(
                queries_dir,
                join(self.build_lib, "tree_sitter_wikitext", "queries"),
            )
        super().run()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BdistWheel(bdist_wheel):
    """Wheel builder that labels CPython wheels as ``cp39``/``abi3``."""

    def get_tag(self):
        """Return the ``(python, abi, platform)`` wheel tag.

        When the interpreter part of the parent's tag starts with ``cp``, the
        interpreter and ABI parts are replaced by ``cp39`` and ``abi3``.
        Any other tag is passed through unchanged. The platform part is
        never modified.
        """
        interpreter, abi_tag, plat = super().get_tag()
        if not interpreter.startswith("cp"):
            return interpreter, abi_tag, plat
        return "cp39", "abi3", plat
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Package configuration: Python sources live under bindings/python, and the
# grammar is compiled into the `tree_sitter_wikitext._binding` extension.
setup(
    packages=find_packages("bindings/python"),
    package_dir={"": "bindings/python"},
    package_data={
        "tree_sitter_wikitext": ["*.pyi", "py.typed"],
        "tree_sitter_wikitext.queries": ["*.scm"],
    },
    ext_package="tree_sitter_wikitext",
    ext_modules=[
        Extension(
            name="_binding",
            sources=[
                "bindings/python/tree_sitter_wikitext/binding.c",
                "src/parser.c",
                # This grammar ships an external scanner (src/scanner.c is
                # part of the sdist); it has to be compiled into the
                # extension together with the generated parser.
                "src/scanner.c",
            ],
            # Request C11 using the flag spelling of the platform's compiler
            # (MSVC-style on Windows, GCC/Clang-style elsewhere).
            extra_compile_args=(
                [
                    "-std=c11",
                ]
                if system() != "Windows"
                else [
                    "/std:c11",
                    "/utf-8",
                ]
            ),
            define_macros=[
                # Limited-API level 3.9, in line with the cp39/abi3 wheel tag
                # produced by BdistWheel.
                ("Py_LIMITED_API", "0x03090000"),
                ("PY_SSIZE_T_CLEAN", None),
            ],
            include_dirs=["src"],
            py_limited_api=True,
        )
    ],
    cmdclass={"build": Build, "bdist_wheel": BdistWheel},
    zip_safe=False,
)
|