tinysexpr 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tinysexpr-1.0/LICENSE +21 -0
- tinysexpr-1.0/PKG-INFO +61 -0
- tinysexpr-1.0/README.md +44 -0
- tinysexpr-1.0/pyproject.toml +27 -0
- tinysexpr-1.0/setup.cfg +4 -0
- tinysexpr-1.0/src/tinysexpr.egg-info/PKG-INFO +61 -0
- tinysexpr-1.0/src/tinysexpr.egg-info/SOURCES.txt +8 -0
- tinysexpr-1.0/src/tinysexpr.egg-info/dependency_links.txt +1 -0
- tinysexpr-1.0/src/tinysexpr.egg-info/top_level.txt +1 -0
- tinysexpr-1.0/src/tinysexpr.py +156 -0
tinysexpr-1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Sebastian Hack
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tinysexpr-1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: tinysexpr
|
|
3
|
+
Version: 1.0
|
|
4
|
+
Summary: A very simple S-expression parser that tries to make as little fuss as possible.
|
|
5
|
+
Author-email: Sebastian Hack <contact@s-hack.de>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: GitHub, https://github.com/shack/tinysexpr
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
|
|
18
|
+
# S-Expression Parser
|
|
19
|
+
|
|
20
|
+
A very simple [S-expression](https://en.m.wikipedia.org/wiki/S-expression) parser that tries to make as little fuss as possible.
|
|
21
|
+
|
|
22
|
+
## Example
|
|
23
|
+
|
|
24
|
+
This module provides a single function
|
|
25
|
+
```
|
|
26
|
+
def read(file_like, delims=DEFAULT_DELIMS, comment_char=';', atom_handler=lambda x: x):
|
|
27
|
+
```
|
|
28
|
+
that returns the read S-expression.
|
|
29
|
+
Reading
|
|
30
|
+
```
|
|
31
|
+
(a b c (123 e f () x))
|
|
32
|
+
```
|
|
33
|
+
returns
|
|
34
|
+
```
|
|
35
|
+
['a', 'b', 'c', ['123', 'e', 'f', [], 'x']]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Atoms
|
|
39
|
+
|
|
40
|
+
`a`, `b`, `c` in the above example are called **atoms**.
|
|
41
|
+
Atoms are parsed using two different rules:
|
|
42
|
+
1. Every sequence that does not contain whitespace, an opening or closing parenthesis, or the comment character is an atom.
|
|
43
|
+
2. Every sequence that starts and ends with a **delimiter** is an atom.
|
|
44
|
+
The default delimiters are `"` for strings with the usual escape characters (`\n`, `\t`, `\"`, `\\`, ...) and `|` without escape characters.
|
|
45
|
+
Using the `delims` parameter, you can customize the delimiters and their escape sequences.
|
|
46
|
+
|
|
47
|
+
## Details
|
|
48
|
+
|
|
49
|
+
The parameters of `read` are:
|
|
50
|
+
1. `file_like`: An object that is [file like](https://docs.python.org/3/glossary.html#term-file-object) i.e. provides a `read` method.
|
|
51
|
+
2. `delims`: A map of delimiters used to surround atoms that contain spaces.
|
|
52
|
+
Commonly these are double-quotes to represent strings as in `"Hello"` or
|
|
53
|
+
vertical bars to allow for symbols that contain spaces as in `|some symbol|`.
|
|
54
|
+
The map `DEFAULT_DELIMS` specifies delimiters for strings and symbols.
|
|
55
|
+
3. `comment_char`: The character that starts a single line comment.
|
|
56
|
+
The default value is `;`.
|
|
57
|
+
4. `atom_handler`: A function that is called when an atom is parsed.
|
|
58
|
+
This function is passed a string that consists of the text of the parsed
|
|
59
|
+
atom. The function can convert this string into something else and the
|
|
60
|
+
returned value is used to construct the S-expression.
|
|
61
|
+
For example, this allows for converting digit sequences into ints.
|
tinysexpr-1.0/README.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# S-Expression Parser
|
|
2
|
+
|
|
3
|
+
A very simple [S-expression](https://en.m.wikipedia.org/wiki/S-expression) parser that tries to make as little fuss as possible.
|
|
4
|
+
|
|
5
|
+
## Example
|
|
6
|
+
|
|
7
|
+
This module provides a single function
|
|
8
|
+
```
|
|
9
|
+
def read(file_like, delims=DEFAULT_DELIMS, comment_char=';', atom_handler=lambda x: x):
|
|
10
|
+
```
|
|
11
|
+
that returns the read S-expression.
|
|
12
|
+
Reading
|
|
13
|
+
```
|
|
14
|
+
(a b c (123 e f () x))
|
|
15
|
+
```
|
|
16
|
+
returns
|
|
17
|
+
```
|
|
18
|
+
['a', 'b', 'c', ['123', 'e', 'f', [], 'x']]
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Atoms
|
|
22
|
+
|
|
23
|
+
`a`, `b`, `c` in the above example are called **atoms**.
|
|
24
|
+
Atoms are parsed using two different rules:
|
|
25
|
+
1. Every sequence that does not contain whitespace, an opening or closing parenthesis, or the comment character is an atom.
|
|
26
|
+
2. Every sequence that starts and ends with a **delimiter** is an atom.
|
|
27
|
+
The default delimiters are `"` for strings with the usual escape characters (`\n`, `\t`, `\"`, `\\`, ...) and `|` without escape characters.
|
|
28
|
+
Using the `delims` parameter, you can customize the delimiters and their escape sequences.
|
|
29
|
+
|
|
30
|
+
## Details
|
|
31
|
+
|
|
32
|
+
The parameters of `read` are:
|
|
33
|
+
1. `file_like`: An object that is [file like](https://docs.python.org/3/glossary.html#term-file-object) i.e. provides a `read` method.
|
|
34
|
+
2. `delims`: A map of delimiters used to surround atoms that contain spaces.
|
|
35
|
+
Commonly these are double-quotes to represent strings as in `"Hello"` or
|
|
36
|
+
vertical bars to allow for symbols that contain spaces as in `|some symbol|`.
|
|
37
|
+
The map `DEFAULT_DELIMS` specifies delimiters for strings and symbols.
|
|
38
|
+
3. `comment_char`: The character that starts a single line comment.
|
|
39
|
+
The default value is `;`.
|
|
40
|
+
4. `atom_handler`: A function that is called when an atom is parsed.
|
|
41
|
+
This function is passed a string that consists of the text of the parsed
|
|
42
|
+
atom. The function can convert this string into something else and the
|
|
43
|
+
returned value is used to construct the S-expression.
|
|
44
|
+
For example, this allows for converting digit sequences into ints.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tinysexpr"
|
|
7
|
+
authors = [
|
|
8
|
+
{name = "Sebastian Hack", email = "contact@s-hack.de"},
|
|
9
|
+
]
|
|
10
|
+
version = "1.0"
|
|
11
|
+
description = "A very simple S-expression parser that tries to make as little fuss as possible."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = { text="MIT" }
|
|
14
|
+
requires-python = ">=3.10"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent"
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
"GitHub" = "https://github.com/shack/tinysexpr"
|
tinysexpr-1.0/setup.cfg
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: tinysexpr
|
|
3
|
+
Version: 1.0
|
|
4
|
+
Summary: A very simple S-expression parser that tries to make as little fuss as possible.
|
|
5
|
+
Author-email: Sebastian Hack <contact@s-hack.de>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: GitHub, https://github.com/shack/tinysexpr
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
|
|
18
|
+
# S-Expression Parser
|
|
19
|
+
|
|
20
|
+
A very simple [S-expression](https://en.m.wikipedia.org/wiki/S-expression) parser that tries to make as little fuss as possible.
|
|
21
|
+
|
|
22
|
+
## Example
|
|
23
|
+
|
|
24
|
+
This module provides a single function
|
|
25
|
+
```
|
|
26
|
+
def read(file_like, delims=DEFAULT_DELIMS, comment_char=';', atom_handler=lambda x: x):
|
|
27
|
+
```
|
|
28
|
+
that returns the read S-expression.
|
|
29
|
+
Reading
|
|
30
|
+
```
|
|
31
|
+
(a b c (123 e f () x))
|
|
32
|
+
```
|
|
33
|
+
returns
|
|
34
|
+
```
|
|
35
|
+
['a', 'b', 'c', ['123', 'e', 'f', [], 'x']]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Atoms
|
|
39
|
+
|
|
40
|
+
`a`, `b`, `c` in the above example are called **atoms**.
|
|
41
|
+
Atoms are parsed using two different rules:
|
|
42
|
+
1. Every sequence that does not contain whitespace, an opening or closing parenthesis, or the comment character is an atom.
|
|
43
|
+
2. Every sequence that starts and ends with a **delimiter** is an atom.
|
|
44
|
+
The default delimiters are `"` for strings with the usual escape characters (`\n`, `\t`, `\"`, `\\`, ...) and `|` without escape characters.
|
|
45
|
+
Using the `delims` parameter, you can customize the delimiters and their escape sequences.
|
|
46
|
+
|
|
47
|
+
## Details
|
|
48
|
+
|
|
49
|
+
The parameters of `read` are:
|
|
50
|
+
1. `file_like`: An object that is [file like](https://docs.python.org/3/glossary.html#term-file-object) i.e. provides a `read` method.
|
|
51
|
+
2. `delims`: A map of delimiters used to surround atoms that contain spaces.
|
|
52
|
+
Commonly these are double-quotes to represent strings as in `"Hello"` or
|
|
53
|
+
vertical bars to allow for symbols that contain spaces as in `|some symbol|`.
|
|
54
|
+
The map `DEFAULT_DELIMS` specifies delimiters for strings and symbols.
|
|
55
|
+
3. `comment_char`: The character that starts a single line comment.
|
|
56
|
+
The default value is `;`.
|
|
57
|
+
4. `atom_handler`: A function that is called when an atom is parsed.
|
|
58
|
+
This function is passed a string that consists of the text of the parsed
|
|
59
|
+
atom. The function can convert this string into something else and the
|
|
60
|
+
returned value is used to construct the S-expression.
|
|
61
|
+
For example, this allows for converting digit sequences into ints.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tinysexpr
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from io import StringIO
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
# Message text that `read` reports when the input ends while a list is still
# open; the tests in this module reference the same constant.
_UNEXPECTED_EOF = 'unexpected end of file'
|
|
6
|
+
|
|
7
|
+
class SyntaxError(Exception):
    """Parse error raised by `read` for malformed S-expression input.

    Inside this module the name shadows Python's builtin `SyntaxError`.
    The message passed to the constructor is kept in `value` and is what
    `str()` of the exception returns.
    """

    def __init__(self, msg: str) -> None:
        # Human-readable description of what went wrong.
        self.value = msg

    def __str__(self) -> str:
        return self.value
|
|
13
|
+
|
|
14
|
+
# Default delimiter table for `read`.
# Each key is a delimiter character; each value is a pair
# (escape character or None, map from escaped character to its replacement).
DEFAULT_DELIMS = {
    # Double-quoted strings: backslash escapes are translated.
    '"': (
        '\\',
        {
            'n': '\n',
            't': '\t',
            'r': '\r',
            '\\': '\\',
            '"': '"',
        },
    ),
    # Bar-delimited symbols: no escape character, no escape sequences.
    '|': (None, {}),
}
|
|
18
|
+
|
|
19
|
+
def read(file_like, delims=DEFAULT_DELIMS, comment_char=';', atom_handler=lambda x: x):
    """Parse one S-expression from a file-like object.

    Input is consumed one character at a time through ``file_like.read(1)``.
    The expression must start with ``(`` (after optional whitespace and
    comments). Text that follows the parenthesis closing the top-level list
    is not parsed.

    Args:
        file_like: Object with a ``read(n)`` method that returns text and an
            empty string once the input is exhausted.
        delims: Map from a delimiter character to a tuple
            ``(escape_char, escape_map)``. Atoms enclosed in a delimiter may
            contain whitespace, e.g. ``"Hello world"`` or ``|some symbol|``.
            ``escape_char`` may be ``None`` to disable escapes; ``escape_map``
            maps the character after the escape character to its replacement.
            See ``DEFAULT_DELIMS`` for an example.
        comment_char: Character that starts a comment running to the end of
            the line.
        atom_handler: Callable invoked with the text of every parsed atom;
            its return value is stored in the result. For delimited atoms the
            text includes the surrounding delimiter characters, with escape
            sequences already replaced.

    Returns:
        The parsed S-expression as a (possibly nested) list of atom values.

    Raises:
        SyntaxError: This module's ``SyntaxError``, with a message ending in
            ``at <row>:<col>``, when the input does not start with ``(``,
            ends before a list or delimited atom is closed, or contains an
            escape sequence that is not in the delimiter's escape map.
    """

    # Characters that end an undelimited atom (in addition to whitespace).
    atom_terminators = set('()' + comment_char + ''.join(delims))
    ch = file_like.read(1)
    row = 1
    col = 1

    def advance():
        """Read the next character, update row/col, and return it ('' at EOF)."""
        nonlocal ch, row, col
        ch = file_like.read(1)
        if ch == '\n':
            row += 1
            col = 1
        else:
            col += 1
        return ch

    def error(msg):
        """Raise a SyntaxError annotated with the current position."""
        raise SyntaxError(f'{msg} at {row}:{col}')

    def skip_ws():
        """Skip whitespace and comments; return the current character ('' at EOF)."""
        c = ch
        while c:
            if c.isspace():
                c = advance()
            elif c == comment_char:
                # Drop the rest of the line; the newline itself is consumed
                # as whitespace on the next iteration.
                while c and c != '\n':
                    c = advance()
            else:
                break
        return c

    def read_delimited(delim):
        """Read an atom enclosed in `delim`, starting at the opening delimiter."""
        escape_char, escape_map = delims[delim]
        chars = [delim]
        c = advance()
        while c:
            if c == escape_char:
                c = advance()
                if not c:
                    # Input ended right after the escape character.
                    break
                if c not in escape_map:
                    error(f"invalid escape character '{c}'")
                chars.append(escape_map[c])
            elif c == delim:
                chars.append(c)
                advance()
                return ''.join(chars)
            else:
                chars.append(c)
            c = advance()
        # The closing delimiter never appeared. Fail here so atom_handler is
        # never called with a bogus value for an unterminated atom.
        error(_UNEXPECTED_EOF)

    def read_atom():
        """Read an undelimited atom starting at the current character."""
        chars = []
        c = ch
        while c and not c.isspace() and c not in atom_terminators:
            chars.append(c)
            c = advance()
        return ''.join(chars)

    def parse_list():
        """Parse list elements up to and including the closing parenthesis."""
        items = []
        while True:
            c = skip_ws()
            if not c:
                error(_UNEXPECTED_EOF)
            if c == '(':
                advance()
                items.append(parse_list())
            elif c == ')':
                advance()
                return items
            elif c in delims:
                items.append(atom_handler(read_delimited(c)))
            else:
                # skip_ws() guarantees c is not whitespace here.
                items.append(atom_handler(read_atom()))

    if skip_ws() != '(':
        error(f"expected '(', got '{ch}'")
    advance()
    return parse_list()
|
|
126
|
+
|
|
127
|
+
@pytest.mark.parametrize("input,expected", [
    # Empty list.
    ('()', []),
    # Delimited atoms keep their delimiters; the escaped quote is unescaped.
    ('(|a b c| || "abc\\"def" |abcgf xs!!|)', ['|a b c|', '||', '"abc"def"', '|abcgf xs!!|']),
    # Punctuation is allowed inside plain atoms.
    ('(abc b0!@#$% c-d)', ['abc', 'b0!@#$%', 'c-d']),
    # Non-ASCII characters.
    (u'(1😀)', ['1😀']),
    # Nested lists.
    ('(a b c (d e f () |x yz|))', ['a', 'b', 'c', ['d', 'e', 'f', [], '|x yz|']]),
    ('(1 (2 3) (4 5) 6 (7 (8 9)))', ['1', ['2', '3'], ['4', '5'], '6', ['7', ['8', '9']]]),
    # Text after the parenthesis closing the top-level list is not parsed.
    ('(1 (2 3) (4 5)); 6 (7 (8 9)))', ['1', ['2', '3'], ['4', '5']]),
])
def test_correct(input, expected):
    """Well-formed input parses into the expected nested list."""
    parsed = read(StringIO(input))
    assert parsed == expected
|
|
138
|
+
|
|
139
|
+
@pytest.mark.parametrize("input,msg", [
    # Anything that does not start with '(' is rejected before parsing begins.
    ('', "expected '(', got ''"),
    ('abc', "expected '(', got 'a'"),
    ('|a b c', "expected '(', got '|'"),
    # Input that ends while a list or delimited atom is still open.
    ('(a', _UNEXPECTED_EOF),
    ('(|a b c', _UNEXPECTED_EOF),
    ('("abc"cde"', _UNEXPECTED_EOF),
    ('(1 (2 3) (4 5) 6 (7 (8 9))', _UNEXPECTED_EOF),
    # Escape sequence that is not in the delimiter's escape map.
    ('("abc\\9cde")', "invalid escape character '9'"),
])
def test_error(input, msg):
    """Malformed input raises this module's SyntaxError with the expected message."""
    with pytest.raises(SyntaxError) as excinfo:
        read(StringIO(input))
    # Check the raised exception itself, after the `with` block: code placed
    # after the raising call inside the block would never execute.
    assert msg in str(excinfo.value)
|
|
152
|
+
|
|
153
|
+
if __name__ == '__main__':
    import sys

    # Command-line use: parse the file named by the first argument and print
    # the resulting nested list. Nothing happens when no argument is given.
    if len(sys.argv) > 1:
        # The context manager closes the file even if parsing raises.
        with open(sys.argv[1], 'rt') as source:
            print(read(source))
|