techscript 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- techscript/__init__.py +2 -0
- techscript/__main__.py +5 -0
- techscript/ast_nodes.py +239 -0
- techscript/builtins.py +298 -0
- techscript/cli.py +190 -0
- techscript/environment.py +75 -0
- techscript/errors.py +153 -0
- techscript/interpreter.py +674 -0
- techscript/lexer.py +336 -0
- techscript/parser.py +637 -0
- techscript/repl.py +86 -0
- techscript/tokens.py +132 -0
- techscript/transpiler.py +290 -0
- techscript/web.py +143 -0
- techscript-1.0.3.dist-info/METADATA +510 -0
- techscript-1.0.3.dist-info/RECORD +20 -0
- techscript-1.0.3.dist-info/WHEEL +5 -0
- techscript-1.0.3.dist-info/entry_points.txt +2 -0
- techscript-1.0.3.dist-info/licenses/LICENSE +21 -0
- techscript-1.0.3.dist-info/top_level.txt +1 -0
techscript/lexer.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""TechScript Lexer — converts raw source text into a stream of tokens.
|
|
2
|
+
|
|
3
|
+
Handles:
|
|
4
|
+
* number literals (int, float, hex, binary, octal, scientific, underscore sep)
|
|
5
|
+
* string literals (single/double/triple-quoted, escape sequences)
|
|
6
|
+
* f-strings with ``{expr}`` interpolation markers
|
|
7
|
+
* identifiers and reserved keywords
|
|
8
|
+
* all operators and delimiters defined in ``tokens.py``
|
|
9
|
+
* Python-style INDENT / DEDENT tracking
|
|
10
|
+
* single-line ``#`` and block ``#[ … ]#`` comments
|
|
11
|
+
* tab rejection with a friendly error
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from techscript.tokens import Token, TokenType, KEYWORDS
|
|
17
|
+
from techscript.errors import LexerError
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Lexer:
    """Convert a TechScript source string into a flat list of tokens."""

    def __init__(self, source: str, filename: str = "<stdin>") -> None:
        # Raw program text plus the name reported in diagnostics.
        self.source = source
        self.filename = filename
        # Cursor state: absolute character offset, and 1-based line/column
        # kept in sync by _advance() for error reporting.
        self.pos = 0
        self.line = 1
        self.column = 1
        # Output accumulator; populated by tokenize().
        self.tokens: list[Token] = []
|
|
30
|
+
|
|
31
|
+
# ------------------------------------------------------------------
|
|
32
|
+
# Helpers
|
|
33
|
+
# ------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
def _peek(self, offset: int = 0) -> str | None:
|
|
36
|
+
idx = self.pos + offset
|
|
37
|
+
return self.source[idx] if idx < len(self.source) else None
|
|
38
|
+
|
|
39
|
+
def _advance(self) -> str:
|
|
40
|
+
ch = self.source[self.pos]
|
|
41
|
+
self.pos += 1
|
|
42
|
+
if ch == "\n":
|
|
43
|
+
self.line += 1
|
|
44
|
+
self.column = 1
|
|
45
|
+
else:
|
|
46
|
+
self.column += 1
|
|
47
|
+
return ch
|
|
48
|
+
|
|
49
|
+
def _match(self, expected: str) -> bool:
|
|
50
|
+
if self.pos < len(self.source) and self.source[self.pos] == expected:
|
|
51
|
+
self._advance()
|
|
52
|
+
return True
|
|
53
|
+
return False
|
|
54
|
+
|
|
55
|
+
def _add(self, tt: TokenType, value: str, *, line: int | None = None, col: int | None = None) -> None:
|
|
56
|
+
self.tokens.append(Token(tt, value, line or self.line, col or self.column))
|
|
57
|
+
|
|
58
|
+
def _error(self, msg: str) -> LexerError:
|
|
59
|
+
return LexerError(msg, line=self.line, column=self.column)
|
|
60
|
+
|
|
61
|
+
# ------------------------------------------------------------------
|
|
62
|
+
# Public API
|
|
63
|
+
# ------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
    def tokenize(self) -> list[Token]:
        """Run the tokeniser and return the full token list.

        Dispatches on the current character.  Branch order matters: the
        ``f``/``r`` string-prefix checks must run before the identifier
        branch, or the prefix letter would be consumed as the start of a
        plain name.

        NOTE(review): the module docstring promises tab rejection and
        INDENT/DEDENT tracking, but tabs are skipped as ordinary whitespace
        here and no INDENT/DEDENT tokens are emitted -- confirm intended
        behaviour.
        """
        while self.pos < len(self.source):
            ch = self._peek()

            # Skip whitespace (spaces, carriage returns, and -- see NOTE
            # above -- tabs).
            if ch in (" ", "\r", "\t"):
                self._advance()
                continue

            # Newline: emitted as its own token (the value is the literal
            # two-character text '\n' for display purposes).
            if ch == "\n":
                self._add(TokenType.NEWLINE, "\\n")
                self._advance()
                continue

            # Comments (both '#' line comments and '#[ ... ]#' blocks).
            if ch == "#":
                self._skip_comment()
                continue

            # Numbers
            if ch is not None and ch.isdigit():
                self._read_number()
                continue

            # Strings
            if ch in ('"', "'"):
                self._read_string(ch)
                continue

            # f-strings -- must be checked before the identifier branch.
            if ch == "f" and self._peek(1) in ('"', "'"):
                self._advance()  # skip 'f'
                self._read_fstring(self._peek())  # type: ignore[arg-type]
                continue

            # r-strings (raw) -- likewise checked before identifiers.
            if ch == "r" and self._peek(1) in ('"', "'"):
                self._advance()
                self._read_string(self._peek(), raw=True)  # type: ignore[arg-type]
                continue

            # Identifiers / keywords
            if ch is not None and (ch.isalpha() or ch == "_"):
                self._read_identifier()
                continue

            # Operators & delimiters (raises LexerError on anything unknown).
            self._read_symbol()

        self._add(TokenType.EOF, "")
        return self.tokens
|
|
118
|
+
|
|
119
|
+
# ------------------------------------------------------------------
|
|
120
|
+
# Comments
|
|
121
|
+
# ------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
def _skip_comment(self) -> None:
|
|
124
|
+
self._advance() # skip '#'
|
|
125
|
+
# Block comment #[ … ]#
|
|
126
|
+
if self._peek() == "[":
|
|
127
|
+
self._advance() # skip '['
|
|
128
|
+
while self.pos < len(self.source):
|
|
129
|
+
if self.source[self.pos] == "]" and self._peek(1) == "#":
|
|
130
|
+
self._advance() # ]
|
|
131
|
+
self._advance() # #
|
|
132
|
+
return
|
|
133
|
+
self._advance()
|
|
134
|
+
raise self._error("Unterminated block comment (missing ]#)")
|
|
135
|
+
# Single-line comment
|
|
136
|
+
while self.pos < len(self.source) and self.source[self.pos] != "\n":
|
|
137
|
+
self._advance()
|
|
138
|
+
|
|
139
|
+
# ------------------------------------------------------------------
|
|
140
|
+
# Numbers
|
|
141
|
+
# ------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
def _read_number(self) -> None:
|
|
144
|
+
start_col = self.column
|
|
145
|
+
num = ""
|
|
146
|
+
|
|
147
|
+
# Hex / binary / octal prefixes
|
|
148
|
+
if self._peek() == "0" and self._peek(1) in ("x", "X", "b", "B", "o", "O"):
|
|
149
|
+
num += self._advance() # '0'
|
|
150
|
+
num += self._advance() # prefix letter
|
|
151
|
+
while self.pos < len(self.source) and (self.source[self.pos].isalnum() or self.source[self.pos] == "_"):
|
|
152
|
+
ch = self._advance()
|
|
153
|
+
if ch != "_":
|
|
154
|
+
num += ch
|
|
155
|
+
self._add(TokenType.NUMBER_INT, num, col=start_col)
|
|
156
|
+
return
|
|
157
|
+
|
|
158
|
+
is_float = False
|
|
159
|
+
while self.pos < len(self.source) and (self.source[self.pos].isdigit() or self.source[self.pos] == "_"):
|
|
160
|
+
ch = self._advance()
|
|
161
|
+
if ch != "_":
|
|
162
|
+
num += ch
|
|
163
|
+
|
|
164
|
+
# Decimal point (not range ..)
|
|
165
|
+
if (
|
|
166
|
+
self.pos < len(self.source)
|
|
167
|
+
and self.source[self.pos] == "."
|
|
168
|
+
and self._peek(1) is not None
|
|
169
|
+
and self._peek(1) not in (".",) # avoid eating ..
|
|
170
|
+
and (self._peek(1).isdigit() if self._peek(1) else False)
|
|
171
|
+
):
|
|
172
|
+
is_float = True
|
|
173
|
+
num += self._advance() # '.'
|
|
174
|
+
while self.pos < len(self.source) and (self.source[self.pos].isdigit() or self.source[self.pos] == "_"):
|
|
175
|
+
ch = self._advance()
|
|
176
|
+
if ch != "_":
|
|
177
|
+
num += ch
|
|
178
|
+
|
|
179
|
+
# Scientific notation
|
|
180
|
+
if self.pos < len(self.source) and self.source[self.pos] in ("e", "E"):
|
|
181
|
+
is_float = True
|
|
182
|
+
num += self._advance()
|
|
183
|
+
if self.pos < len(self.source) and self.source[self.pos] in ("+", "-"):
|
|
184
|
+
num += self._advance()
|
|
185
|
+
while self.pos < len(self.source) and self.source[self.pos].isdigit():
|
|
186
|
+
num += self._advance()
|
|
187
|
+
|
|
188
|
+
tt = TokenType.NUMBER_FLOAT if is_float else TokenType.NUMBER_INT
|
|
189
|
+
self._add(tt, num, col=start_col)
|
|
190
|
+
|
|
191
|
+
# ------------------------------------------------------------------
|
|
192
|
+
# Strings
|
|
193
|
+
# ------------------------------------------------------------------
|
|
194
|
+
|
|
195
|
+
    def _read_string(self, quote: str, *, raw: bool = False) -> None:
        """Lex a string literal; *quote* is the opening quote character.

        Supports single-line and triple-quoted forms.  When *raw* is true
        (r-strings), backslash sequences are kept verbatim instead of being
        translated by ``_read_escape``.
        """
        start_col = self.column
        self._advance()  # opening quote
        result = ""

        # Triple-quoted: two more copies of the opening quote follow it.
        if self._peek() == quote and self._peek(1) == quote:
            self._advance()
            self._advance()
            while self.pos < len(self.source):
                # Three quotes in a row terminate the literal.
                if self.source[self.pos] == quote and self._peek(1) == quote and self._peek(2) == quote:
                    self._advance(); self._advance(); self._advance()
                    self._add(TokenType.STRING, result, col=start_col)
                    return
                if not raw and self.source[self.pos] == "\\":
                    result += self._read_escape()
                else:
                    result += self._advance()
            raise self._error("Unterminated triple-quoted string")

        # Single-line string: a bare newline before the closing quote is an
        # error (multi-line content requires triple quotes).
        while self.pos < len(self.source) and self.source[self.pos] != quote:
            if self.source[self.pos] == "\n":
                raise self._error("Unterminated string (use triple quotes for multi-line)")
            if not raw and self.source[self.pos] == "\\":
                result += self._read_escape()
            else:
                result += self._advance()

        if self.pos >= len(self.source):
            raise self._error("Unterminated string")

        self._advance()  # closing quote
        self._add(TokenType.STRING, result, col=start_col)
|
|
229
|
+
|
|
230
|
+
def _read_fstring(self, quote: str) -> None:
|
|
231
|
+
start_col = self.column
|
|
232
|
+
self._advance() # opening quote
|
|
233
|
+
result = ""
|
|
234
|
+
while self.pos < len(self.source) and self.source[self.pos] != quote:
|
|
235
|
+
if self.source[self.pos] == "\n":
|
|
236
|
+
raise self._error("Unterminated f-string")
|
|
237
|
+
result += self._advance()
|
|
238
|
+
if self.pos >= len(self.source):
|
|
239
|
+
raise self._error("Unterminated f-string")
|
|
240
|
+
self._advance() # closing quote
|
|
241
|
+
self._add(TokenType.FSTRING, result, col=start_col)
|
|
242
|
+
|
|
243
|
+
def _read_escape(self) -> str:
|
|
244
|
+
self._advance() # backslash
|
|
245
|
+
if self.pos >= len(self.source):
|
|
246
|
+
return "\\"
|
|
247
|
+
ch = self._advance()
|
|
248
|
+
return {"n": "\n", "t": "\t", "r": "\r", "\\": "\\", "'": "'", '"': '"', "0": "\0"}.get(ch, "\\" + ch)
|
|
249
|
+
|
|
250
|
+
# ------------------------------------------------------------------
|
|
251
|
+
# Identifiers / keywords
|
|
252
|
+
# ------------------------------------------------------------------
|
|
253
|
+
|
|
254
|
+
def _read_identifier(self) -> None:
|
|
255
|
+
start_col = self.column
|
|
256
|
+
name = ""
|
|
257
|
+
while self.pos < len(self.source) and (self.source[self.pos].isalnum() or self.source[self.pos] == "_"):
|
|
258
|
+
name += self._advance()
|
|
259
|
+
|
|
260
|
+
if name == "true":
|
|
261
|
+
self._add(TokenType.BOOL_TRUE, name, col=start_col)
|
|
262
|
+
elif name == "false":
|
|
263
|
+
self._add(TokenType.BOOL_FALSE, name, col=start_col)
|
|
264
|
+
elif name == "none":
|
|
265
|
+
self._add(TokenType.NONE, name, col=start_col)
|
|
266
|
+
elif name in KEYWORDS:
|
|
267
|
+
self._add(TokenType.KEYWORD, name, col=start_col)
|
|
268
|
+
else:
|
|
269
|
+
self._add(TokenType.IDENTIFIER, name, col=start_col)
|
|
270
|
+
|
|
271
|
+
# ------------------------------------------------------------------
|
|
272
|
+
# Operators / delimiters
|
|
273
|
+
# ------------------------------------------------------------------
|
|
274
|
+
|
|
275
|
+
def _read_symbol(self) -> None:
|
|
276
|
+
start_col = self.column
|
|
277
|
+
ch = self._advance()
|
|
278
|
+
|
|
279
|
+
# --- multi-char operators ---
|
|
280
|
+
|
|
281
|
+
# ** and *=
|
|
282
|
+
if ch == "*":
|
|
283
|
+
if self._match("*"):
|
|
284
|
+
self._add(TokenType.POWER, "**", col=start_col); return
|
|
285
|
+
if self._match("="):
|
|
286
|
+
self._add(TokenType.STAR_ASSIGN, "*=", col=start_col); return
|
|
287
|
+
self._add(TokenType.STAR, "*", col=start_col); return
|
|
288
|
+
|
|
289
|
+
# .. ..= ...
|
|
290
|
+
if ch == ".":
|
|
291
|
+
if self._peek() == ".":
|
|
292
|
+
self._advance()
|
|
293
|
+
if self._match("."):
|
|
294
|
+
self._add(TokenType.SPREAD, "...", col=start_col); return
|
|
295
|
+
if self._match("="):
|
|
296
|
+
self._add(TokenType.DOTDOT_EQUAL, "..=", col=start_col); return
|
|
297
|
+
self._add(TokenType.DOTDOT, "..", col=start_col); return
|
|
298
|
+
self._add(TokenType.DOT, ".", col=start_col); return
|
|
299
|
+
|
|
300
|
+
# Two-char lookup tables
|
|
301
|
+
_two = {
|
|
302
|
+
"=": {"=": TokenType.EQUAL, ">": TokenType.ARROW},
|
|
303
|
+
"!": {"=": TokenType.NOT_EQUAL},
|
|
304
|
+
"<": {"=": TokenType.LESS_EQUAL},
|
|
305
|
+
">": {"=": TokenType.GREATER_EQUAL},
|
|
306
|
+
"+": {"=": TokenType.PLUS_ASSIGN},
|
|
307
|
+
"-": {"=": TokenType.MINUS_ASSIGN},
|
|
308
|
+
"/": {"/": TokenType.DOUBLE_SLASH, "=": TokenType.SLASH_ASSIGN},
|
|
309
|
+
"|": {">": TokenType.PIPE},
|
|
310
|
+
"?": {".": TokenType.OPTIONAL_CHAIN, "?": TokenType.NULLISH},
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
if ch in _two and self.pos < len(self.source):
|
|
314
|
+
nxt = self.source[self.pos]
|
|
315
|
+
if nxt in _two[ch]:
|
|
316
|
+
self._advance()
|
|
317
|
+
self._add(_two[ch][nxt], ch + nxt, col=start_col)
|
|
318
|
+
return
|
|
319
|
+
|
|
320
|
+
_single = {
|
|
321
|
+
"+": TokenType.PLUS, "-": TokenType.MINUS,
|
|
322
|
+
"/": TokenType.SLASH, "%": TokenType.PERCENT,
|
|
323
|
+
"(": TokenType.LPAREN, ")": TokenType.RPAREN,
|
|
324
|
+
"[": TokenType.LBRACKET, "]": TokenType.RBRACKET,
|
|
325
|
+
"{": TokenType.LBRACE, "}": TokenType.RBRACE,
|
|
326
|
+
",": TokenType.COMMA, ":": TokenType.COLON,
|
|
327
|
+
"@": TokenType.AT, "?": TokenType.QUESTION,
|
|
328
|
+
"=": TokenType.ASSIGN, "<": TokenType.LESS,
|
|
329
|
+
">": TokenType.GREATER,
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
if ch in _single:
|
|
333
|
+
self._add(_single[ch], ch, col=start_col)
|
|
334
|
+
return
|
|
335
|
+
|
|
336
|
+
raise self._error(f"Unexpected character: '{ch}'")
|