vex-ast 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vex_ast/__init__.py +65 -0
- vex_ast/ast/__init__.py +75 -0
- vex_ast/ast/core.py +71 -0
- vex_ast/ast/expressions.py +233 -0
- vex_ast/ast/interfaces.py +192 -0
- vex_ast/ast/literals.py +80 -0
- vex_ast/ast/navigator.py +213 -0
- vex_ast/ast/operators.py +136 -0
- vex_ast/ast/statements.py +351 -0
- vex_ast/ast/validators.py +114 -0
- vex_ast/ast/vex_nodes.py +241 -0
- vex_ast/parser/__init__.py +0 -0
- vex_ast/parser/factory.py +179 -0
- vex_ast/parser/interfaces.py +35 -0
- vex_ast/parser/python_parser.py +725 -0
- vex_ast/parser/strategies.py +0 -0
- vex_ast/registry/__init__.py +51 -0
- vex_ast/registry/api.py +155 -0
- vex_ast/registry/categories.py +136 -0
- vex_ast/registry/language_map.py +78 -0
- vex_ast/registry/registry.py +153 -0
- vex_ast/registry/signature.py +143 -0
- vex_ast/registry/simulation_behavior.py +9 -0
- vex_ast/registry/validation.py +44 -0
- vex_ast/serialization/__init__.py +37 -0
- vex_ast/serialization/json_deserializer.py +264 -0
- vex_ast/serialization/json_serializer.py +148 -0
- vex_ast/serialization/schema.py +471 -0
- vex_ast/utils/__init__.py +0 -0
- vex_ast/utils/errors.py +112 -0
- vex_ast/utils/source_location.py +39 -0
- vex_ast/utils/type_definitions.py +0 -0
- vex_ast/visitors/__init__.py +0 -0
- vex_ast/visitors/analyzer.py +103 -0
- vex_ast/visitors/base.py +130 -0
- vex_ast/visitors/printer.py +145 -0
- vex_ast/visitors/transformer.py +0 -0
- vex_ast-0.1.0.dist-info/METADATA +176 -0
- vex_ast-0.1.0.dist-info/RECORD +41 -0
- vex_ast-0.1.0.dist-info/WHEEL +5 -0
- vex_ast-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,725 @@
|
|
1
|
+
"""Python code parser implementation."""
|
2
|
+
|
3
|
+
import ast
|
4
|
+
import textwrap
|
5
|
+
from typing import Any, Dict, List, Optional, Type, Union, cast
|
6
|
+
|
7
|
+
from .interfaces import BaseParser
|
8
|
+
from .factory import NodeFactory
|
9
|
+
|
10
|
+
from ..ast.core import Expression, Program, Statement
|
11
|
+
from ..ast.expressions import (
|
12
|
+
AttributeAccess, BinaryOperation, FunctionCall, Identifier, KeywordArgument,
|
13
|
+
UnaryOperation, VariableReference
|
14
|
+
)
|
15
|
+
|
16
|
+
from ..ast.interfaces import IExpression, IStatement
|
17
|
+
from ..ast.literals import (
|
18
|
+
BooleanLiteral, NoneLiteral, NumberLiteral, StringLiteral
|
19
|
+
)
|
20
|
+
|
21
|
+
from ..ast.operators import Operator, PYTHON_BINARY_OP_MAP, PYTHON_UNARY_OP_MAP, PYTHON_COMP_OP_MAP
|
22
|
+
from ..ast.statements import (
|
23
|
+
Argument, Assignment, BreakStatement, ContinueStatement, ExpressionStatement,
|
24
|
+
ForLoop, FunctionDefinition, IfStatement, ReturnStatement, WhileLoop
|
25
|
+
)
|
26
|
+
from ..ast.vex_nodes import create_vex_api_call, VexAPICall
|
27
|
+
from ..registry.registry import registry
|
28
|
+
|
29
|
+
from ..utils.errors import ErrorHandler, ErrorType, VexSyntaxError
|
30
|
+
from ..utils.source_location import SourceLocation
|
31
|
+
|
32
|
+
class PythonParser(BaseParser):
    """Parser for Python code using the built-in ast module (Python 3.8+).

    The source text is parsed with :func:`ast.parse` and the resulting Python
    syntax tree is translated, node by node, into VEX AST nodes created
    through a ``NodeFactory``.  Constructs that have no VEX counterpart are
    reported on ``self.error_handler`` and replaced by placeholder nodes so
    that conversion of the rest of the program can continue.
    """

    # Python ``ast`` operator class name -> operator symbol.  The symbols are
    # the keys used to look up VEX operators in the PYTHON_*_OP_MAP tables.
    _BINARY_OP_SYMBOLS: Dict[str, str] = {
        'Add': '+', 'Sub': '-', 'Mult': '*', 'Div': '/',
        'FloorDiv': '//', 'Mod': '%', 'Pow': '**',
        'LShift': '<<', 'RShift': '>>',
        'BitOr': '|', 'BitXor': '^', 'BitAnd': '&',
        'MatMult': '@',
    }
    _UNARY_OP_SYMBOLS: Dict[str, str] = {
        'UAdd': '+', 'USub': '-', 'Not': 'not', 'Invert': '~',
    }
    _COMPARE_OP_SYMBOLS: Dict[str, str] = {
        'Eq': '==', 'NotEq': '!=',
        'Lt': '<', 'LtE': '<=',
        'Gt': '>', 'GtE': '>=',
        'Is': 'is', 'IsNot': 'is not',
        'In': 'in', 'NotIn': 'not in',
    }
    _BOOL_OP_SYMBOLS: Dict[str, str] = {
        'And': 'and',
        'Or': 'or',
    }

    def __init__(self, source: str, filename: str = "<string>",
                 error_handler: Optional[ErrorHandler] = None):
        """Store the source to parse and set up the node factory.

        Args:
            source: Python source text to parse.
            filename: Name recorded in source locations and passed to
                ``ast.parse``.
            error_handler: Handler forwarded to ``BaseParser`` and to the
                ``NodeFactory``; may be ``None``.
        """
        super().__init__(error_handler)
        self.source = source
        self.filename = filename
        # NOTE(review): the factory receives the caller-supplied handler
        # (possibly None) rather than self.error_handler -- confirm that both
        # are meant to share one handler when None is passed.
        self.factory = NodeFactory(error_handler)
        self._py_ast: Optional[ast.Module] = None

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    def _get_location(self, node: ast.AST) -> Optional[SourceLocation]:
        """Extract source location from a Python AST node.

        Returns ``None`` when the node carries no ``lineno`` attribute.
        Columns are converted from ast's 0-based offsets to 1-based values.
        """
        if not hasattr(node, 'lineno'):
            return None

        loc = SourceLocation(
            line=node.lineno,
            column=node.col_offset + 1,  # ast is 0-indexed
            filename=self.filename
        )

        # Add end position if available (Python 3.8+)
        end_line = getattr(node, 'end_lineno', None)
        end_col = getattr(node, 'end_col_offset', None)
        if end_line is not None and end_col is not None:
            loc.end_line = end_line
            loc.end_column = end_col + 1

        return loc

    def _report(self, message: str, loc: Optional[SourceLocation]) -> None:
        """Record a parser error on ``self.error_handler``."""
        self.error_handler.add_error(ErrorType.PARSER_ERROR, message, loc)

    def _lookup_binary_operator(self, op_node: ast.AST):
        """Map a Python binary operator node to a VEX operator.

        Returns:
            ``(operator, python_class_name)`` where ``operator`` is ``None``
            when the Python operator has no entry in the lookup tables.
        """
        op_name = type(op_node).__name__
        symbol = self._BINARY_OP_SYMBOLS.get(op_name)
        op = PYTHON_BINARY_OP_MAP.get(symbol) if symbol is not None else None
        return op, op_name

    def _placeholder_statement(self, text: str,
                               loc: Optional[SourceLocation]) -> Statement:
        """Build an expression statement wrapping an identifier named *text*."""
        return self.factory.create_expression_statement(
            self.factory.create_identifier(text, loc), loc
        )

    @staticmethod
    def _callee_name(func: Any) -> Optional[str]:
        """Return ``name`` or ``object.attribute`` for a converted callee.

        Returns ``None`` when the callee exposes neither a ``name`` attribute
        nor an ``object``/``attribute`` pair whose object has a ``name``.
        """
        if hasattr(func, 'name'):
            return func.name
        if hasattr(func, 'attribute') and hasattr(func, 'object'):
            obj = func.object
            if hasattr(obj, 'name'):
                return f"{obj.name}.{func.attribute}"
        return None

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------

    def _convert_expression(self, node: ast.expr) -> Expression:
        """Convert a Python expression node to a VEX AST expression.

        Unsupported node types are reported as parser errors and converted to
        an identifier named ``<unsupported:TypeName>``.
        """
        if isinstance(node, ast.Constant):
            return self._convert_constant(node)
        if isinstance(node, ast.Name):
            return self._convert_name(node)
        if isinstance(node, ast.Attribute):
            # Attribute access (e.g., left_motor.set_velocity)
            value = self._convert_expression(node.value)
            return self.factory.create_attribute_access(
                value, node.attr, self._get_location(node)
            )
        if isinstance(node, ast.BinOp):
            return self._convert_binop(node)
        if isinstance(node, ast.UnaryOp):
            return self._convert_unaryop(node)
        if isinstance(node, ast.Call):
            return self._convert_call(node)
        if isinstance(node, ast.Compare):
            return self._convert_compare(node)
        if isinstance(node, ast.BoolOp):
            return self._convert_boolop(node)
        if isinstance(node, (ast.List, ast.Tuple)):
            return self._convert_sequence(node)
        if isinstance(node, ast.Subscript):
            return self._convert_subscript(node)
        if isinstance(node, ast.Lambda):
            # There is no dedicated lambda node: warn and emit a placeholder
            # call to an identifier named "lambda" with no arguments.
            loc = self._get_location(node)
            self._report("Lambda expressions are not fully supported", loc)
            lambda_func = self.factory.create_identifier("lambda", loc)
            return self.factory.create_function_call(lambda_func, [], [], loc)
        if isinstance(node, ast.Dict):
            return self._convert_dict(node)

        # Fallback for unsupported nodes
        loc = self._get_location(node)
        type_name = type(node).__name__
        self._report(f"Unsupported expression type: {type_name}", loc)
        return self.factory.create_identifier(f"<unsupported:{type_name}>", loc)

    def _convert_constant(self, node: ast.Constant) -> Expression:
        """Convert a literal constant (bool, number, string or None)."""
        value = node.value
        loc = self._get_location(node)

        # bool must be tested before int/float: bool is a subclass of int, so
        # testing numbers first would turn True/False into number literals.
        if isinstance(value, bool):
            return self.factory.create_boolean_literal(value, loc)
        if isinstance(value, (int, float)):
            return self.factory.create_number_literal(value, loc)
        if isinstance(value, str):
            return self.factory.create_string_literal(value, loc)
        if value is None:
            return self.factory.create_none_literal(loc)

        self._report(f"Unsupported constant type: {type(value).__name__}", loc)
        # Fallback - treat as string
        return self.factory.create_string_literal(str(value), loc)

    def _convert_name(self, node: ast.Name) -> Expression:
        """Convert a bare name to a variable reference or an identifier."""
        loc = self._get_location(node)
        ident = self.factory.create_identifier(node.id, loc)
        # In a load context, create a variable reference
        if isinstance(node.ctx, ast.Load):
            return self.factory.create_variable_reference(ident, loc)
        # For store and del contexts, just return the identifier;
        # these are handled by parent nodes (e.g., Assignment)
        return ident

    def _convert_binop(self, node: ast.BinOp) -> Expression:
        """Convert a binary arithmetic/bitwise operation."""
        left = self._convert_expression(node.left)
        right = self._convert_expression(node.right)
        loc = self._get_location(node)

        op, op_name = self._lookup_binary_operator(node.op)
        if op:
            return self.factory.create_binary_operation(left, op, right, loc)

        # Unknown operator: report it and fall back to an addition node so
        # the surrounding tree stays well-formed.
        self._report(f"Unsupported binary operator: {op_name}", loc)
        return self.factory.create_binary_operation(
            left, Operator.ADD, right, loc
        )

    def _convert_unaryop(self, node: ast.UnaryOp) -> Expression:
        """Convert a unary operation (+x, -x, not x, ~x)."""
        operand = self._convert_expression(node.operand)
        loc = self._get_location(node)

        op_name = type(node.op).__name__
        symbol = self._UNARY_OP_SYMBOLS.get(op_name)
        op = PYTHON_UNARY_OP_MAP.get(symbol) if symbol is not None else None
        if op:
            return self.factory.create_unary_operation(op, operand, loc)

        # Unknown operator: report it and fall back to unary plus.
        self._report(f"Unsupported unary operator: {op_name}", loc)
        return self.factory.create_unary_operation(
            Operator.UNARY_PLUS, operand, loc
        )

    def _convert_call(self, node: ast.Call) -> Expression:
        """Convert a call, producing a VEX API call when the registry knows it.

        ``**kwargs`` unpacking is reported as an error and skipped.
        """
        func = self._convert_expression(node.func)
        args = [self._convert_expression(arg) for arg in node.args]
        loc = self._get_location(node)

        keywords = []
        for kw in node.keywords:
            if kw.arg is None:  # **kwargs
                self._report(
                    "Keyword argument unpacking (**kwargs) is not supported",
                    self._get_location(kw)
                )
                continue
            value = self._convert_expression(kw.value)
            keywords.append(self.factory.create_keyword_argument(
                kw.arg, value, self._get_location(kw)
            ))

        # Check if this is a VEX API call
        function_name = self._callee_name(func)
        if function_name:
            if '.' in function_name:
                # The object's type is unknown at parse time, so only the
                # method name (the part after the first dot) is looked up.
                method_name = function_name.split('.', 1)[1]
                if registry.get_function(method_name):
                    return create_vex_api_call(func, args, keywords, loc)
            elif registry.get_function(function_name):
                # Direct function known to the registry
                return create_vex_api_call(func, args, keywords, loc)

        # Regular function call
        return self.factory.create_function_call(func, args, keywords, loc)

    def _convert_compare(self, node: ast.Compare) -> Expression:
        """Convert a comparison (e.g., a < b, x == y).

        NOTE(review): a chained comparison ``a < b < c`` is folded
        left-to-right into ``(a < b) < c``, which is not Python's
        ``a < b and b < c`` meaning -- confirm whether consumers rely on this
        shape before changing it.
        """
        left = self._convert_expression(node.left)
        loc = self._get_location(node)

        if not node.ops or not node.comparators:
            self._report(
                "Invalid comparison with no operators or comparators", loc
            )
            # Return the left operand as a placeholder expression
            return left

        result = left
        for op, comparator in zip(node.ops, node.comparators):
            right = self._convert_expression(comparator)
            op_name = type(op).__name__

            symbol = self._COMPARE_OP_SYMBOLS.get(op_name)
            if symbol is None:
                self._report(f"Unknown comparison operator: {op_name}", loc)
                continue

            vex_op = PYTHON_COMP_OP_MAP.get(symbol)
            if vex_op:
                # First round combines left and right; later rounds combine
                # the previous result with the next right operand.
                result = self.factory.create_binary_operation(
                    result, vex_op, right, loc
                )
            else:
                self._report(
                    f"Unsupported comparison operator: {op_name}", loc
                )

        return result

    def _convert_boolop(self, node: ast.BoolOp) -> Expression:
        """Convert a boolean operation (and, or) into nested binary operations."""
        loc = self._get_location(node)

        if not node.values:
            self._report("Boolean operation with no values", loc)
            # Return a placeholder expression
            return self.factory.create_boolean_literal(False, loc)

        op_name = type(node.op).__name__
        symbol = self._BOOL_OP_SYMBOLS.get(op_name)
        if symbol is None:
            self._report(f"Unknown boolean operator: {op_name}", loc)
            vex_op = Operator.LOGICAL_AND  # Fallback
        else:
            # NOTE(review): boolean operators are looked up in
            # PYTHON_COMP_OP_MAP; that table is defined elsewhere -- confirm
            # it really contains 'and' / 'or'.
            vex_op = PYTHON_COMP_OP_MAP.get(symbol)
            if not vex_op:
                self._report(f"Unsupported boolean operator: {op_name}", loc)
                vex_op = Operator.LOGICAL_AND  # Fallback

        # Convert all operands, then build the tree from left to right
        values = [self._convert_expression(val) for val in node.values]
        result = values[0]
        for right in values[1:]:
            result = self.factory.create_binary_operation(
                result, vex_op, right, loc
            )
        return result

    def _convert_sequence(self, node: Union[ast.List, ast.Tuple]) -> Expression:
        """Convert a list/tuple literal into a call to ``list`` or ``tuple``.

        There is no dedicated list/tuple node, so the elements become the
        positional arguments of a function call on a special identifier.
        """
        loc = self._get_location(node)
        elements = [self._convert_expression(elt) for elt in node.elts]
        list_name = "list" if isinstance(node, ast.List) else "tuple"
        list_func = self.factory.create_identifier(list_name, loc)
        return self.factory.create_function_call(list_func, elements, [], loc)

    def _convert_subscript(self, node: ast.Subscript) -> Expression:
        """Convert ``a[b]`` into a call of ``a.__getitem__`` with ``b``."""
        loc = self._get_location(node)
        value = self._convert_expression(node.value)

        # Python < 3.9 wraps the index in ast.Index; later versions store the
        # expression directly.  ast.Index is deprecated, so look it up
        # defensively instead of assuming the attribute exists.
        index_cls = getattr(ast, 'Index', None)
        slice_node = node.slice
        if index_cls is not None and isinstance(slice_node, index_cls):
            slice_node = slice_node.value
        index = self._convert_expression(slice_node)

        # A function call represents subscripting for now; a dedicated
        # SubscriptExpression node might be better in the future.
        return self.factory.create_function_call(
            self.factory.create_attribute_access(value, "__getitem__", loc),
            [index], [], loc
        )

    def _convert_dict(self, node: ast.Dict) -> Expression:
        """Convert a dict literal into a call to ``dict`` with keyword args.

        Only string-constant keys are kept; dictionary unpacking and other
        key types are reported as errors and their entries are dropped.
        """
        loc = self._get_location(node)
        # There is no dedicated dict node, so create a function call
        dict_func = self.factory.create_identifier("dict", loc)

        keywords = []
        for key, value in zip(node.keys, node.values):
            if key is None:  # dict unpacking (**d)
                self._report("Dictionary unpacking is not supported", loc)
                continue

            if isinstance(key, ast.Constant) and isinstance(key.value, str):
                # String keys become keyword arguments
                value_expr = self._convert_expression(value)
                keywords.append(self.factory.create_keyword_argument(
                    key.value, value_expr, loc
                ))
            else:
                self._report(
                    "Only string keys in dictionaries are fully supported", loc
                )

        return self.factory.create_function_call(dict_func, [], keywords, loc)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------

    def _convert_statement(self, node: ast.stmt) -> Statement:
        """Convert a Python statement node to a VEX AST statement.

        Unsupported statement types are reported as parser errors and
        converted to placeholder expression statements.
        """
        if isinstance(node, ast.Expr):
            expr = self._convert_expression(node.value)
            return self.factory.create_expression_statement(
                expr, self._get_location(node)
            )
        if isinstance(node, ast.Assign):
            return self._convert_assign(node)
        if isinstance(node, ast.AugAssign):
            return self._convert_aug_assign(node)
        if isinstance(node, ast.If):
            return self._convert_if(node)
        if isinstance(node, ast.While):
            test = self._convert_expression(node.test)
            body = [self._convert_statement(stmt) for stmt in node.body]
            loc = self._get_location(node)
            # The else clause is ignored apart from the error report
            if node.orelse:
                self._report("While-else clauses are not supported", loc)
            return self.factory.create_while_loop(test, body, loc)
        if isinstance(node, ast.For):
            target = self._convert_expression(node.target)
            iter_expr = self._convert_expression(node.iter)
            body = [self._convert_statement(stmt) for stmt in node.body]
            loc = self._get_location(node)
            # The else clause is ignored apart from the error report
            if node.orelse:
                self._report("For-else clauses are not supported", loc)
            return self.factory.create_for_loop(target, iter_expr, body, loc)
        if isinstance(node, ast.FunctionDef):
            return self._convert_function_def(node)
        if isinstance(node, ast.Return):
            value = None
            if node.value:
                value = self._convert_expression(node.value)
            return self.factory.create_return_statement(
                value, self._get_location(node)
            )
        if isinstance(node, ast.Break):
            return self.factory.create_break_statement(self._get_location(node))
        if isinstance(node, ast.Continue):
            return self.factory.create_continue_statement(
                self._get_location(node)
            )
        if isinstance(node, ast.Pass):
            # Pass statements become an expression statement holding None
            return self.factory.create_expression_statement(
                self.factory.create_none_literal(),
                self._get_location(node)
            )
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            # Import statements - not fully supported yet
            loc = self._get_location(node)
            self._report("Import statements are not fully supported", loc)
            # Use the imported names for the placeholder text; formatting the
            # alias objects themselves would embed their memory addresses.
            names = ", ".join(alias.name for alias in node.names)
            return self._placeholder_statement(f"<import:{names}>", loc)
        if isinstance(node, ast.ClassDef):
            # Class definitions - not supported yet
            loc = self._get_location(node)
            self._report("Class definitions are not supported", loc)
            return self._placeholder_statement(f"<class:{node.name}>", loc)

        # Fallback for unsupported nodes
        loc = self._get_location(node)
        type_name = type(node).__name__
        self._report(f"Unsupported statement type: {type_name}", loc)
        return self._placeholder_statement(f"<unsupported:{type_name}>", loc)

    def _convert_assign(self, node: ast.Assign) -> Statement:
        """Convert a plain assignment.

        NOTE(review): only the first target is converted; for ``a = b = 1``
        the remaining targets are dropped without an error report.
        """
        loc = self._get_location(node)
        if not node.targets:
            self._report("Assignment with no targets", loc)
            # Fallback - create a dummy assignment
            return self.factory.create_assignment(
                self.factory.create_identifier("_dummy"),
                self.factory.create_none_literal(),
                loc
            )

        target = self._convert_expression(node.targets[0])
        value = self._convert_expression(node.value)
        return self.factory.create_assignment(target, value, loc)

    def _convert_aug_assign(self, node: ast.AugAssign) -> Statement:
        """Convert ``target op= value`` into ``target = target op value``."""
        loc = self._get_location(node)
        target = self._convert_expression(node.target)
        value = self._convert_expression(node.value)

        op, op_name = self._lookup_binary_operator(node.op)
        if op:
            # The same converted target node is used both as the left operand
            # and as the assignment target.
            bin_op = self.factory.create_binary_operation(
                target, op, value, loc
            )
            return self.factory.create_assignment(target, bin_op, loc)

        # Unknown operator: report it and fall back to a basic assignment
        self._report(
            f"Unsupported augmented assignment operator: {op_name}", loc
        )
        return self.factory.create_assignment(target, value, loc)

    def _convert_if(self, node: ast.If) -> Statement:
        """Convert an if statement, including elif chains and else blocks."""
        test = self._convert_expression(node.test)
        body = [self._convert_statement(stmt) for stmt in node.body]
        loc = self._get_location(node)

        orelse = None
        if node.orelse:
            if len(node.orelse) == 1 and isinstance(node.orelse[0], ast.If):
                # elif: a single nested If statement, kept as one statement
                orelse = self._convert_statement(node.orelse[0])
            else:
                # Regular else block: a list of statements
                orelse = [self._convert_statement(stmt) for stmt in node.orelse]

        return self.factory.create_if_statement(test, body, orelse, loc)

    def _convert_function_def(self, node: ast.FunctionDef) -> Statement:
        """Convert a function definition.

        NOTE(review): only ``node.args.args`` is converted; positional-only,
        keyword-only, ``*args`` and ``**kwargs`` parameters are not visited.
        """
        loc = self._get_location(node)

        positional = node.args.args
        defaults = node.args.defaults
        # Defaults align with the *last* len(defaults) positional arguments.
        defaults_offset = len(positional) - len(defaults)

        args = []
        for idx, arg in enumerate(positional):
            annotation = None
            if arg.annotation:
                annotation = self._convert_expression(arg.annotation)

            default = None
            default_idx = idx - defaults_offset
            if default_idx >= 0:
                default = self._convert_expression(defaults[default_idx])

            args.append(Argument(arg.arg, annotation, default))

        body = [self._convert_statement(stmt) for stmt in node.body]

        return_annotation = None
        if node.returns:
            return_annotation = self._convert_expression(node.returns)

        return self.factory.create_function_definition(
            node.name, args, body, return_annotation, loc
        )

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def parse(self) -> Program:
        """Parse the Python source code and return a VEX AST.

        Raises:
            VexSyntaxError: If the source is not valid Python, or if any
                other exception occurs while parsing or converting it.  The
                error is also recorded on ``self.error_handler``.
        """
        try:
            # Dedent so that uniformly indented snippets parse as a module
            dedented_source = textwrap.dedent(self.source)

            self._py_ast = ast.parse(
                dedented_source,
                filename=self.filename,
                feature_version=(3, 8)  # Explicitly use Python 3.8+ features
            )

            # Convert the module body to VEX statements
            body = [self._convert_statement(stmt) for stmt in self._py_ast.body]

            # Create and return the program node
            return self.factory.create_program(body)

        except SyntaxError as e:
            # Convert Python SyntaxError to VexSyntaxError
            loc = SourceLocation(
                line=e.lineno or 1,
                column=e.offset or 1,
                filename=e.filename or self.filename
            )
            end_line = getattr(e, 'end_lineno', None)
            end_offset = getattr(e, 'end_offset', None)
            if end_line is not None and end_offset is not None:
                loc.end_line = end_line
                loc.end_column = end_offset

            message = f"Syntax error: {e.msg}"
            self._report(message, loc)
            raise VexSyntaxError(message, loc) from e

        except Exception as e:
            # Any other failure is reported at the start of the file and
            # re-raised as a VexSyntaxError with the original as its cause.
            message = f"Failed to parse Python code: {str(e)}"
            self._report(message, SourceLocation(1, 1, self.filename))
            raise VexSyntaxError(
                message,
                SourceLocation(1, 1, self.filename)
            ) from e
|
708
|
+
|
709
|
+
# Convenience functions
|
710
|
+
def parse_string(source: str, filename: str = "<string>",
                 error_handler: Optional[ErrorHandler] = None) -> Program:
    """Parse Python code from a string.

    Builds a ``PythonParser`` for *source* and returns the result of its
    ``parse()`` call.

    Args:
        source: Python source text.
        filename: Name handed to the parser together with the source.
        error_handler: Optional handler handed to the parser.
    """
    return PythonParser(source, filename, error_handler).parse()
|
715
|
+
|
716
|
+
def parse_file(filepath: str, error_handler: Optional[ErrorHandler] = None) -> Program:
    """Parse Python code from a file.

    The file is read as UTF-8 text and handed to ``parse_string`` with
    *filepath* as the filename.

    Args:
        filepath: Path of the file to read.
        error_handler: Optional handler forwarded to ``parse_string``.

    Raises:
        FileNotFoundError: If *filepath* does not exist (propagated as is).
        IOError: For any other I/O failure while opening or reading the
            file; the original exception is attached as ``__cause__``.
    """
    # Keep only the file access inside the try block so that the handlers
    # below cannot relabel errors coming from the parsing step.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            source = f.read()
    except FileNotFoundError:
        raise
    except IOError as e:
        # Chain explicitly so the underlying error (errno, concrete subclass)
        # stays reachable from the wrapper.
        raise IOError(f"Error reading file {filepath}: {e}") from e

    return parse_string(source, filepath, error_handler)
|