xasm 1.2.1__py312-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xasm/.gitignore +2 -0
- xasm/__init__.py +8 -0
- xasm/assemble.py +722 -0
- xasm/pyc_convert.py +259 -0
- xasm/version.py +8 -0
- xasm/write_pyc.py +48 -0
- xasm/xasm_cli.py +61 -0
- xasm-1.2.1.dist-info/METADATA +233 -0
- xasm-1.2.1.dist-info/RECORD +13 -0
- xasm-1.2.1.dist-info/WHEEL +5 -0
- xasm-1.2.1.dist-info/entry_points.txt +2 -0
- xasm-1.2.1.dist-info/licenses/LICENSE.gpl2 +339 -0
- xasm-1.2.1.dist-info/top_level.txt +1 -0
xasm/assemble.py
ADDED
@@ -0,0 +1,722 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
import ast
|
3
|
+
import re
|
4
|
+
from typing import Any, List, Optional
|
5
|
+
|
6
|
+
import xdis
|
7
|
+
from xdis import get_opcode, load_module
|
8
|
+
from xdis.opcodes.base import cmp_op
|
9
|
+
from xdis.version_info import PYTHON_VERSION_TRIPLE, version_str_to_tuple
|
10
|
+
|
11
|
+
# import xdis.bytecode as Mbytecode
|
12
|
+
|
13
|
+
|
14
|
+
class Instruction:  # (Mbytecode.Instruction):
    """A single assembler instruction.

    Instances are created empty; the parser assigns ``opname``, ``arg`` and
    ``line_no`` (plus ``opcode`` and, later, ``offset``) as attributes.
    """

    line_no: Optional[int]
    opname: str
    arg: Optional[int]

    def __repr__(self) -> str:
        """Format as a listing row: line-number column, padded opname, operand."""
        # A falsy line number (None or 0) leaves the six-character column blank.
        prefix = "%4d: " % self.line_no if self.line_no else " " * 6
        pieces = [prefix, f"{self.opname:15}"]
        if self.arg is not None:
            pieces.append(f"\t{self.arg}")
        return "".join(pieces)
|
30
|
+
|
31
|
+
|
32
|
+
def is_int(s: Any) -> bool:
    """Return True when ``int(s)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ``TypeError`` (for example ``None`` or
    a list) are reported as non-integers instead of propagating the error,
    so callers can probe operands that may be missing.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        return False
    return True
|
38
|
+
|
39
|
+
|
40
|
+
def match_lineno(s: str) -> Optional[re.Match]:
    """Match a ``<digits>:`` prefix at the very start of *s*.

    Returns the match object, or None when *s* does not begin that way.
    """
    lineno_prefix = re.compile(r"^\d+:")
    return lineno_prefix.match(s)
|
42
|
+
|
43
|
+
|
44
|
+
def get_opname_operand(opc, fields: List[str]):
    """Split instruction tokens into ``(opname, operand)``.

    *fields* starts with the opcode name; anything after it is the operand.
    The operand is None when the opcode number is below
    ``opc.HAVE_ARGUMENT`` or when no operand tokens follow. Otherwise it is
    an int if the first operand token parses as one, else the remaining
    tokens joined by single spaces.

    An unknown opcode name raises KeyError from the ``opc.opmap`` lookup.
    """
    assert len(fields) > 0
    opname, operand_tokens = fields[0], fields[1:]

    if opc.opmap[opname] < opc.HAVE_ARGUMENT or not operand_tokens:
        return opname, None

    leading = operand_tokens[0]
    if is_int(leading):
        return opname, int(leading)

    operand = " ".join(operand_tokens)
    target_prefix = "(to "
    if operand.startswith(target_prefix):
        # In xasm format this shouldn't appear. The text after the prefix is
        # handed to is_int() unchanged, including any closing parenthesis.
        remainder = operand[len(target_prefix) :]
        if is_int(remainder):
            operand = int(remainder)
    return opname, operand
|
63
|
+
|
64
|
+
|
65
|
+
class Assembler:
    """State collected while assembling one xasm listing.

    Holds the opcode table for the target Python version, the code object
    currently being filled in (``code``), and parallel per-function lists
    that ``update_lists`` appends to.
    """

    def __init__(self, python_version, is_pypy) -> None:
        self.opc = get_opcode(python_version, is_pypy)
        self.code_list = []
        self.codes = []  # FIXME use a better name
        self.status: str = "unfinished"
        self.size = 0  # Size of source code. Only relevant in version 3 and above
        self.python_version = python_version
        self.timestamp = None
        self.backpatch = []  # one backpatch collection per function
        self.label = []  # list of label dicts, one for each function
        self.code = None
        self.siphash = None

    def code_init(self, python_version=None) -> None:
        """Start a fresh, empty portable code object in ``self.code``.

        *python_version* is the version the code object is created for. When
        it is omitted, the version already stored on the assembler is used
        rather than passing ``None`` on to xdis.
        """
        if self.python_version is None and python_version:
            self.python_version = python_version

        version_triple = (
            python_version if python_version is not None else self.python_version
        )

        self.code = xdis.codetype.to_portable(
            co_argcount=0,
            co_posonlyargcount=0,
            co_kwonlyargcount=0,
            co_nlocals=0,
            co_stacksize=10,
            co_flags=0,
            co_code=[],
            co_consts=[],
            co_names=[],
            co_varnames=[],
            co_filename="unknown",
            co_name="unknown",
            co_firstlineno=1,
            co_lnotab={},
            co_freevars=[],
            co_cellvars=[],
            version_triple=version_triple,
        )

        # Parsed Instruction objects are collected here until create_code()
        # turns them into bytes.
        self.code.instructions = []

    def update_lists(self, co, label, backpatch) -> None:
        """Record a finished function: its code object, labels and backpatch set."""
        self.code_list.append(co)
        self.codes.append(self.code)
        self.label.append(label)
        self.backpatch.append(backpatch)

    def print_instructions(self) -> None:
        """Print the current code's instructions, with a blank line before each
        instruction that carries a line number."""
        for inst in self.code.instructions:
            if inst.line_no:
                print()
            print(inst)

    def warn(self, mess: str) -> None:
        """
        Print a warning message and record that we warned, unless we have already errored.
        """
        print("Warning: ", mess)
        if self.status != "errored":
            self.status = "warning"

    def err(self, mess: str) -> None:
        """
        Print an error message and record that we errored.
        """
        print("Error: ", mess)
        self.status = "errored"
|
131
|
+
|
132
|
+
|
133
|
+
def asm_file(path) -> Optional[Assembler]:
    """Parse the xasm listing at *path* and assemble every code object in it.

    The file is processed line by line: ``##`` lines are comments, ``.READ``
    lines load an existing bytecode file, ``#`` lines carry header fields
    (version, method name, constants, names, ...), and everything else is a
    label or an instruction.

    Returns the populated ``Assembler``; None when no "Python bytecode"
    header was seen, or when ``create_code`` reports an invalid function
    while switching to the next "# Method Name:" section.

    Raises TypeError for a method header that precedes the version header
    and for a non-zero SipHash, RuntimeError for an unknown opcode name, and
    AssertionError for out-of-sequence constant indexes or instructions that
    appear before the version header.
    """
    offset = 0
    methods = {}
    method_name = None
    asm = None
    backpatch_inst = set()
    label = {}
    python_bytecode_version = None

    # Read the whole listing up front so the handle is closed right away.
    with open(path) as source:
        lines = source.readlines()

    i = 0
    bytecode_seen = False
    while i < len(lines):
        line = lines[i]
        i += 1
        if line.startswith("##"):
            # comment line
            continue
        if line.startswith(".READ"):
            match = re.match("^.READ (.+)$", line)
            if match:
                input_pyc = match.group(1)
                print(f"Reading {input_pyc}")
                (
                    version,
                    timestamp,
                    magic_int,
                    co,
                    is_pypy,
                    source_size,
                    sip_hash,
                ) = load_module(input_pyc)
                if python_bytecode_version and python_bytecode_version != version:
                    # NOTE(review): this exception is built but never raised,
                    # so a version mismatch is currently ignored. Left as is:
                    # the header stores the version as text while load_module
                    # presumably returns a tuple -- confirm before raising.
                    TypeError(
                        f"We previously saw Python version {python_bytecode_version} but we just loaded {version}.\n"
                    )
                python_bytecode_version = version
                # FIXME: extract all code options below the top-level and asm.code_list

        elif line.startswith("#"):
            match = re.match("^# (Pypy )?Python bytecode ", line)
            if match:
                if match.group(1):
                    pypy_str = match.group(1)
                    is_pypy = len(pypy_str)
                else:
                    is_pypy = False
                    pypy_str = ""

                python_bytecode_version = (
                    line[len("# Python bytecode " + pypy_str) :].strip().split()[0]
                )

                python_version_pair = version_str_to_tuple(
                    python_bytecode_version, length=2
                )
                asm = Assembler(python_version_pair, is_pypy)
                if python_version_pair >= (3, 10):
                    # NOTE(review): built but never raised, so 3.10+ input is
                    # still processed. Left unchanged because later code has
                    # 3.10-specific branches; decide whether to raise or warn.
                    TypeError(
                        f"Creating Python version {python_bytecode_version} not supported yet. "
                        "Feel free to fix and put in a PR.\n"
                    )
                asm.code_init(python_version_pair)
                bytecode_seen = True
            elif line.startswith("# Timestamp in code: "):
                text = line[len("# Timestamp in code: ") :].strip()
                time_str = text.split()[0]
                if is_int(time_str):
                    asm.timestamp = int(time_str)
            elif line.startswith("# Method Name: "):
                if method_name:
                    # A new method starts: finish the one collected so far.
                    co, is_valid = create_code(asm, label, backpatch_inst)
                    if not is_valid:
                        return
                    asm.update_lists(co, label, backpatch_inst)
                    label = {}
                    backpatch_inst = set()
                    methods[method_name] = co
                    offset = 0
                if python_bytecode_version is None:
                    raise TypeError(
                        f'Line {i}: "Python bytecode" not seen before "Method Name:"; please set this.'
                    )
                python_version_pair = version_str_to_tuple(
                    python_bytecode_version, length=2
                )
                asm.code_init(python_version_pair)
                asm.code.co_qual_name = asm.code.co_name = line[
                    len("# Method Name: ") :
                ].strip()
                method_name = asm.code.co_name
            elif line.startswith("# SipHash: "):
                # Slice by the prefix that was actually matched; the previous
                # misspelled prefix was one character longer and cut off the
                # first character of the value.
                siphash = line[len("# SipHash: ") :].strip().split()[0]
                asm.siphash = ast.literal_eval(siphash)
                if asm.siphash != 0:
                    raise TypeError(
                        "SIP hashes not supported yet. Feel free to fix and in a PR.\n"
                    )

            elif line.startswith("# Filename: "):
                asm.code.co_filename = line[len("# Filename: ") :].strip()
            elif line.startswith("# First Line: "):
                s = line[len("# First Line: ") :].strip()
                first_lineno = int(s)
                asm.code.co_firstlineno = first_lineno
            elif line.startswith("# Argument count: "):
                # NOTE(review): the parsed value is not stored anywhere;
                # co_argcount is only set from "# Positional arguments:".
                argc = line[len("# Argument count: ") :].strip().split()[0]
            elif line.startswith("# Position-only argument count: "):
                argc = (
                    line[len("# Position-only argument count: ") :].strip().split()[0]
                )
                asm.code.co_posonlyargcount = ast.literal_eval(argc)
            elif line.startswith("# Keyword-only argument count: "):
                argc = line[len("# Keyword-only argument count: ") :].strip().split()[0]
                asm.code.co_kwonlyargcount = ast.literal_eval(argc)
            elif line.startswith("# Number of locals: "):
                l_str = line[len("# Number of locals: ") :].strip()
                asm.code.co_nlocals = int(l_str)
            elif line.startswith("# Source code size mod 2**32: "):
                l_str = line[
                    len("# Source code size mod 2**32: ") : -len(" bytes")
                ].strip()
                asm.size = int(l_str)
            elif line.startswith("# Stack size: "):
                l_str = line[len("# Stack size: ") :].strip()
                asm.code.co_stacksize = int(l_str)
            elif line.startswith("# Flags: "):
                flags = line[len("# Flags: ") :].strip().split()[0]
                asm.code.co_flags = ast.literal_eval(flags)
            elif line.startswith("# Constants:"):
                # Consume the following "#  <index>: <value>" lines.
                count = 0
                while i < len(lines):
                    line = lines[i]
                    i += 1
                    match = re.match(r"^#\s+(\d+): (.+)$", line)
                    if match:
                        index = int(match.group(1))
                        assert index == count, (
                            f"Constant index {index} found on line {i} "
                            f"doesn't match expected constant index {count}."
                        )
                        expr = match.group(2)
                        match = re.match(
                            r"<(?:Code\d+ )?code object (\S+) at (0x[0-f]+)", expr
                        )
                        if match:
                            # A nested code object: look it up among the
                            # methods assembled earlier in this file.
                            name = match.group(1)
                            m2 = re.match("^<(.+)>$", name)
                            if m2:
                                name = f"{m2.group(1)}_{match.group(2)}"
                            if name in methods:
                                asm.code.co_consts.append(methods[name])
                            else:
                                print(
                                    f"line {i} ({asm.code.co_filename}, {method_name}): can't find method {name}"
                                )
                                bogus_name = f"**bogus {name}**"
                                print(f"\t appending {bogus_name} to list of constants")
                                asm.code.co_consts.append(bogus_name)
                        else:
                            asm.code.co_consts.append(ast.literal_eval(expr))
                        count += 1
                    else:
                        # Not a constant entry: let the main loop see it.
                        i -= 1
                        break
            elif line.startswith("# Cell variables:"):
                i = update_code_tuple_field("co_cellvars", asm.code, lines, i)
            elif line.startswith("# Free variables:"):
                i = update_code_tuple_field("co_freevars", asm.code, lines, i)
            elif line.startswith("# Names:"):
                i = update_code_tuple_field("co_names", asm.code, lines, i)
            elif line.startswith("# Varnames:"):
                # The names are on the next line, comma separated, after "#".
                line = lines[i]
                asm.code.co_varnames = line[1:].strip().split(", ")
                i += 1
            elif line.startswith("# Positional arguments:"):
                line = lines[i]
                args = line[1:].strip().split(", ")
                asm.code.co_argcount = len(args)
                i += 1
        else:
            if not line.strip():
                continue

            match = re.match(r"^(\S+):$", line)
            if match:
                label_value = match.group(1)
                # All-numeric labels, i.e. line numbers, are handled below
                if not re.match(r"^(\d+)$", label_value):
                    label[label_value] = offset
                    continue

            line_no = None

            match = re.match(r"^\s*(\d+):\s*", line)

            # Sanity checking: make sure we have seen
            # proper header lines
            if i == 1:
                assert bytecode_seen, (
                    f"Improper beginning:\n{line}"
                    "\nLine should begin with '#' "
                    "and contain header bytecode header information."
                )
            assert bytecode_seen, (
                f"Error translating line {i}: "
                "a line before this should include: \n"
                "# Python bytecode <version>"
            )

            if match:
                line_no = int(match.group(1))
                linetable_field = (
                    "co_lnotab" if python_version_pair < (3, 10) else "co_linetable"
                )
                assert asm is not None
                linetable = getattr(asm.code, linetable_field)
                linetable[offset] = line_no

            # Opcode section
            fields = line.strip().split()
            num_fields = len(fields)

            if num_fields == 1 and line_no is not None:
                # A bare "<lineno>:" line carries no instruction.
                continue

            try:
                if num_fields > 1:
                    if fields[0] == ">>":
                        fields = fields[1:]
                        num_fields -= 1
                    if match_lineno(fields[0]) and is_int(fields[1]):
                        # "<lineno>: <offset> OPNAME ..."
                        line_no = int(fields[0][:-1])
                        opname, operand = get_opname_operand(asm.opc, fields[2:])
                    elif match_lineno(fields[0]):
                        # "<lineno>: [>> [<offset>]] OPNAME ..."
                        line_no = int(fields[0][:-1])
                        fields = fields[1:]
                        if fields[0] == ">>":
                            fields = fields[1:]
                            if is_int(fields[0]):
                                fields = fields[1:]
                        opname, operand = get_opname_operand(asm.opc, fields)
                    elif is_int(fields[0]):
                        # "<offset> OPNAME ..."
                        opname, operand = get_opname_operand(asm.opc, fields[1:])
                    else:
                        opname, operand = get_opname_operand(asm.opc, fields)
                else:
                    opname, _ = get_opname_operand(asm.opc, fields)
            except Exception as e:
                print(f"Line {i}: {e}")
                raise

            if opname in asm.opc.opname:
                inst = Instruction()
                inst.opname = opname.replace("+", "_")
                inst.opcode = asm.opc.opmap[inst.opname]
                if xdis.op_has_argument(inst.opcode, asm.opc):
                    inst.arg = operand
                else:
                    inst.arg = None
                inst.line_no = line_no
                asm.code.instructions.append(inst)
                if inst.opcode in asm.opc.JUMP_OPS:
                    # Symbolic jump targets are resolved later in create_code().
                    if not is_int(operand):
                        backpatch_inst.add(inst)
                offset += xdis.op_size(inst.opcode, asm.opc)
            else:
                raise RuntimeError(f"Illegal opname {opname} in:\n{line}")

    if asm is not None:
        # print(linetable)

        # Finish the last (or only) function collected.
        co, is_valid = create_code(asm, label, backpatch_inst)
        asm.update_lists(co, label, backpatch_inst)
        asm.code_list.reverse()
        asm.status = "finished"

    return asm
|
414
|
+
|
415
|
+
|
416
|
+
def member(fields, match_value) -> int:
    """Return the index of the first item equal to *match_value* and of the
    same type, or -1 when there is none.

    The type comparison keeps ``True``/``1`` and ``False``/``0`` apart even
    though they compare equal.
    """
    wanted_type = type(match_value)
    return next(
        (
            position
            for position, candidate in enumerate(fields)
            if candidate == match_value and type(candidate) == wanted_type
        ),
        -1,
    )
|
422
|
+
|
423
|
+
|
424
|
+
def update_code_field(field_name: str, value, inst, opc) -> None:
    """Point ``inst.arg`` at *value* inside the list ``getattr(opc, field_name)``.

    An existing entry is reused; otherwise *value* is appended and
    ``inst.arg`` becomes its new index.
    """
    table = getattr(opc, field_name)
    # Can't use "in" because True == 1 and False == 0
    position = member(table, value)
    if position < 0:
        inst.arg = len(table)
        table.append(value)
        return
    inst.arg = position
|
434
|
+
|
435
|
+
|
436
|
+
def update_code_tuple_field(field_name: str, code, lines: List[str], i: int):
    """Append consecutive ``#  <index>: <value>`` entries to a code field.

    Starting at ``lines[i]``, each matching line's value text is appended to
    the list ``getattr(code, field_name)``. Scanning stops at the first line
    that does not match, or at the end of *lines*.

    Returns the index of the first line that was not consumed.

    Raises AssertionError when an entry's index is not the next expected
    one. The check is an explicit raise so it also runs under ``python -O``.
    """
    count = 0
    while i < len(lines):
        match = re.match(r"^#\s+(\d+): (.+)$", lines[i])
        if not match:
            break
        i += 1
        index = int(match.group(1))
        if index != count:
            raise AssertionError(
                f'In field "{field_name}", line {i}, number {index} is expected to have value {count}.'
            )
        getattr(code, field_name).append(match.group(2))
        count += 1
    return i
|
455
|
+
|
456
|
+
|
457
|
+
def err(msg: str, inst, i: int):
    """Raise RuntimeError carrying *msg* plus the instruction index *i* and
    the text of *inst*. Never returns."""
    raise RuntimeError(msg + ". Instruction %d:\n%s" % (i, inst))
|
460
|
+
|
461
|
+
|
462
|
+
def warn(mess: str) -> None:
    """
    Print *mess* to standard output behind a "Warning: " prefix.
    """
    prefix = "Warning: "
    print(prefix, mess)
|
467
|
+
|
468
|
+
|
469
|
+
def decode_lineno_tab_old(lnotab, first_lineno: int) -> dict:
    """
    Expand a compressed line-number table (Python versions before 3.10)
    into a dict mapping bytecode offset to line number.

    *lnotab* is read as consecutive (offset delta, line delta) pairs whose
    items are ints or single characters. A pair whose offset delta or line
    delta equals 255 is skipped without being accumulated.
    """
    decoded = {}
    current_line = first_lineno
    current_offset = 0
    for pos in range(0, len(lnotab), 2):
        delta_offset, delta_line = lnotab[pos], lnotab[pos + 1]
        if not isinstance(delta_offset, int):
            # Character input: convert both items of the pair.
            delta_offset, delta_line = ord(delta_offset), ord(delta_line)

        assert delta_offset < 256
        if delta_offset == 255:
            continue
        assert delta_line < 256
        if delta_line == 255:
            continue

        current_line += delta_line
        current_offset += delta_offset
        decoded[current_offset] = current_line

    return decoded
|
495
|
+
|
496
|
+
|
497
|
+
def is_code_ok(asm: Assembler) -> bool:
    """
    Performs some sanity checks on code.

    Checks that the code object in ``asm.code`` has at least one
    instruction, that its last instruction is one of RETURN_VALUE, RERAISE
    or RAISE_VARARGS, and that every integer operand indexing the constant,
    variable-name, name or free/cell tables is within range. Each problem
    is reported through ``warn``.

    Returns True when no problem was found, False otherwise.
    """
    is_valid: bool = True

    code = asm.code
    if not code.instructions:
        # Nothing to inspect; report it rather than failing on [-1] below.
        warn(f"No instructions found in {code.co_name}")
        return False

    last_instruction = code.instructions[-1]
    last_offset = last_instruction.offset
    if last_instruction.opname not in ("RETURN_VALUE", "RERAISE", "RAISE_VARARGS"):
        warn(
            f"Last instruction of at offset {last_offset} of {code.co_name}"
            f' should be "RETURN_VALUE", is "{last_instruction.opname}"'
        )
        is_valid = False

    # (opcode-set attribute on asm.opc, label used in the message, table size)
    # Order matters: the first category containing the opcode is the one checked.
    operand_checks = (
        ("CONST_OPS", "Constant", len(code.co_consts)),
        ("LOCAL_OPS", "Variable", len(code.co_varnames)),
        ("NAME_OPS", "Name", len(code.co_names)),
        # FIXME: is this right?
        ("FREE_OPS", "Free", len(code.co_freevars) + len(code.co_cellvars)),
    )

    for inst in code.instructions:
        if not xdis.op_has_argument(inst.opcode, asm.opc):
            continue
        # Only integer operands are range-checked; a missing operand is skipped.
        if inst.arg is None or not is_int(inst.arg):
            continue
        if inst.opcode == asm.opc.EXTENDED_ARG:
            continue

        operand = inst.arg
        for ops_attr, kind, limit in operand_checks:
            if inst.opcode not in getattr(asm.opc, ops_attr):
                continue
            if operand >= limit:
                print(inst)
                warn(
                    f"{kind} operand index {operand} at offset {inst.offset} of {code.co_name} "
                    f"is too large; it should be less than {limit}."
                )
                is_valid = False
            break

    return is_valid
|
561
|
+
|
562
|
+
|
563
|
+
def append_operand(
    bytecode: list, arg_value, extended_arg_shift, arg_max_value, extended_arg_op
) -> None:
    """
    Write instruction operand adding EXTENDED_ARG instructions
    when necessary.

    When *arg_value* exceeds *arg_max_value* it is split into chunks of
    *extended_arg_shift* bits. Each higher chunk is appended to *bytecode*
    as the pair ``extended_arg_op, chunk``, most significant chunk first,
    followed by the lowest chunk on its own. Operands that need several
    prefixes get one prefix per chunk, so no emitted chunk exceeds
    ``(1 << extended_arg_shift) - 1``.
    """
    radix = 1 << extended_arg_shift
    prefix_chunks = []  # collected least-significant first

    if arg_value > arg_max_value:
        remaining, arg_value = divmod(arg_value, radix)
        while remaining >= radix:
            remaining, chunk = divmod(remaining, radix)
            prefix_chunks.append(chunk)
        prefix_chunks.append(remaining)

    for chunk in reversed(prefix_chunks):
        bytecode.append(extended_arg_op)
        bytecode.append(chunk)

    bytecode.append(arg_value)
|
584
|
+
|
585
|
+
|
586
|
+
def create_code(asm: Assembler, label, backpatch) -> tuple:
    """
    Turn ``asm`` assembler text into a code object and
    return that.

    Walks ``asm.code.instructions``, resolves symbolic operands (labels,
    compare operators, constants, names) to integers, and builds the
    byte string stored in ``asm.code.co_code``.

    *label* maps label names to bytecode offsets; *backpatch* holds the
    jump instructions whose operand is still a label.

    Returns ``(code, is_valid)``; ``(None, False)`` if assembly was marked
    invalid. Unresolvable operands raise RuntimeError through ``err``.
    """
    # print('label: ', asm.label)
    # print('backpatch: ', asm.backpatch_inst)

    bytecode = []
    # print(asm.code.instructions)

    offset = 0
    offset2label = {label[j]: j for j in label}
    is_valid = True

    for i, inst in enumerate(asm.code.instructions):
        # Strip out extended arg instructions.
        # Operands in the input can be arbitary numbers.
        # In this loop we will figure out whether
        # or not to add EXTENDED_ARG
        if inst.opcode == asm.opc.EXTENDED_ARG:
            print(
                f"Line {i}: superflous EXTENDED_ARG instruction removed;"
                " this code decides when they are needed."
            )
            continue

        # NOTE(review): the opcode is emitted before append_operand() runs, so
        # any EXTENDED_ARG prefix ends up after the opcode, and offsets are not
        # adjusted for prefixes -- operands above ARG_MAX_VALUE need review.
        bytecode.append(inst.opcode)
        if offset in offset2label:
            if is_int(offset2label[offset]):
                inst.line_no = int(offset2label[offset])
                if (
                    inst.line_no in asm.code.co_lnotab.values()
                    and asm.python_version < (3, 10)
                ):
                    print(
                        f"Line {i}: this is not the first we encounter source-code line {inst.line_no}."
                    )
                asm.code.co_lnotab[offset] = inst.line_no

        inst.offset = offset
        offset += xdis.op_size(inst.opcode, asm.opc)

        if xdis.op_has_argument(inst.opcode, asm.opc):
            if inst in backpatch:
                # Jump whose operand is a label, optionally "L<n> (to <m>)".
                target = inst.arg
                match = re.match(r"^(L\d+)(?: \(to \d+\))?$", target)
                if match:
                    target = match.group(1)
                try:
                    if inst.opcode in asm.opc.JREL_OPS:
                        # Relative to the offset just past this instruction.
                        inst.arg = label[target] - offset
                    else:
                        inst.arg = label[target]
                    # From 3.10 on, jump operands count instructions, not bytes.
                    if asm.opc.version_tuple >= (3, 10):
                        inst.arg >>= 1
                except KeyError:
                    err(f"Label {target} not found.\nI know about {backpatch}", inst, i)
                    is_valid = False
            elif is_int(inst.arg):
                pass
            elif inst.arg.startswith("(") and inst.arg.endswith(")"):
                # "(value)" operands name the thing rather than its index.
                operand = inst.arg[1:-1]
                if inst.opcode in asm.opc.COMPARE_OPS:
                    if operand in cmp_op:
                        inst.arg = cmp_op.index(operand)
                    else:
                        err(f"Can't handle compare operand {inst.arg}", inst, i)
                        is_valid = False
                        break
                elif inst.opcode in asm.opc.CONST_OPS:
                    if not (operand.startswith("<Code") or operand.startswith("<code")):
                        operand = ast.literal_eval(operand)
                    update_code_field("co_consts", operand, inst, asm.code)
                elif inst.opcode in asm.opc.LOCAL_OPS:
                    update_code_field("co_varnames", operand, inst, asm.code)
                elif inst.opcode in asm.opc.NAME_OPS:
                    update_code_field("co_names", operand, inst, asm.code)
                elif inst.opcode in asm.opc.FREE_OPS:
                    if operand in asm.code.co_cellvars:
                        inst.arg = asm.code.co_cellvars.index(operand)
                    else:
                        update_code_field("co_freevars", operand, inst, asm.code)
                else:
                    # from trepan.api import debug; debug()
                    err(f"Can't handle operand {inst.arg}", inst, i)
                    is_valid = False
                    break
            else:
                # from trepan.api import debug; debug()
                err(
                    f"Don't understand operand {inst.arg} expecting int or (..)",
                    inst,
                    i,
                )

            append_operand(
                bytecode,
                inst.arg,
                asm.opc.EXTENDED_ARG_SHIFT,
                asm.opc.ARG_MAX_VALUE,
                asm.opc.EXTENDED_ARG,
            )

        elif asm.opc.version_tuple >= (3, 6):
            # instructions with no operand, or one-byte instructions, are padded
            # to two bytes in 3.6 and later.
            bytecode.append(0)

    if not is_valid:
        return None, False

    if asm.opc.version_tuple >= (3, 0):
        # Reduce each value to one byte. The modulus must be 256: with 255 the
        # byte value 255 itself would be turned into 0.
        asm.code.co_code = bytes(bytearray(j % 256 for j in bytecode))
    else:
        asm.code.co_code = "".join([chr(j) for j in bytecode])

    # FIXME: get
    is_code_ok(asm)

    # Stamp might be added here
    if asm.python_version[:2] == PYTHON_VERSION_TRIPLE[:2]:
        code = asm.code.to_native()
    else:
        code = asm.code.freeze()

    # asm.print_instructions()

    # print (*args)
    # co = self.Code(*args)
    return code, is_valid
|