vmlinux-to-elf 1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vmlinux_to_elf/__init__.py +0 -0
- vmlinux_to_elf/core/architecture_detecter.py +226 -0
- vmlinux_to_elf/core/elf_symbolizer.py +337 -0
- vmlinux_to_elf/core/kallsyms.py +1324 -0
- vmlinux_to_elf/core/vmlinuz_decompressor.py +321 -0
- vmlinux_to_elf/scripts/kallsyms_finder.py +84 -0
- vmlinux_to_elf/scripts/run_tests.py +66 -0
- vmlinux_to_elf/scripts/vmlinux_to_elf.py +110 -0
- vmlinux_to_elf/utils/__init__.py +0 -0
- vmlinux_to_elf/utils/elf.py +1055 -0
- vmlinux_to_elf/utils/elf_tests/ls_arm32_le.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/ls_mips32_be.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/ls_mips32_le.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/ls_mips64_be.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/ls_renesas.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/ls_x64.elf +0 -0
- vmlinux_to_elf/utils/elf_tests/x86_shared_library.so +0 -0
- vmlinux_to_elf/utils/lz4_legacy.py +42 -0
- vmlinux_to_elf/utils/pretty_print.py +136 -0
- vmlinux_to_elf-1.0.dist-info/METADATA +166 -0
- vmlinux_to_elf-1.0.dist-info/RECORD +24 -0
- vmlinux_to_elf-1.0.dist-info/WHEEL +4 -0
- vmlinux_to_elf-1.0.dist-info/entry_points.txt +4 -0
- vmlinux_to_elf-1.0.dist-info/licenses/LICENSE +674 -0
|
File without changes
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- encoding: Utf-8 -*-
|
|
3
|
+
import logging
|
|
4
|
+
from collections import Counter
|
|
5
|
+
from enum import IntEnum
|
|
6
|
+
from re import DOTALL, findall
|
|
7
|
+
from time import time
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
Guess the architecture of a given binary.
|
|
12
|
+
|
|
13
|
+
For this, scan it for simple function prologues.
|
|
14
|
+
Inspiration: https://github.com/ReFirmLabs/binwalk/blob/master/src/binwalk/magic/binarch
|
|
15
|
+
|
|
16
|
+
Also, return a sequence of the spacing in bytes
|
|
17
|
+
between each detected function prologue, so that
|
|
18
|
+
it can be matched with function symbols from the
|
|
19
|
+
kallsyms table and the base address at the offset
|
|
20
|
+
0 of the binary can be guessed.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ArchitectureGuessError(Exception):
    """Raised when the architecture of a kernel blob could not be guessed."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ArchitectureName(IntEnum):
    """
    Names of the CPU architectures (and endianness/word size variants)
    that this module is able to tell apart.
    """

    mipsle = 1
    mipsbe = 2
    mips64le = 3
    mips64be = 4
    x86 = 5
    x86_64 = 6
    powerpcbe = 7
    powerpcle = 8
    armle = 9
    armbe = 10
    aarch64 = 11
    mips16e = 12
    superhle = 13
    superhbe = 14
    sparc = 15
    arcompact = 16
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Prologues taken from the binwalk file linked above
#
# Each value is a bytes regular expression matching a typical function
# prologue (or, where stated, epilogue) of the architecture. The patterns
# are meant to be matched with the DOTALL flag, so that "." also matches
# the 0x0A byte.
architecture_to_prologue_regex: dict[ArchitectureName, bytes] = {
    ArchitectureName.mipsle: rb".\xFF\xBD\x27..[\xA0-\xBF]\xAF",
    ArchitectureName.mipsbe: rb"\x27\xBD\xFF.\xAF[\xA0-\xBF]..",
    ArchitectureName.mips64le: rb".\xFF\xBD\x67..[\xA0-\xBF]\xFF",
    ArchitectureName.mips64be: rb"\x67\xBD\xFF.\xFF[\xA0-\xBF]..",
    ArchitectureName.x86: rb"\x55\x89\xE5(?:\x83\xEC|\x57\x56)",
    ArchitectureName.x86_64: rb"\x55\x48\x89\xE5",
    ArchitectureName.powerpcbe: rb"\x7C\x08\x02\xA6",
    ArchitectureName.powerpcle: rb"\xA6\x02\x08\x7C",
    ArchitectureName.armbe: rb"\xE9\x2D..(?:[\xE0-\xEF]...){2}",
    ArchitectureName.armle: rb"\x2D\xE9(?:...[\xE0-\xEF]){2}",
    ArchitectureName.mips16e: rb"\xf0\x08\x64.\x01.",
    ArchitectureName.superhle: rb"\xF6\x69\x0B\x00\xF6\x68",  # This is an epilogue
    ArchitectureName.superhbe: rb"\x69\xF6\x00\x0B\x68\xF6",  # This is an epilogue
    ArchitectureName.aarch64: rb"\xc0\x03\x5f\xd6",  # This is an epilogue
    ArchitectureName.sparc: rb"\x81\xC7\xE0\x08\x81\xE8",  # This is an epilogue
    # Unlike the other entries this one is not a raw literal: the escapes are
    # resolved by Python rather than by the regex engine. None of the
    # resulting bytes is a regex metacharacter, so the match is the same.
    ArchitectureName.arcompact: b"\xf1\xc0.\x1c\x48[\xb0-\xbf]",  # push_s blink; st.a r??, [sp, -??]
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# From https://github.com/torvalds/linux/blob/master/include/uapi/linux/elf-em.h


class ElfMachine(IntEnum):
    """
    Values of the "e_machine" field of the ELF file header.
    """

    # These constants define the various ELF target machines
    EM_NONE = 0
    EM_M32 = 1
    EM_SPARC = 2
    EM_386 = 3
    EM_68K = 4
    EM_88K = 5
    EM_486 = 6  # Perhaps disused
    EM_860 = 7
    EM_MIPS = 8  # MIPS R3000 (officially, big-endian only)
    # Next two are historical and binaries and
    # modules of these types will be rejected by
    # Linux.
    EM_MIPS_RS3_LE = 10  # MIPS R3000 little-endian
    # Same value as EM_MIPS_RS3_LE: the enum machinery therefore makes
    # this name an alias, and ElfMachine(10).name is "EM_MIPS_RS3_LE"
    EM_MIPS_RS4_BE = 10  # MIPS R4000 big-endian

    EM_PARISC = 15  # HPPA
    EM_SPARC32PLUS = 18  # Sun's "v8plus"
    EM_PPC = 20  # PowerPC
    EM_PPC64 = 21  # PowerPC64
    EM_SPU = 23  # Cell BE SPU
    EM_ARM = 40  # ARM 32 bit
    EM_SH = 42  # SuperH
    EM_SPARCV9 = 43  # SPARC v9 64-bit
    EM_H8_300 = 46  # Renesas H8/300
    EM_IA_64 = 50  # HP/Intel IA-64
    EM_X86_64 = 62  # AMD x86-64
    EM_S390 = 22  # IBM S/390
    EM_CRIS = 76  # Axis Communications 32-bit embedded processor
    EM_M32R = 88  # Renesas M32R
    EM_MN10300 = 89  # Panasonic/MEI MN10300, AM33
    EM_OPENRISC = 92  # OpenRISC 32-bit embedded processor
    EM_ARCOMPACT = 93  # ARCompact processor
    EM_XTENSA = 94  # Tensilica Xtensa Architecture
    EM_BLACKFIN = 106  # ADI Blackfin Processor
    EM_UNICORE = 110  # UniCore-32
    EM_ALTERA_NIOS2 = 113  # Altera Nios II soft-core processor
    EM_TI_C6000 = 140  # TI C6X DSPs
    EM_HEXAGON = 164  # QUALCOMM Hexagon
    EM_NDS32 = 167  # Andes Technology compact code size embedded RISC processor family
    EM_AARCH64 = 183  # ARM 64 bit
    EM_TILEPRO = 188  # Tilera TILEPro
    EM_MICROBLAZE = 189  # Xilinx MicroBlaze
    EM_TILEGX = 191  # Tilera TILE-Gx
    EM_ARCV2 = 195  # ARCv2 Cores
    EM_RISCV = 243  # RISC-V
    EM_BPF = 247  # Linux BPF - in-kernel virtual machine
    EM_CSKY = 252  # C-SKY
    EM_FRV = 0x5441  # Fujitsu FR-V

    # This is an interim value that we will use until the committee comes
    # up with a final number.
    EM_ALPHA = 0x9026

    # Bogus old m32r magic number, used by old tools.
    EM_CYGNUS_M32R = 0x9041
    # This is the old interim value for S/390 architecture
    EM_S390_OLD = 0xA390
    # Also Panasonic/MEI MN10300, AM33
    EM_CYGNUS_MN10300 = 0xBEEF
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class ArchitectureDetectionResult:
    """
    ELF-level properties that go with a guessed architecture.

    Given an ArchitectureName, exposes the matching ELF machine type,
    whether the architecture is 64-bit and whether it is big-endian.
    """

    architecture_name: ArchitectureName
    elf_machine: ElfMachine
    is_64_bit: bool
    is_big_endian: bool

    # Maps each architecture to (ELF machine, is 64-bit, is big-endian).
    # Built once for the class rather than once per instance.
    _LOOKUP_TABLE: dict[ArchitectureName, tuple[ElfMachine, bool, bool]] = {
        ArchitectureName.mipsle: (ElfMachine.EM_MIPS, False, False),
        ArchitectureName.mipsbe: (ElfMachine.EM_MIPS, False, True),
        ArchitectureName.mips64le: (ElfMachine.EM_MIPS, True, False),
        ArchitectureName.mips64be: (ElfMachine.EM_MIPS, True, True),
        ArchitectureName.x86: (ElfMachine.EM_386, False, False),
        ArchitectureName.x86_64: (ElfMachine.EM_X86_64, True, False),
        ArchitectureName.powerpcbe: (ElfMachine.EM_PPC, False, True),
        ArchitectureName.powerpcle: (ElfMachine.EM_PPC, False, False),
        ArchitectureName.armbe: (ElfMachine.EM_ARM, False, True),
        ArchitectureName.armle: (ElfMachine.EM_ARM, False, False),
        ArchitectureName.mips16e: (ElfMachine.EM_MIPS, False, True),
        ArchitectureName.superhle: (ElfMachine.EM_SH, False, False),
        ArchitectureName.superhbe: (ElfMachine.EM_SH, False, True),
        ArchitectureName.aarch64: (ElfMachine.EM_AARCH64, True, False),
        ArchitectureName.sparc: (ElfMachine.EM_SPARC, False, True),
        ArchitectureName.arcompact: (ElfMachine.EM_ARCOMPACT, False, False),
    }

    def __init__(self, architecture_name: ArchitectureName):
        """
        @param architecture_name (ArchitectureName): The guessed architecture
        @raises KeyError if the architecture has no entry in the lookup table
        """
        self.architecture_name = architecture_name

        self.elf_machine, self.is_64_bit, self.is_big_endian = self._LOOKUP_TABLE[
            architecture_name
        ]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class ArchitectureDetector:
    """
    Guesses the architecture of a raw kernel blob.

    The entry point is the "guess" class method; the other methods
    are the individual heuristics it relies on.
    """

    @classmethod
    def guess(cls, binary: bytes) -> ArchitectureDetectionResult:
        """
        Main architecture guess function

        The signature-based heuristic is tried first, then the
        prologue-counting one.

        @param binary (bytes): A raw kernel blob
        @raises ArchitectureGuessError
        @returns ArchitectureDetectionResult
        """
        begin_time = time()

        architecture_guess = cls._guess_architecture_special(binary)
        if architecture_guess is None:
            architecture_guess = cls._guess_architecture_common(binary)

        if architecture_guess is None:
            raise ArchitectureGuessError(
                "The architecture could not be guessed successfully"
            )

        # Arguments are handed over to the logging module so that the
        # message is only formatted when the record is actually emitted
        logging.info(
            "[+] Guessed architecture: %s successfully in %.2f seconds",
            architecture_guess.name,
            time() - begin_time,
        )

        return ArchitectureDetectionResult(architecture_guess)

    @staticmethod
    def _guess_architecture_special(binary: bytes) -> Optional[ArchitectureName]:
        """
        Guess the architecture based on special knowledge, like custom signatures or binary format

        @param binary (bytes): A raw kernel blob
        @returns The architecture, or None when no known signature is present
        """
        if binary[:2] == b"MZ":
            # Maybe UEFI boot stub ?
            if binary[0x38:0x3C] == b"ARMd":
                return ArchitectureName.aarch64

        return None

    @staticmethod
    def _guess_architecture_common(binary: bytes) -> Optional[ArchitectureName]:
        """
        Guess the architecture based on common patterns

        Counts the matches of every known prologue pattern in the binary
        and keeps the architecture with the most matches.

        @param binary (bytes): A raw kernel blob
        @returns The architecture, or None when the best candidate has too few matches
        """
        # Below this number of matches, the best candidate is rejected
        minimum_number_of_prologues = 100

        architecture_to_number_of_prologues: Counter = Counter()

        for architecture, prologue in architecture_to_prologue_regex.items():
            architecture_to_number_of_prologues[architecture] = len(
                findall(prologue, binary, flags=DOTALL)
            )

        best_architecture_guess, number_of_prologues = (
            architecture_to_number_of_prologues.most_common(1)[0]
        )

        if number_of_prologues < minimum_number_of_prologues:
            return None

        return best_architecture_guess
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- encoding: Utf-8 -*-
|
|
3
|
+
import logging
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
|
|
6
|
+
from vmlinux_to_elf.core.architecture_detecter import ArchitectureGuessError
|
|
7
|
+
from vmlinux_to_elf.core.kallsyms import KallsymsFinder, KallsymsSymbolType
|
|
8
|
+
from vmlinux_to_elf.utils.elf import (
|
|
9
|
+
SH_FLAGS,
|
|
10
|
+
SPECIAL_SECTION_INDEX,
|
|
11
|
+
ST_INFO_BINDING,
|
|
12
|
+
ST_INFO_TYPE,
|
|
13
|
+
Elf32BigEndianRelocationWithAddendTableEntry,
|
|
14
|
+
Elf32BigEndianSymbolTableEntry,
|
|
15
|
+
Elf32LittleEndianRelocationWithAddendTableEntry,
|
|
16
|
+
Elf32LittleEndianSymbolTableEntry,
|
|
17
|
+
Elf64BigEndianRelocationWithAddendTableEntry,
|
|
18
|
+
Elf64BigEndianSymbolTableEntry,
|
|
19
|
+
Elf64LittleEndianRelocationWithAddendTableEntry,
|
|
20
|
+
Elf64LittleEndianSymbolTableEntry,
|
|
21
|
+
ElfFile,
|
|
22
|
+
ElfNoBits,
|
|
23
|
+
ElfNullSection,
|
|
24
|
+
ElfProgbits,
|
|
25
|
+
ElfRela,
|
|
26
|
+
ElfStrtab,
|
|
27
|
+
ElfSymtab,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
The ElfSymbolizer class, defined in this file, gathers information from
|
|
32
|
+
the other modules (such as kallsyms_finder, which extracts the kernel's
|
|
33
|
+
runtime symbol table, or vmlinuz_decompressor, which processes possible
|
|
34
|
+
kernel compressions), in order to generate the output ELF file.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ElfSymbolizer:
    """
    Generates an ELF file out of a kernel image and of the symbols
    that KallsymsFinder extracted from it.

    All the work is done by the constructor, which writes the
    resulting ELF to the requested output path.
    """

    # Size of the uImage header (image_header_t from u-boot/image.h)
    UIMAGE_HEADER_SIZE = 64

    # Mask clearing the 12 lowest bits of an address (rounds it down to
    # a multiple of 0x1000)
    PAGE_ALIGNMENT_MASK = 0xFFFFFFFFFFFFF000

    # Relocation type written into every generated relocation entry.
    # 1027 is R_AARCH64_RELATIVE in the AArch64 ELF ABI.
    # NOTE(review): it is used whatever the target machine is - confirm
    # that "elf64_rela" is only ever populated for AArch64 kernels.
    RELATIVE_RELOCATION_TYPE = 1027

    # Find the entry point symbol. Based on executing this command
    # on the Linux tree source:
    #
    # for i in $(find -iname 'vmlinux.lds.S' -o -iname 'dyn.lds.S' -o -iname 'vmlinux-std.lds');
    #     do echo "$i:"$(grep -P '^ENTRY\(' $i);
    # done | grep -Po 'ENTRY\((.+?)\)' | sort -u
    #
    # You can find the possible symbols that are used as an entry
    # point for the kernel, here sorted from the most specific to
    # the less specific
    POSSIBLE_ENTRY_POINT_SYMBOLS = (
        "kernel_entry",
        "microblaze_start",
        "parisc_kernel_start",
        "phys_startup_32",
        "phys_startup_64",
        "phys_start",
        "_stext_lma",
        "res_service",
        "_c_int00",
        "startup_32",
        "startup_64",
        "startup_continue",
        "startup",
        "__start",
        "_start",
        "start_kernel",
        "stext",
        "_stext",
        "_text",
    )

    def __init__(
        self,
        file_contents: bytes,
        output_file: str,
        elf_machine: int = None,
        bit_size: int = None,
        base_address: int = None,
        bss_size: int = 16,
        file_offset: int = None,
        override_relative: bool = None,
    ):
        """
        Build the ELF and write it to "output_file".

        @param file_contents (bytes): The kernel image, either raw, wrapped
            into an uImage header or already an ELF file
        @param output_file (str): Path the resulting ELF is written to
        @param elf_machine (int): Value to store in the "e_machine" header
            field; when None, the one found by KallsymsFinder is used
        @param bit_size (int): Forwarded as is to KallsymsFinder
        @param base_address (int): Virtual address of the first byte of the
            kernel; when None, it is read from the uImage header if there is
            one, and guessed from the symbols otherwise
        @param bss_size (int): Size of the generated ".bss" section, in MiB
        @param file_offset (int): Number of leading bytes of "file_contents"
            to skip; defaults to the header size for an uImage
        @param override_relative (bool): Forwarded as is to KallsymsFinder
        @raises ArchitectureGuessError if no ELF machine is known
        @raises ValueError if the base address or the entry point
            can't be determined from the symbols
        """
        if file_contents.startswith(
            b"\x27\x05\x19\x56"
        ):  # uImage header magic (always big-endian)
            if file_offset is None:
                file_offset = self.UIMAGE_HEADER_SIZE

            if base_address is None:
                # Fifth 32-bit word of the header (presumably the load
                # address field of image_header_t - TODO confirm)
                base_address = int.from_bytes(file_contents[4 * 4 : 4 * 5], "big")

        if file_offset:
            file_contents = file_contents[file_offset:]

        kallsyms_finder = KallsymsFinder(
            file_contents, bit_size, override_relative, base_address
        )

        if elf_machine is None and not kallsyms_finder.elf_machine:
            raise ArchitectureGuessError(
                "The architecture could not be guessed successfully"
            )

        if file_contents.startswith(b"\x7fELF"):
            kernel = ElfFile.from_bytes(BytesIO(file_contents))

        else:
            kernel = ElfFile(kallsyms_finder.is_big_endian, kallsyms_finder.is_64_bits)

            # Previously the register size was based on the kernel version string: bool(kallsyms_finder.offset_table_element_size >= 8 or search('itanium|(?:amd|aarch|ia|arm|x86_|\D-)64', kallsyms_finder.version_string, flags = IGNORECASE))

        if elf_machine is not None:
            kernel.file_header.e_machine = elf_machine
        else:
            kernel.file_header.e_machine = kallsyms_finder.elf_machine

        ET_EXEC = 2
        kernel.file_header.e_type = ET_EXEC

        null = ElfNullSection(kernel)
        null.section_name = ""

        progbits = ElfProgbits(kernel)
        progbits.section_name = ".kernel"
        progbits.section_header.sh_flags = (
            SH_FLAGS.SHF_ALLOC | SH_FLAGS.SHF_EXECINSTR | SH_FLAGS.SHF_WRITE
        )

        # Address of the first text symbol of the table, None if there is none
        first_symbol_virtual_address = next(
            (
                symbol.virtual_address
                for symbol in kallsyms_finder.symbols
                if symbol.symbol_type == KallsymsSymbolType.TEXT
            ),
            None,
        )

        if kallsyms_finder.has_base_relative:
            if first_symbol_virtual_address is None:
                # No text symbol to compare with: min() would fail on None
                first_symbol_virtual_address = kallsyms_finder.relative_base_address
            else:
                first_symbol_virtual_address = min(
                    first_symbol_virtual_address, kallsyms_finder.relative_base_address
                )

        if base_address is not None:
            progbits.section_header.sh_addr = base_address
            logging.info(
                f"[+] An explicit base address was given ({progbits.section_header.sh_addr:x})"
            )
        elif kallsyms_finder.kernel_text_candidate:
            progbits.section_header.sh_addr = kallsyms_finder.kernel_text_candidate
            logging.info(
                f"[+] Guessed the base address using the kernel_text_candidate heuristic ({progbits.section_header.sh_addr:x})"
            )
        elif first_symbol_virtual_address is not None:
            progbits.section_header.sh_addr = (
                first_symbol_virtual_address & self.PAGE_ALIGNMENT_MASK
            )
            logging.info(
                f"[+] Guessed the base address using the first_symbol_virtual_address fallback heuristic ({progbits.section_header.sh_addr:x})"
            )
        else:
            # Every heuristic failed: report it explicitly rather than
            # failing on an arithmetic operation over None
            raise ValueError(
                "No text symbol found in the kallsyms, the base address can't be guessed"
            )

        kernel.sections += [null, progbits]

        if kallsyms_finder.elf64_rela:
            # Punch a hole into the ELF to remove relocation tables
            progbits.section_header.sh_size = kallsyms_finder.elf64_rela_start
            progbits.section_contents = file_contents[
                : progbits.section_header.sh_size
            ]
            progbits2 = ElfProgbits(kernel)
            progbits2.section_name = ".kernel2"
            progbits2.section_header.sh_flags = (
                SH_FLAGS.SHF_ALLOC | SH_FLAGS.SHF_EXECINSTR | SH_FLAGS.SHF_WRITE
            )
            progbits2.section_header.sh_addr = (
                progbits.section_header.sh_addr
                + kallsyms_finder.elf64_rela_end_excl
            )
            progbits2.section_header.sh_size = (
                len(file_contents) - kallsyms_finder.elf64_rela_end_excl
            )
            progbits2.section_contents = file_contents[
                kallsyms_finder.elf64_rela_end_excl :
            ]
            kernel.sections += [progbits2]
        else:
            progbits.section_contents = file_contents
            progbits.section_header.sh_size = len(file_contents)

        # The ".bss" section starts right after the last byte of the image
        bss = ElfNoBits(kernel)
        bss.section_name = ".bss"
        bss.section_header.sh_flags = (
            SH_FLAGS.SHF_ALLOC | SH_FLAGS.SHF_EXECINSTR | SH_FLAGS.SHF_WRITE
        )
        bss.section_header.sh_size = bss_size * 1024 * 1024
        bss.section_header.sh_addr = progbits.section_header.sh_addr + len(
            file_contents
        )

        kernel.sections += [bss]

        # Pick the first known entry point name present in the symbols

        entry_point_address: int = None

        for symbol_name in self.POSSIBLE_ENTRY_POINT_SYMBOLS:
            symbol = kallsyms_finder.name_to_symbol.get(symbol_name)

            if symbol:
                entry_point_address = symbol.virtual_address

                break

        if entry_point_address is None:
            raise ValueError("No entry point symbol found in the kallsyms")

        kernel.file_header.e_entry = entry_point_address

        # Add symbols

        symtab = next((i for i in kernel.sections if i.section_name == ".symtab"), None)

        if not symtab:
            symtab = ElfSymtab(kernel)
            symtab.section_name = ".symtab"

            strtab = ElfStrtab(kernel)
            strtab.section_name = ".strtab"
            symtab.string_table = strtab

            shstrtab = ElfStrtab(kernel)
            shstrtab.section_name = ".shstrtab"

            kernel.section_string_table = shstrtab
            kernel.sections += [symtab, strtab, shstrtab]

        # Sections having an address, sorted by address for the binary search
        sections = sorted(
            [i for i in kernel.sections if i.section_header.sh_addr > 0],
            key=lambda x: x.section_header.sh_addr,
        )

        def _find_section(address):
            """
            Uses binary search to quickly find the section which the address belongs to

            Returns None when no section contains the address.
            """
            if not sections:
                # Nothing to search in (indexing below would fail)
                return None
            # Set baseline and roofline hypotheses, expressed in
            # section table indexes:
            lower_range, upper_range = 0, len(sections) - 1
            # Wait for the hypotheses to converge
            while lower_range < upper_range:
                # Mean operation to pick a new tentative hypothesis
                # (add one to ensure to ceil-round the upper
                # hypothesis in case of a difference of 1)
                middle = (lower_range + upper_range + 1) // 2
                if (
                    sections[middle].section_header.sh_addr <= address
                ):  # Test the hypothesis
                    lower_range = middle  # Use the hypothesis as a baseline
                else:
                    upper_range = middle - 1  # Disqualify the hypothesis
            candidate_header = sections[lower_range].section_header
            # The upper bound is inclusive: an address lying right at the
            # end of a section is still attached to it
            if (
                candidate_header.sh_addr
                <= address
                <= candidate_header.sh_addr + candidate_header.sh_size
            ):
                return sections[
                    lower_range
                ]  # Select the best hypothesis if it qualifies
            return None

        # Keys are (is big endian, is 64 bits)
        elf_symbol_class = {
            (False, False): Elf32LittleEndianSymbolTableEntry,
            (True, False): Elf32BigEndianSymbolTableEntry,
            (False, True): Elf64LittleEndianSymbolTableEntry,
            (True, True): Elf64BigEndianSymbolTableEntry,
        }[(kernel.is_big_endian, kernel.is_64_bits)]

        for symbol in kallsyms_finder.symbols:
            elf_symbol = elf_symbol_class(kernel.is_big_endian, kernel.is_64_bits)

            elf_symbol.symbol_name = symbol.name
            elf_symbol.st_value = symbol.virtual_address

            if symbol.symbol_type not in (
                KallsymsSymbolType.TEXT,
                KallsymsSymbolType.WEAK_SYMBOL_WITH_DEFAULT,
            ):
                elf_symbol.st_info_type = ST_INFO_TYPE.STT_OBJECT
            else:
                elf_symbol.st_info_type = ST_INFO_TYPE.STT_FUNC

            if symbol.symbol_type in (
                KallsymsSymbolType.WEAK_OBJECT_WITH_DEFAULT,
                KallsymsSymbolType.WEAK_SYMBOL_WITH_DEFAULT,
            ):
                elf_symbol.st_info_binding = ST_INFO_BINDING.STB_WEAK
            elif symbol.is_global:
                elf_symbol.st_info_binding = ST_INFO_BINDING.STB_GLOBAL
            else:
                elf_symbol.st_info_binding = ST_INFO_BINDING.STB_LOCAL

            if symbol.symbol_type == KallsymsSymbolType.ABSOLUTE:
                elf_symbol.st_shndx = SPECIAL_SECTION_INDEX.SHN_ABS
            else:
                elf_symbol.associated_section = _find_section(symbol.virtual_address)

            symtab.symbol_table.append(elf_symbol)

        if kallsyms_finder.elf64_rela:
            srela = ElfRela(kernel)
            srela.section_name = ".rela.dyn"
            # Keys are (is big endian, is 64 bits)
            relocation_class = {
                (False, False): Elf32LittleEndianRelocationWithAddendTableEntry,
                (True, False): Elf32BigEndianRelocationWithAddendTableEntry,
                (False, True): Elf64LittleEndianRelocationWithAddendTableEntry,
                (True, True): Elf64BigEndianRelocationWithAddendTableEntry,
            }[(kernel.is_big_endian, kernel.is_64_bits)]
            srela.relocation_table = []
            srela.symtab_section = symtab
            kernel.sections += [srela]
            for rela in kallsyms_finder.elf64_rela:
                relocation = relocation_class(kernel.is_big_endian, kernel.is_64_bits)

                # Only the first and third items of each entry are used
                relocation.r_offset = rela[0]
                relocation.r_info_type = self.RELATIVE_RELOCATION_TYPE
                relocation.r_addend = rela[2]

                srela.relocation_table.append(relocation)

        # Save the modified ELF

        with open(output_file, "wb") as fd:
            kernel.serialize(fd)

        logging.info("[+] Successfully wrote the new ELF kernel to %s", output_file)
|