yaralyzer 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- CHANGELOG.md +6 -0
- yaralyzer/__init__.py +5 -0
- yaralyzer/bytes_match.py +109 -18
- yaralyzer/config.py +17 -5
- yaralyzer/decoding/bytes_decoder.py +31 -9
- yaralyzer/decoding/decoding_attempt.py +7 -7
- yaralyzer/encoding_detection/character_encodings.py +2 -1
- yaralyzer/encoding_detection/encoding_assessment.py +8 -2
- yaralyzer/encoding_detection/encoding_detector.py +14 -9
- yaralyzer/helpers/bytes_helper.py +112 -15
- yaralyzer/helpers/dict_helper.py +1 -1
- yaralyzer/helpers/file_helper.py +3 -3
- yaralyzer/helpers/rich_text_helper.py +6 -4
- yaralyzer/helpers/string_helper.py +1 -1
- yaralyzer/output/file_export.py +1 -0
- yaralyzer/output/file_hashes_table.py +30 -2
- yaralyzer/output/regex_match_metrics.py +13 -10
- yaralyzer/output/rich_console.py +17 -2
- yaralyzer/util/argument_parser.py +1 -0
- yaralyzer/util/logging.py +5 -5
- yaralyzer/yaralyzer.py +39 -23
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.8.dist-info}/METADATA +8 -6
- yaralyzer-1.0.8.dist-info/RECORD +32 -0
- yaralyzer-1.0.7.dist-info/RECORD +0 -32
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.8.dist-info}/LICENSE +0 -0
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.8.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.8.dist-info}/entry_points.txt +0 -0
|
@@ -25,15 +25,35 @@ HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def get_bytes_before_and_after_match(_bytes: bytes, match: re.Match, num_before=None, num_after=None) -> bytes:
|
|
28
|
-
"""
|
|
29
|
-
Get
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
r"""
|
|
29
|
+
Get bytes before and after a regex match within a byte sequence.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
_bytes (bytes): The full byte sequence.
|
|
33
|
+
match (re.Match): The regex match object.
|
|
34
|
+
num_before (int, optional): Number of bytes before the match to include. Defaults to config.
|
|
35
|
+
num_after (int, optional): Number of bytes after the match to include. Defaults to either config or num_before value.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
bytes: The surrounding bytes including the match.
|
|
32
39
|
"""
|
|
33
40
|
return get_bytes_surrounding_range(_bytes, match.start(), match.end(), num_before, num_after)
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num_before=None, num_after=None) -> bytes:
|
|
44
|
+
r"""
|
|
45
|
+
Get bytes surrounding a specified range in a byte sequence.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
_bytes (bytes): The full byte sequence.
|
|
49
|
+
start_idx (int): Start index of the range.
|
|
50
|
+
end_idx (int): End index of the range.
|
|
51
|
+
num_before (int, optional): Number of bytes before the range. Defaults to config.
|
|
52
|
+
num_after (int, optional): Number of bytes after the range. Defaults to config.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
bytes: The surrounding bytes including the range.
|
|
56
|
+
"""
|
|
37
57
|
num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
|
|
38
58
|
num_before = num_before or YaralyzerConfig.args.surrounding_bytes
|
|
39
59
|
start_idx = max(start_idx - num_before, 0)
|
|
@@ -42,7 +62,16 @@ def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num
|
|
|
42
62
|
|
|
43
63
|
|
|
44
64
|
def clean_byte_string(bytes_array: bytes) -> str:
|
|
45
|
-
"""
|
|
65
|
+
r"""
|
|
66
|
+
Return a clean string representation of bytes, without Python's b'' or b"" wrappers.
|
|
67
|
+
e.g. '\x80\nx44' instead of "b'\x80\nx44'".
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
bytes_array (bytes): The bytes to convert.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
str: Clean string representation of the bytes.
|
|
74
|
+
"""
|
|
46
75
|
byte_printer = Console(file=StringIO())
|
|
47
76
|
byte_printer.out(bytes_array, end='')
|
|
48
77
|
bytestr = byte_printer.file.getvalue()
|
|
@@ -58,7 +87,16 @@ def clean_byte_string(bytes_array: bytes) -> str:
|
|
|
58
87
|
|
|
59
88
|
|
|
60
89
|
def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
61
|
-
"""
|
|
90
|
+
r"""
|
|
91
|
+
Return a rich Text object of raw bytes, highlighting the matched bytes.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
_bytes (bytes): The full byte sequence.
|
|
95
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
Text: Rich Text object with highlighted match.
|
|
99
|
+
"""
|
|
62
100
|
surrounding_bytes_str = clean_byte_string(_bytes)
|
|
63
101
|
highlighted_bytes_str = clean_byte_string(bytes_match.bytes)
|
|
64
102
|
highlighted_bytes_str_length = len(highlighted_bytes_str)
|
|
@@ -72,6 +110,16 @@ def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
72
110
|
|
|
73
111
|
|
|
74
112
|
def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
113
|
+
r"""
|
|
114
|
+
Return a hexadecimal view of raw bytes, highlighting the matched bytes.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
_bytes (bytes): The full byte sequence.
|
|
118
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
Text: Rich Text object with highlighted match in hex view.
|
|
122
|
+
"""
|
|
75
123
|
hex_str = hex_text(_bytes)
|
|
76
124
|
highlight_start_idx = bytes_match.highlight_start_idx * 3
|
|
77
125
|
highlight_end_idx = bytes_match.highlight_end_idx * 3
|
|
@@ -81,6 +129,16 @@ def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
81
129
|
|
|
82
130
|
|
|
83
131
|
def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
132
|
+
r"""
|
|
133
|
+
Return an ASCII view of raw bytes, highlighting the matched bytes.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
_bytes (bytes): The full byte sequence.
|
|
137
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
Text: Rich Text object with highlighted match in ASCII view.
|
|
141
|
+
"""
|
|
84
142
|
txt = Text('', style=BYTES)
|
|
85
143
|
|
|
86
144
|
for i, b in enumerate(_bytes):
|
|
@@ -113,23 +171,54 @@ def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
113
171
|
|
|
114
172
|
|
|
115
173
|
def hex_text(_bytes: bytes) -> Text:
|
|
174
|
+
r"""
|
|
175
|
+
Return a rich Text object of the hex string for the given bytes.
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
_bytes (bytes): The bytes to convert.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
Text: Rich Text object of the hex string.
|
|
182
|
+
"""
|
|
116
183
|
return Text(hex_string(_bytes), style=GREY)
|
|
117
184
|
|
|
118
185
|
|
|
119
186
|
def hex_string(_bytes: bytes) -> str:
|
|
187
|
+
r"""
|
|
188
|
+
Return a hex string representation of the given bytes.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
_bytes (bytes): The bytes to convert.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
str: Hex string representation of the bytes.
|
|
195
|
+
"""
|
|
120
196
|
return ' '.join([hex(b).removeprefix('0x').rjust(2, '0') for i, b in enumerate(_bytes)])
|
|
121
197
|
|
|
122
198
|
|
|
123
199
|
def print_bytes(bytes_array: bytes, style=None) -> None:
|
|
124
|
-
"""
|
|
200
|
+
r"""
|
|
201
|
+
Print a string representation of bytes to the console.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
bytes_array (bytes): The bytes to print.
|
|
205
|
+
style (str, optional): Style to use for printing. Defaults to 'bytes'.
|
|
206
|
+
"""
|
|
125
207
|
for line in bytes_array.split(NEWLINE_BYTE):
|
|
126
208
|
console.print(escape(clean_byte_string(line)), style=style or 'bytes')
|
|
127
209
|
|
|
128
210
|
|
|
129
211
|
def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
|
|
130
|
-
"""
|
|
131
|
-
Truncate bytes to
|
|
132
|
-
For utf-16 this means
|
|
212
|
+
r"""
|
|
213
|
+
Truncate bytes to a multiple of the character width for the given encoding.
|
|
214
|
+
For example, for utf-16 this means truncating to a multiple of 2, for utf-32 to a multiple of 4.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
_bytes (bytes): The bytes to truncate.
|
|
218
|
+
encoding (str): The encoding to consider.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
bytes: Truncated bytes.
|
|
133
222
|
"""
|
|
134
223
|
char_width = encoding_width(encoding)
|
|
135
224
|
num_bytes = len(_bytes)
|
|
@@ -142,11 +231,19 @@ def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
|
|
|
142
231
|
|
|
143
232
|
|
|
144
233
|
def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
|
|
145
|
-
"""
|
|
146
|
-
Find the position of
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
234
|
+
r"""
|
|
235
|
+
Find the position of the highlighted bytes string within the surrounding bytes string.
|
|
236
|
+
|
|
237
|
+
Both arguments are string representations of binary data. This is needed because the string
|
|
238
|
+
representation of bytes can be longer than the actual bytes (e.g., '\xcc' is 4 chars for 1 byte).
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
surrounding_bytes_str (str): String representation of the full byte sequence.
|
|
242
|
+
highlighted_bytes_str (str): String representation of the matched bytes.
|
|
243
|
+
highlighted_bytes (BytesMatch): The BytesMatch object for context.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
int: The index in the surrounding string where the highlighted bytes start, or -1 if not found.
|
|
150
247
|
"""
|
|
151
248
|
# Start a few chars in to avoid errors: sometimes we're searching for 1 or 2 bytes and there's a false positive
|
|
152
249
|
# in the extra bytes. This isn't perfect - it's starting us at the first index into the *bytes* that's safe to
|
yaralyzer/helpers/dict_helper.py
CHANGED
yaralyzer/helpers/file_helper.py
CHANGED
|
@@ -7,12 +7,12 @@ from typing import List, Optional
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def timestamp_for_filename() -> str:
|
|
10
|
-
"""Returns a string showing current time in a file name friendly format"""
|
|
10
|
+
"""Returns a string showing current time in a file name friendly format."""
|
|
11
11
|
return datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def files_in_dir(dir: str, with_extname: Optional[str] = None) -> List[str]:
|
|
15
|
-
"""paths for non dot files, optionally ending in 'with_extname'"""
|
|
15
|
+
"""paths for non dot files, optionally ending in 'with_extname'."""
|
|
16
16
|
files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
|
|
17
17
|
files = [file for file in files if not path.isdir(file)]
|
|
18
18
|
|
|
@@ -27,7 +27,7 @@ def files_with_extname(files: List[str], extname: str) -> List[str]:
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def load_word_list(file_path):
|
|
30
|
-
"""For very simple files (1 col CSVs, if you
|
|
30
|
+
"""For very simple files (1 col CSVs, if you will)."""
|
|
31
31
|
with open(file_path, 'r') as f:
|
|
32
32
|
return [line.rstrip().lstrip() for line in f.readlines()]
|
|
33
33
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Methods to handle turning various objects into Rich text/table/etc representations
|
|
3
|
+
|
|
3
4
|
Rich colors: https://rich.readthedocs.io/en/stable/appendix/colors.html
|
|
4
5
|
TODO: interesting colors # row_styles[0] = 'reverse bold on color(144)' <-
|
|
5
6
|
"""
|
|
@@ -41,12 +42,12 @@ def na_txt(style: Union[str, Style] = 'white'):
|
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def prefix_with_style(_str: str, style: str, root_style=None) -> Text:
|
|
44
|
-
"""Sometimes you need a Text() object to start plain lest the underline or whatever last forever"""
|
|
45
|
+
"""Sometimes you need a Text() object to start plain lest the underline or whatever last forever."""
|
|
45
46
|
return Text('', style=root_style or 'white') + Text(_str, style)
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
def meter_style(meter_pct):
|
|
49
|
-
"""For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer"""
|
|
50
|
+
"""For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer."""
|
|
50
51
|
if meter_pct > 100 or meter_pct < 0:
|
|
51
52
|
log.warning(f"Invalid meter_pct: {meter_pct}")
|
|
52
53
|
|
|
@@ -81,7 +82,7 @@ def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
|
|
|
81
82
|
|
|
82
83
|
|
|
83
84
|
def reverse_color(style: Style) -> Style:
|
|
84
|
-
"""Reverses the color for a given style"""
|
|
85
|
+
"""Reverses the color for a given style."""
|
|
85
86
|
return Style(color=style.bgcolor, bgcolor=style.color, underline=style.underline, bold=style.bold)
|
|
86
87
|
|
|
87
88
|
|
|
@@ -104,7 +105,7 @@ def show_color_theme(styles: dict) -> None:
|
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
def size_text(num_bytes: int) -> Text:
|
|
107
|
-
"""Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)"""
|
|
108
|
+
"""Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)."""
|
|
108
109
|
kb_txt = prefix_with_style("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
|
|
109
110
|
kb_txt.append(' kb ')
|
|
110
111
|
bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
|
|
@@ -116,4 +117,5 @@ def size_in_bytes_text(num_bytes: int) -> Text:
|
|
|
116
117
|
|
|
117
118
|
|
|
118
119
|
def newline_join(texts: List[Text]) -> Text:
|
|
120
|
+
"""Join a list of Text objects with newlines between them."""
|
|
119
121
|
return Text("\n").join(texts)
|
|
@@ -17,7 +17,7 @@ def line_count(_string: str) -> int:
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def hex_to_string(_string: str) -> str:
|
|
20
|
-
"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
|
|
20
|
+
r"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
|
|
21
21
|
return bytearray.fromhex(_string.replace(' ', '')).decode()
|
|
22
22
|
|
|
23
23
|
|
yaralyzer/output/file_export.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods for
|
|
2
|
+
Methods for computing and displaying various file hashes.
|
|
3
3
|
"""
|
|
4
4
|
import hashlib
|
|
5
5
|
from collections import namedtuple
|
|
@@ -18,7 +18,17 @@ def bytes_hashes_table(
|
|
|
18
18
|
title: Optional[str] = None,
|
|
19
19
|
title_justify: str = LEFT
|
|
20
20
|
) -> Table:
|
|
21
|
-
"""
|
|
21
|
+
"""
|
|
22
|
+
Build a Rich Table displaying the size, MD5, SHA1, and SHA256 hashes of a byte sequence.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
bytes_or_bytes_info (Union[bytes, BytesInfo]): The bytes to hash, or a BytesInfo namedtuple with precomputed values.
|
|
26
|
+
title (Optional[str], optional): Optional title for the table. Defaults to None.
|
|
27
|
+
title_justify (str, optional): Justification for the table title. Defaults to LEFT.
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Table: A Rich Table object with the size and hash values.
|
|
31
|
+
"""
|
|
22
32
|
if isinstance(bytes_or_bytes_info, bytes):
|
|
23
33
|
bytes_info = compute_file_hashes(bytes_or_bytes_info)
|
|
24
34
|
else:
|
|
@@ -40,6 +50,15 @@ def bytes_hashes_table(
|
|
|
40
50
|
|
|
41
51
|
|
|
42
52
|
def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
53
|
+
"""
|
|
54
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for a given byte sequence.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
_bytes (bytes): The bytes to hash.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
BytesInfo: Namedtuple containing size, md5, sha1, and sha256 values.
|
|
61
|
+
"""
|
|
43
62
|
return BytesInfo(
|
|
44
63
|
size=len(_bytes),
|
|
45
64
|
md5=hashlib.md5(_bytes).hexdigest().upper(),
|
|
@@ -49,5 +68,14 @@ def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
|
49
68
|
|
|
50
69
|
|
|
51
70
|
def compute_file_hashes_for_file(file_path) -> BytesInfo:
|
|
71
|
+
"""
|
|
72
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for the contents of a file.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
file_path (str): Path to the file to hash.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
BytesInfo: Namedtuple containing size, md5, sha1, and sha256 values for the file contents.
|
|
79
|
+
"""
|
|
52
80
|
with open(file_path, 'rb') as file:
|
|
53
81
|
return compute_file_hashes(file.read())
|
|
@@ -1,13 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Class to measure what we enounter as we iterate over every single match of a relatively simple byte level regex
|
|
3
|
-
(e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data
|
|
4
|
-
|
|
5
|
-
Things like how much many of our matched bytes were we able to decode easily vs. by force vs. not at all,
|
|
6
|
-
were some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
|
|
7
|
-
that way?
|
|
8
|
-
|
|
9
|
-
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
10
|
-
"""
|
|
1
|
+
"""RegexMatchMetrics class."""
|
|
11
2
|
from collections import defaultdict
|
|
12
3
|
|
|
13
4
|
from yaralyzer.decoding.bytes_decoder import BytesDecoder
|
|
@@ -15,6 +6,18 @@ from yaralyzer.util.logging import log
|
|
|
15
6
|
|
|
16
7
|
|
|
17
8
|
class RegexMatchMetrics:
|
|
9
|
+
"""
|
|
10
|
+
Class to measure what we encounter as we iterate over every single match of a relatively simple byte level regex.
|
|
11
|
+
|
|
12
|
+
(e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data.
|
|
13
|
+
|
|
14
|
+
Things like how many of our matched bytes we were able to decode easily vs. by force vs. not at all,
|
|
15
|
+
whether some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
|
|
16
|
+
that way?
|
|
17
|
+
|
|
18
|
+
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
19
|
+
"""
|
|
20
|
+
|
|
18
21
|
def __init__(self) -> None:
|
|
19
22
|
self.match_count = 0
|
|
20
23
|
self.bytes_matched = 0
|
yaralyzer/output/rich_console.py
CHANGED
|
@@ -81,12 +81,13 @@ YARALYZER_THEME = Theme(YARALYZER_THEME_DICT)
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
def console_width_possibilities():
|
|
84
|
+
"""Returns a list of possible console widths, the first being the current terminal width."""
|
|
84
85
|
# Subtract 2 from terminal cols just as a precaution in case things get weird
|
|
85
86
|
return [get_terminal_size().columns - 2, DEFAULT_CONSOLE_WIDTH]
|
|
86
87
|
|
|
87
88
|
|
|
88
89
|
def console_width() -> int:
|
|
89
|
-
"""Current width set in console obj"""
|
|
90
|
+
"""Current width set in console obj."""
|
|
90
91
|
return console._width or 40
|
|
91
92
|
|
|
92
93
|
|
|
@@ -104,7 +105,7 @@ console = Console(theme=YARALYZER_THEME, color_system='256', highlight=False, wi
|
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
def console_print_with_fallback(_string, style=None) -> None:
|
|
107
|
-
"""Fallback to regular print() if there's a Markup issue"""
|
|
108
|
+
"""Fallback to regular print() if there's a Markup issue."""
|
|
108
109
|
try:
|
|
109
110
|
console.print(_string, style=style)
|
|
110
111
|
except MarkupError:
|
|
@@ -113,10 +114,12 @@ def console_print_with_fallback(_string, style=None) -> None:
|
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
def theme_colors_with_prefix(prefix: str) -> List[Text]:
|
|
117
|
+
"""Return a list of (name, style) Text objects for all styles in the theme that start with 'prefix'."""
|
|
116
118
|
return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
|
|
117
119
|
|
|
118
120
|
|
|
119
121
|
def print_fatal_error_and_exit(error_message: str) -> None:
|
|
122
|
+
"""Print a fatal error message in a panel and exit."""
|
|
120
123
|
console.line(1)
|
|
121
124
|
print_header_panel(error_message, style='bold red reverse')
|
|
122
125
|
console.line(1)
|
|
@@ -124,4 +127,16 @@ def print_fatal_error_and_exit(error_message: str) -> None:
|
|
|
124
127
|
|
|
125
128
|
|
|
126
129
|
def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0, 2)) -> None:
|
|
130
|
+
"""
|
|
131
|
+
Print a headline inside a styled Rich Panel to the console.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
headline (str): The text to display as the panel's headline.
|
|
135
|
+
style (str): The style to apply to the panel (e.g., color, bold, reverse).
|
|
136
|
+
expand (bool, optional): Whether the panel should expand to the full console width. Defaults to True.
|
|
137
|
+
padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to (0, 2).
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
None
|
|
141
|
+
"""
|
|
127
142
|
console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
|
yaralyzer/util/logging.py
CHANGED
|
@@ -37,7 +37,7 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
def configure_logger(log_label: str) -> logging.Logger:
|
|
40
|
-
"""Set up a file or stream logger depending on the configuration"""
|
|
40
|
+
"""Set up a file or stream logger depending on the configuration."""
|
|
41
41
|
log_name = f"yaralyzer.{log_label}"
|
|
42
42
|
logger = logging.getLogger(log_name)
|
|
43
43
|
|
|
@@ -71,13 +71,13 @@ if YaralyzerConfig.LOG_DIR:
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def log_and_print(msg: str, log_level='INFO'):
|
|
74
|
-
"""Both print and log (at INFO level) a string"""
|
|
74
|
+
"""Both print and log (at INFO level) a string."""
|
|
75
75
|
log.log(logging.getLevelName(log_level), msg)
|
|
76
76
|
print(msg)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
def log_current_config():
|
|
80
|
-
"""Write current state of YaralyzerConfig object to the logs"""
|
|
80
|
+
"""Write current state of YaralyzerConfig object to the logs."""
|
|
81
81
|
msg = f"{YaralyzerConfig.__name__} current attributes:\n"
|
|
82
82
|
config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
|
|
83
83
|
|
|
@@ -88,14 +88,14 @@ def log_current_config():
|
|
|
88
88
|
|
|
89
89
|
|
|
90
90
|
def log_invocation() -> None:
|
|
91
|
-
"""Log the command used to launch the yaralyzer to the invocation log"""
|
|
91
|
+
"""Log the command used to launch the yaralyzer to the invocation log."""
|
|
92
92
|
msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
|
|
93
93
|
log.info(msg)
|
|
94
94
|
invocation_log.info(msg)
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
def log_argparse_result(args, label: str):
|
|
98
|
-
"""Logs the result of argparse"""
|
|
98
|
+
"""Logs the result of argparse."""
|
|
99
99
|
args_dict = vars(args)
|
|
100
100
|
log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
|
|
101
101
|
|
yaralyzer/yaralyzer.py
CHANGED
|
@@ -1,13 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Central class that handles setting up / compiling rules and reading binary data from files as needed.
|
|
3
|
-
Alternate constructors are provided depending on whether:
|
|
4
|
-
1. YARA rules are already compiled
|
|
5
|
-
2. YARA rules should be compiled from a string
|
|
6
|
-
3. YARA rules should be read from a file
|
|
7
|
-
4. YARA rules should be read from a directory of .yara files
|
|
8
|
-
|
|
9
|
-
The real action happens in the __rich__console__() dunder method.
|
|
10
|
-
"""
|
|
1
|
+
"""Main Yaralyzer class and alternate constructors."""
|
|
11
2
|
from os import path
|
|
12
3
|
from typing import Iterator, List, Optional, Tuple, Union
|
|
13
4
|
|
|
@@ -34,6 +25,22 @@ YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't
|
|
|
34
25
|
|
|
35
26
|
# TODO: might be worth introducing a Scannable namedtuple or similar
|
|
36
27
|
class Yaralyzer:
|
|
28
|
+
"""
|
|
29
|
+
Central class that handles setting up / compiling rules and reading binary data from files as needed.
|
|
30
|
+
|
|
31
|
+
Alternate constructors are provided depending on whether:
|
|
32
|
+
|
|
33
|
+
* YARA rules are already compiled
|
|
34
|
+
|
|
35
|
+
* YARA rules should be compiled from a string
|
|
36
|
+
|
|
37
|
+
* YARA rules should be read from a file
|
|
38
|
+
|
|
39
|
+
* YARA rules should be read from a directory of .yara files
|
|
40
|
+
|
|
41
|
+
The real action happens in the __rich_console__() dunder method.
|
|
42
|
+
"""
|
|
43
|
+
|
|
37
44
|
def __init__(
|
|
38
45
|
self,
|
|
39
46
|
rules: Union[str, yara.Rules],
|
|
@@ -43,10 +50,17 @@ class Yaralyzer:
|
|
|
43
50
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
44
51
|
) -> None:
|
|
45
52
|
"""
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
Initialize a Yaralyzer instance for scanning binary data with YARA rules.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string (YARA rule source) or a pre-compiled yara.Rules object. If a string is provided, it will be compiled.
|
|
57
|
+
rules_label (str): Label to identify the ruleset in output and logs.
|
|
58
|
+
scannable (Union[bytes, str]): The data to scan. If bytes, raw data is scanned; if str, it is treated as a file path to load bytes from.
|
|
59
|
+
scannable_label (Optional[str], optional): Label for the scannable data. Required if scannable is bytes. If scannable is a file path, defaults to the file's basename.
|
|
60
|
+
highlight_style (str, optional): Style to use for highlighting matches in output. Defaults to YaralyzerConfig.HIGHLIGHT_STYLE.
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
TypeError: If scannable is bytes and scannable_label is not provided.
|
|
50
64
|
"""
|
|
51
65
|
if 'args' not in vars(YaralyzerConfig):
|
|
52
66
|
YaralyzerConfig.set_default_args()
|
|
@@ -87,7 +101,7 @@ class Yaralyzer:
|
|
|
87
101
|
scannable: Union[bytes, str],
|
|
88
102
|
scannable_label: Optional[str] = None
|
|
89
103
|
) -> 'Yaralyzer':
|
|
90
|
-
"""Alternate constructor
|
|
104
|
+
"""Alternate constructor to load yara rules from files and label rules with the filenames."""
|
|
91
105
|
if not isinstance(yara_rules_files, list):
|
|
92
106
|
raise TypeError(f"{yara_rules_files} is not a list")
|
|
93
107
|
|
|
@@ -112,7 +126,7 @@ class Yaralyzer:
|
|
|
112
126
|
scannable: Union[bytes, str],
|
|
113
127
|
scannable_label: Optional[str] = None
|
|
114
128
|
) -> 'Yaralyzer':
|
|
115
|
-
"""Alternate constructor that will load all .yara files in yara_rules_dir"""
|
|
129
|
+
"""Alternate constructor that will load all .yara files in yara_rules_dir."""
|
|
116
130
|
if not (isinstance(dirs, list) and all(path.isdir(dir) for dir in dirs)):
|
|
117
131
|
raise TypeError(f"'{dirs}' is not a list of valid directories")
|
|
118
132
|
|
|
@@ -130,7 +144,7 @@ class Yaralyzer:
|
|
|
130
144
|
pattern_label: Optional[str] = None,
|
|
131
145
|
regex_modifier: Optional[str] = None,
|
|
132
146
|
) -> 'Yaralyzer':
|
|
133
|
-
"""Constructor taking regex pattern strings. Rules label defaults to patterns joined by comma"""
|
|
147
|
+
"""Constructor taking regex pattern strings. Rules label defaults to patterns joined by comma."""
|
|
134
148
|
rule_strings = []
|
|
135
149
|
|
|
136
150
|
for i, pattern in enumerate(patterns):
|
|
@@ -149,7 +163,7 @@ class Yaralyzer:
|
|
|
149
163
|
return cls(rules_string, rules_label, scannable, scannable_label)
|
|
150
164
|
|
|
151
165
|
def yaralyze(self) -> None:
|
|
152
|
-
"""Use YARA to find matches and then force decode them"""
|
|
166
|
+
"""Use YARA to find matches and then force decode them."""
|
|
153
167
|
console.print(self)
|
|
154
168
|
|
|
155
169
|
def match_iterator(self) -> Iterator[Tuple[BytesMatch, BytesDecoder]]:
|
|
@@ -168,6 +182,7 @@ class Yaralyzer:
|
|
|
168
182
|
self._print_non_matches()
|
|
169
183
|
|
|
170
184
|
def _yara_callback(self, data: dict):
|
|
185
|
+
"""YARA callback to handle matches and non-matches as they are discovered."""
|
|
171
186
|
if data['matches']:
|
|
172
187
|
self.matches.append(YaraMatch(data, self._panel_text()))
|
|
173
188
|
else:
|
|
@@ -176,7 +191,7 @@ class Yaralyzer:
|
|
|
176
191
|
return yara.CALLBACK_CONTINUE
|
|
177
192
|
|
|
178
193
|
def _print_non_matches(self) -> None:
|
|
179
|
-
"""Print info about the YARA rules that didn't match the bytes"""
|
|
194
|
+
"""Print info about the YARA rules that didn't match the bytes."""
|
|
180
195
|
if len(self.non_matches) == 0:
|
|
181
196
|
return
|
|
182
197
|
|
|
@@ -193,21 +208,21 @@ class Yaralyzer:
|
|
|
193
208
|
console.print(Padding(Text(', ', 'white').join(non_matches_text), (0, 0, 1, 4)))
|
|
194
209
|
|
|
195
210
|
def _panel_text(self) -> Text:
|
|
196
|
-
"""Inverted colors for the panel at the top of the match section of the output"""
|
|
211
|
+
"""Inverted colors for the panel at the top of the match section of the output."""
|
|
197
212
|
styles = [reverse_color(YARALYZER_THEME.styles[f"yara.{s}"]) for s in ('scanned', 'rules')]
|
|
198
213
|
return self.__text__(*styles)
|
|
199
214
|
|
|
200
215
|
def _filename_string(self):
|
|
201
|
-
"""The string to use when exporting this yaralyzer to SVG/HTML/etc"""
|
|
216
|
+
"""The string to use when exporting this yaralyzer to SVG/HTML/etc."""
|
|
202
217
|
return str(self).replace('>', '').replace('<', '').replace(' ', '_')
|
|
203
218
|
|
|
204
219
|
def __text__(self, byte_style: str = 'yara.scanned', rule_style: str = 'yara.rules') -> Text:
|
|
205
|
-
"""Text representation of this YARA scan (__text__() was taken)"""
|
|
220
|
+
"""Text representation of this YARA scan (__text__() was taken)."""
|
|
206
221
|
txt = Text('').append(self.scannable_label, style=byte_style or 'yara.scanned')
|
|
207
222
|
return txt.append(' scanned with <').append(self.rules_label, style=rule_style or 'yara.rules').append('>')
|
|
208
223
|
|
|
209
224
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
210
|
-
"""Does the stuff. TODO: not the best place to put the core logic"""
|
|
225
|
+
"""Does the stuff. TODO: not the best place to put the core logic."""
|
|
211
226
|
yield bytes_hashes_table(self.bytes, self.scannable_label)
|
|
212
227
|
|
|
213
228
|
for _bytes_match, bytes_decoder in self.match_iterator():
|
|
@@ -215,4 +230,5 @@ class Yaralyzer:
|
|
|
215
230
|
yield attempt
|
|
216
231
|
|
|
217
232
|
def __str__(self) -> str:
|
|
233
|
+
"""Plain text (no rich colors) representation of the scan for display."""
|
|
218
234
|
return self.__text__().plain
|