yaralyzer 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- CHANGELOG.md +9 -0
- yaralyzer/__init__.py +5 -3
- yaralyzer/bytes_match.py +106 -16
- yaralyzer/config.py +18 -5
- yaralyzer/decoding/bytes_decoder.py +41 -11
- yaralyzer/decoding/decoding_attempt.py +54 -17
- yaralyzer/encoding_detection/character_encodings.py +11 -7
- yaralyzer/encoding_detection/encoding_assessment.py +31 -5
- yaralyzer/encoding_detection/encoding_detector.py +43 -9
- yaralyzer/helpers/bytes_helper.py +113 -15
- yaralyzer/helpers/dict_helper.py +1 -1
- yaralyzer/helpers/file_helper.py +20 -13
- yaralyzer/helpers/rich_text_helper.py +16 -13
- yaralyzer/helpers/string_helper.py +1 -1
- yaralyzer/output/decoding_attempts_table.py +43 -9
- yaralyzer/output/file_export.py +23 -6
- yaralyzer/output/file_hashes_table.py +31 -2
- yaralyzer/output/regex_match_metrics.py +33 -8
- yaralyzer/output/rich_console.py +26 -9
- yaralyzer/util/argument_parser.py +12 -3
- yaralyzer/util/logging.py +31 -16
- yaralyzer/yara/yara_match.py +40 -17
- yaralyzer/yara/yara_rule_builder.py +55 -11
- yaralyzer/yaralyzer.py +117 -31
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.9.dist-info}/METADATA +10 -9
- yaralyzer-1.0.9.dist-info/RECORD +32 -0
- yaralyzer-1.0.7.dist-info/RECORD +0 -32
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.9.dist-info}/LICENSE +0 -0
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.9.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.7.dist-info → yaralyzer-1.0.9.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods for
|
|
2
|
+
Methods for computing and displaying various file hashes.
|
|
3
3
|
"""
|
|
4
4
|
import hashlib
|
|
5
5
|
from collections import namedtuple
|
|
@@ -18,7 +18,18 @@ def bytes_hashes_table(
|
|
|
18
18
|
title: Optional[str] = None,
|
|
19
19
|
title_justify: str = LEFT
|
|
20
20
|
) -> Table:
|
|
21
|
-
"""
|
|
21
|
+
"""
|
|
22
|
+
Build a Rich `Table` displaying the size, MD5, SHA1, and SHA256 hashes of a byte sequence.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
bytes_or_bytes_info (Union[bytes, BytesInfo]): The `bytes` to hash, or a `BytesInfo`
|
|
26
|
+
namedtuple with precomputed values.
|
|
27
|
+
title (Optional[str], optional): Optional title for the table. Defaults to `None`.
|
|
28
|
+
title_justify (str, optional): Justification for the table title. Defaults to `"LEFT"`.
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Table: A Rich `Table` object with the size and hash values.
|
|
32
|
+
"""
|
|
22
33
|
if isinstance(bytes_or_bytes_info, bytes):
|
|
23
34
|
bytes_info = compute_file_hashes(bytes_or_bytes_info)
|
|
24
35
|
else:
|
|
@@ -40,6 +51,15 @@ def bytes_hashes_table(
|
|
|
40
51
|
|
|
41
52
|
|
|
42
53
|
def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
54
|
+
"""
|
|
55
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for a given byte sequence.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
_bytes (bytes): The `bytes` to hash.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
BytesInfo: `BytesInfo` namedtuple containing size, md5, sha1, and sha256 values.
|
|
62
|
+
"""
|
|
43
63
|
return BytesInfo(
|
|
44
64
|
size=len(_bytes),
|
|
45
65
|
md5=hashlib.md5(_bytes).hexdigest().upper(),
|
|
@@ -49,5 +69,14 @@ def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
|
49
69
|
|
|
50
70
|
|
|
51
71
|
def compute_file_hashes_for_file(file_path) -> BytesInfo:
|
|
72
|
+
"""
|
|
73
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for the contents of a file.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
file_path (str): Path to the file to hash.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
BytesInfo: `BytesInfo` namedtuple containing size, md5, sha1, and sha256 values for the file contents.
|
|
80
|
+
"""
|
|
52
81
|
with open(file_path, 'rb') as file:
|
|
53
82
|
return compute_file_hashes(file.read())
|
|
@@ -1,12 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
(e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data
|
|
4
|
-
|
|
5
|
-
Things like how much many of our matched bytes were we able to decode easily vs. by force vs. not at all,
|
|
6
|
-
were some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
|
|
7
|
-
that way?
|
|
8
|
-
|
|
9
|
-
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
2
|
+
`RegexMatchMetrics` class.
|
|
10
3
|
"""
|
|
11
4
|
from collections import defaultdict
|
|
12
5
|
|
|
@@ -15,6 +8,30 @@ from yaralyzer.util.logging import log
|
|
|
15
8
|
|
|
16
9
|
|
|
17
10
|
class RegexMatchMetrics:
|
|
11
|
+
"""
|
|
12
|
+
Class to measure what we encounter as we iterate over all matches of a relatively simple byte level regex.
|
|
13
|
+
|
|
14
|
+
Things like how many of our matched bytes we were able to decode easily vs. by force vs. not at all,
|
|
15
|
+
and whether some encodings have a higher pct of success than others (indicating part of our mystery data might be
|
|
16
|
+
encoded that way).
|
|
17
|
+
|
|
18
|
+
Example:
|
|
19
|
+
"Find bytes between quotes" against a relatively large pool of close to random encrypted binary data.
|
|
20
|
+
|
|
21
|
+
Attributes:
|
|
22
|
+
match_count (int): Total number of matches found.
|
|
23
|
+
bytes_matched (int): Total number of bytes matched across all matches.
|
|
24
|
+
matches_decoded (int): Number of matches where we were able to decode at least some of the matched bytes.
|
|
25
|
+
easy_decode_count (int): Number of matches where we were able to decode the matched bytes without forcing.
|
|
26
|
+
forced_decode_count (int): Number of matches where we were only able to decode the matched bytes by forcing.
|
|
27
|
+
undecodable_count (int): Number of matches where we were unable to decode any of the matched bytes.
|
|
28
|
+
skipped_matches_lengths (defaultdict): Dictionary mapping lengths of skipped matches to their counts.
|
|
29
|
+
bytes_match_objs (list): List of `BytesMatch` objects for all matches encountered.
|
|
30
|
+
per_encoding_stats (defaultdict): Dictionary mapping encoding names to their respective `RegexMatchMetrics`.
|
|
31
|
+
|
|
32
|
+
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
33
|
+
"""
|
|
34
|
+
|
|
18
35
|
def __init__(self) -> None:
|
|
19
36
|
self.match_count = 0
|
|
20
37
|
self.bytes_matched = 0
|
|
@@ -27,12 +44,20 @@ class RegexMatchMetrics:
|
|
|
27
44
|
self.per_encoding_stats = defaultdict(lambda: RegexMatchMetrics())
|
|
28
45
|
|
|
29
46
|
def num_matches_skipped_for_being_empty(self) -> int:
|
|
47
|
+
"""Number of matches skipped for being empty (0 length)."""
|
|
30
48
|
return self.skipped_matches_lengths[0]
|
|
31
49
|
|
|
32
50
|
def num_matches_skipped_for_being_too_big(self) -> int:
|
|
51
|
+
"""Number of matches skipped for being too big to decode."""
|
|
33
52
|
return sum({k: v for k, v in self.skipped_matches_lengths.items() if k > 0}.values())
|
|
34
53
|
|
|
35
54
|
def tally_match(self, decoder: BytesDecoder) -> None:
|
|
55
|
+
"""
|
|
56
|
+
Tally statistics from a `BytesDecoder` after it has processed a match.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
decoder (BytesDecoder): The `BytesDecoder` that processed a match.
|
|
60
|
+
"""
|
|
36
61
|
log.debug(f"Tallying {decoder.bytes_match} ({len(decoder.decodings)} decodings)")
|
|
37
62
|
self.match_count += 1
|
|
38
63
|
self.bytes_matched += decoder.bytes_match.match_length
|
yaralyzer/output/rich_console.py
CHANGED
|
@@ -81,15 +81,11 @@ YARALYZER_THEME = Theme(YARALYZER_THEME_DICT)
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
def console_width_possibilities():
|
|
84
|
+
"""Returns a list of possible console widths, the first being the current terminal width."""
|
|
84
85
|
# Subtract 2 from terminal cols just as a precaution in case things get weird
|
|
85
86
|
return [get_terminal_size().columns - 2, DEFAULT_CONSOLE_WIDTH]
|
|
86
87
|
|
|
87
88
|
|
|
88
|
-
def console_width() -> int:
|
|
89
|
-
"""Current width set in console obj"""
|
|
90
|
-
return console._width or 40
|
|
91
|
-
|
|
92
|
-
|
|
93
89
|
# Maximize output width if YARALYZER_MAXIMIZE_WIDTH is set (can also be changed with the --maximize-width option)
|
|
94
90
|
if is_invoked_by_pytest():
|
|
95
91
|
CONSOLE_WIDTH = DEFAULT_CONSOLE_WIDTH
|
|
@@ -103,8 +99,8 @@ CONSOLE_PRINT_BYTE_WIDTH = int(CONSOLE_WIDTH / 4.0)
|
|
|
103
99
|
console = Console(theme=YARALYZER_THEME, color_system='256', highlight=False, width=CONSOLE_WIDTH)
|
|
104
100
|
|
|
105
101
|
|
|
106
|
-
def console_print_with_fallback(_string, style=None) -> None:
|
|
107
|
-
"""
|
|
102
|
+
def console_print_with_fallback(_string: Text | str, style=None) -> None:
|
|
103
|
+
"""`rich.console.print()` with fallback to regular `print()` if there's a Rich Markup issue."""
|
|
108
104
|
try:
|
|
109
105
|
console.print(_string, style=style)
|
|
110
106
|
except MarkupError:
|
|
@@ -112,11 +108,18 @@ def console_print_with_fallback(_string, style=None) -> None:
|
|
|
112
108
|
print(_string.plain if isinstance(_string, Text) else _string)
|
|
113
109
|
|
|
114
110
|
|
|
115
|
-
def
|
|
116
|
-
|
|
111
|
+
def console_width() -> int:
|
|
112
|
+
"""Current width set in `console` object."""
|
|
113
|
+
return console._width or 40
|
|
117
114
|
|
|
118
115
|
|
|
119
116
|
def print_fatal_error_and_exit(error_message: str) -> None:
|
|
117
|
+
"""
|
|
118
|
+
Print a fatal error message in a `Panel` and exit.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
error_message (str): The error message to display.
|
|
122
|
+
"""
|
|
120
123
|
console.line(1)
|
|
121
124
|
print_header_panel(error_message, style='bold red reverse')
|
|
122
125
|
console.line(1)
|
|
@@ -124,4 +127,18 @@ def print_fatal_error_and_exit(error_message: str) -> None:
|
|
|
124
127
|
|
|
125
128
|
|
|
126
129
|
def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0, 2)) -> None:
|
|
130
|
+
"""
|
|
131
|
+
Print a headline inside a styled Rich `Panel` to the console.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
headline (str): The text to display as the panel's headline.
|
|
135
|
+
style (str): The style to apply to the panel (e.g., color, bold, reverse).
|
|
136
|
+
expand (bool, optional): Whether the panel should expand to the full console width. Defaults to `True`.
|
|
137
|
+
padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to `(0, 2)`.
|
|
138
|
+
"""
|
|
127
139
|
console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def theme_colors_with_prefix(prefix: str) -> List[Text]:
|
|
143
|
+
"""Return a list of (name, style) `Text` objects for all styles in the theme that start with `prefix`."""
|
|
144
|
+
return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
"""Argument parsing for yaralyzer CLI tool."""
|
|
1
2
|
import logging
|
|
2
3
|
import re
|
|
3
4
|
import sys
|
|
@@ -217,9 +218,16 @@ YaralyzerConfig.set_argument_parser(parser)
|
|
|
217
218
|
|
|
218
219
|
def parse_arguments(args: Optional[Namespace] = None):
|
|
219
220
|
"""
|
|
220
|
-
Parse command line args. Most
|
|
221
|
-
If args are passed neither rules nor a regex need be provided as it is assumed
|
|
222
|
-
the constructor will instantiate a Yaralyzer object directly.
|
|
221
|
+
Parse command line args. Most arguments can also be communicated to the app by setting env vars.
|
|
222
|
+
If `args` are passed neither rules nor a regex need be provided as it is assumed
|
|
223
|
+
the constructor will instantiate a `Yaralyzer` object directly.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
|
|
227
|
+
Defaults to `None`.
|
|
228
|
+
|
|
229
|
+
Raises:
|
|
230
|
+
ArgumentError: If args are invalid.
|
|
223
231
|
"""
|
|
224
232
|
if '--version' in sys.argv:
|
|
225
233
|
print(f"yaralyzer {version('yaralyzer')}")
|
|
@@ -281,6 +289,7 @@ def parse_arguments(args: Optional[Namespace] = None):
|
|
|
281
289
|
|
|
282
290
|
|
|
283
291
|
def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
|
|
292
|
+
"""Get the basepath (directory + filename without extension) for exported files."""
|
|
284
293
|
file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
|
|
285
294
|
args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}" # noqa: E221
|
|
286
295
|
args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
|
yaralyzer/util/logging.py
CHANGED
|
@@ -1,28 +1,34 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Handle logging for `yaralyzer`.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
There are two possible log sinks other than `STDOUT`:
|
|
5
|
+
|
|
6
|
+
1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
|
|
5
7
|
2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
|
|
6
8
|
|
|
7
|
-
The regular log file at APPLICATION_LOG_PATH is where the quite verbose application logs
|
|
9
|
+
The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
|
|
8
10
|
will be written if things ever need to get that formal. For now those logs are only accessible
|
|
9
|
-
on STDOUT with the
|
|
11
|
+
on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
|
|
10
12
|
needs/wants that sort of thing.
|
|
11
13
|
|
|
12
|
-
Logs are not normally ephemeral/not written
|
|
13
|
-
the YARALYZER_LOG_DIR env var. See
|
|
14
|
-
YARALYZER_LOG_DIR to a value.
|
|
14
|
+
Logs are normally ephemeral (not written to files) but can be configured to be written to files by setting
|
|
15
|
+
the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
|
|
16
|
+
of setting `YARALYZER_LOG_DIR` to a value.
|
|
17
|
+
|
|
18
|
+
* [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
|
|
15
19
|
|
|
16
|
-
https://
|
|
17
|
-
https://realpython.com/python-logging/
|
|
20
|
+
* [realpython.com/python-logging/](https://realpython.com/python-logging/)
|
|
18
21
|
|
|
19
22
|
Python log levels for reference:
|
|
23
|
+
|
|
24
|
+
```
|
|
20
25
|
CRITICAL 50
|
|
21
26
|
ERROR 40
|
|
22
27
|
WARNING 30
|
|
23
28
|
INFO 20
|
|
24
29
|
DEBUG 10
|
|
25
30
|
NOTSET 0
|
|
31
|
+
```
|
|
26
32
|
"""
|
|
27
33
|
import logging
|
|
28
34
|
import sys
|
|
@@ -37,13 +43,22 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
|
|
|
37
43
|
|
|
38
44
|
|
|
39
45
|
def configure_logger(log_label: str) -> logging.Logger:
|
|
40
|
-
"""
|
|
46
|
+
"""
|
|
47
|
+
Set up a file or stream `logger` depending on the configuration.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
log_label (str): The label for the `logger`, e.g. "run" or "invocation".
|
|
51
|
+
Actual name will be `"yaralyzer.{log_label}"`.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
logging.Logger: The configured `logger`.
|
|
55
|
+
"""
|
|
41
56
|
log_name = f"yaralyzer.{log_label}"
|
|
42
57
|
logger = logging.getLogger(log_name)
|
|
43
58
|
|
|
44
59
|
if YaralyzerConfig.LOG_DIR:
|
|
45
60
|
if not path.isdir(YaralyzerConfig.LOG_DIR) or not path.isabs(YaralyzerConfig.LOG_DIR):
|
|
46
|
-
raise
|
|
61
|
+
raise FileNotFoundError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
|
|
47
62
|
|
|
48
63
|
log_file_path = path.join(YaralyzerConfig.LOG_DIR, f"{log_name}.log")
|
|
49
64
|
log_formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
|
|
@@ -70,14 +85,14 @@ if YaralyzerConfig.LOG_DIR:
|
|
|
70
85
|
invocation_log.setLevel('INFO')
|
|
71
86
|
|
|
72
87
|
|
|
73
|
-
def log_and_print(msg: str, log_level='INFO'):
|
|
74
|
-
"""Both print and log (
|
|
88
|
+
def log_and_print(msg: str, log_level: str = 'INFO'):
|
|
89
|
+
"""Both print (to console) and log (to file) a string."""
|
|
75
90
|
log.log(logging.getLevelName(log_level), msg)
|
|
76
91
|
print(msg)
|
|
77
92
|
|
|
78
93
|
|
|
79
94
|
def log_current_config():
|
|
80
|
-
"""Write current state of YaralyzerConfig object to the logs"""
|
|
95
|
+
"""Write current state of `YaralyzerConfig` object to the logs."""
|
|
81
96
|
msg = f"{YaralyzerConfig.__name__} current attributes:\n"
|
|
82
97
|
config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
|
|
83
98
|
|
|
@@ -88,14 +103,14 @@ def log_current_config():
|
|
|
88
103
|
|
|
89
104
|
|
|
90
105
|
def log_invocation() -> None:
|
|
91
|
-
"""Log the command used to launch the yaralyzer to the invocation log"""
|
|
106
|
+
"""Log the command used to launch the `yaralyzer` to the invocation log."""
|
|
92
107
|
msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
|
|
93
108
|
log.info(msg)
|
|
94
109
|
invocation_log.info(msg)
|
|
95
110
|
|
|
96
111
|
|
|
97
112
|
def log_argparse_result(args, label: str):
|
|
98
|
-
"""Logs the result of argparse"""
|
|
113
|
+
"""Logs the result of `argparse`."""
|
|
99
114
|
args_dict = vars(args)
|
|
100
115
|
log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
|
|
101
116
|
|
yaralyzer/yara/yara_match.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Rich text decorator for YARA match dicts
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
2
|
+
Rich text decorator for YARA match dicts.
|
|
3
|
+
|
|
4
|
+
A YARA match is returned as a `dict` with this structure:
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
```
|
|
8
|
+
{
|
|
9
|
+
'tags': ['foo', 'bar'],
|
|
10
|
+
'matches': True,
|
|
11
|
+
'namespace': 'default',
|
|
12
|
+
'rule': 'my_rule',
|
|
13
|
+
'meta': {},
|
|
14
|
+
'strings': [
|
|
15
|
+
StringMatch1,
|
|
16
|
+
StringMatch2
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
```
|
|
15
20
|
"""
|
|
16
21
|
import re
|
|
17
22
|
from numbers import Number
|
|
@@ -30,11 +35,12 @@ from yaralyzer.output.rich_console import console_width, theme_colors_with_prefi
|
|
|
30
35
|
from yaralyzer.util.logging import log
|
|
31
36
|
|
|
32
37
|
MATCH_PADDING = (0, 0, 0, 1)
|
|
33
|
-
|
|
38
|
+
|
|
39
|
+
DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
|
|
34
40
|
DIGITS_REGEX = re.compile("^\\d+$")
|
|
35
41
|
HEX_REGEX = re.compile('^[0-9A-Fa-f]+$')
|
|
36
|
-
DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
|
|
37
42
|
MATCHER_VAR_REGEX = re.compile('\\$[a-z_]+')
|
|
43
|
+
URL_REGEX = re.compile('^https?:')
|
|
38
44
|
|
|
39
45
|
YARA_STRING_STYLES: Dict[re.Pattern, str] = {
|
|
40
46
|
URL_REGEX: 'yara.url',
|
|
@@ -50,14 +56,21 @@ RAW_YARA_THEME_TXT.justify = CENTER
|
|
|
50
56
|
|
|
51
57
|
|
|
52
58
|
class YaraMatch:
|
|
59
|
+
"""Rich text decorator for YARA match dicts."""
|
|
60
|
+
|
|
53
61
|
def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
|
|
62
|
+
"""
|
|
63
|
+
Args:
|
|
64
|
+
match (dict): The YARA match dict.
|
|
65
|
+
matched_against_bytes_label (Text): Label indicating what bytes were matched against.
|
|
66
|
+
"""
|
|
54
67
|
self.match = match
|
|
55
68
|
self.rule_name = match['rule']
|
|
56
69
|
self.label = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
|
|
57
70
|
self.label.append(self.rule_name, style='on bright_red bold').append("'!", style='siren')
|
|
58
71
|
|
|
59
72
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
60
|
-
"""Renders a
|
|
73
|
+
"""Renders a rich `Panel` showing the color highlighted raw YARA match info."""
|
|
61
74
|
yield Text("\n")
|
|
62
75
|
yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
|
|
63
76
|
yield RAW_YARA_THEME_TXT
|
|
@@ -65,7 +78,16 @@ class YaraMatch:
|
|
|
65
78
|
|
|
66
79
|
|
|
67
80
|
def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
68
|
-
"""
|
|
81
|
+
"""
|
|
82
|
+
Painful/hacky way of recursively coloring a YARA match dict.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
|
|
86
|
+
depth (int): Current recursion depth (used for indentation).
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
Text: The rich `Text` representation of the element.
|
|
90
|
+
"""
|
|
69
91
|
indent = Text((depth + 1) * INDENT_SPACES)
|
|
70
92
|
end_indent = Text(depth * INDENT_SPACES)
|
|
71
93
|
|
|
@@ -130,6 +152,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
|
130
152
|
|
|
131
153
|
|
|
132
154
|
def _yara_string(_string: str) -> Text:
|
|
155
|
+
"""Apply special styles to certain types of yara strings (e.g. URLs, numbers, hex, dates, matcher vars)."""
|
|
133
156
|
for regex in YARA_STRING_STYLES.keys():
|
|
134
157
|
if regex.match(_string):
|
|
135
158
|
return Text(_string, YARA_STRING_STYLES[regex])
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Builds bare bones YARA rules to match strings and regex patterns.
|
|
2
|
+
Builds bare bones YARA rules to match strings and regex patterns.
|
|
3
3
|
|
|
4
|
+
Example rule string:
|
|
5
|
+
|
|
6
|
+
```
|
|
4
7
|
rule Just_A_Piano_Man {
|
|
5
8
|
meta:
|
|
6
9
|
author = "Tim"
|
|
@@ -9,19 +12,23 @@ rule Just_A_Piano_Man {
|
|
|
9
12
|
condition:
|
|
10
13
|
$hilton_producer
|
|
11
14
|
}
|
|
15
|
+
```
|
|
12
16
|
"""
|
|
13
17
|
import re
|
|
14
|
-
from typing import Optional
|
|
18
|
+
from typing import Literal, Optional
|
|
15
19
|
|
|
16
20
|
import yara
|
|
17
21
|
|
|
18
22
|
from yaralyzer.config import YARALYZE
|
|
19
23
|
from yaralyzer.util.logging import log
|
|
20
24
|
|
|
25
|
+
PatternType = Literal['hex', 'regex']
|
|
26
|
+
YaraModifierType = Literal['ascii', 'fullword', 'nocase', 'wide']
|
|
27
|
+
|
|
21
28
|
HEX = 'hex'
|
|
29
|
+
PATTERN = 'pattern'
|
|
22
30
|
REGEX = 'regex'
|
|
23
31
|
RULE = 'rule'
|
|
24
|
-
PATTERN = 'pattern'
|
|
25
32
|
UNDERSCORE = '_'
|
|
26
33
|
YARA_REGEX_MODIFIERS = ['nocase', 'ascii', 'wide', 'fullword']
|
|
27
34
|
|
|
@@ -60,12 +67,25 @@ rule {rule_name} {{
|
|
|
60
67
|
|
|
61
68
|
def yara_rule_string(
|
|
62
69
|
pattern: str,
|
|
63
|
-
pattern_type:
|
|
70
|
+
pattern_type: PatternType = REGEX,
|
|
64
71
|
rule_name: str = YARALYZE,
|
|
65
72
|
pattern_label: Optional[str] = PATTERN,
|
|
66
|
-
modifier: Optional[
|
|
73
|
+
modifier: Optional[YaraModifierType] = None
|
|
67
74
|
) -> str:
|
|
68
|
-
"""
|
|
75
|
+
"""
|
|
76
|
+
Build a YARA rule string for a given `pattern`.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
pattern (str): The string or regex pattern to match.
|
|
80
|
+
pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
|
|
81
|
+
rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
|
|
82
|
+
pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
|
|
83
|
+
modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
|
|
84
|
+
Only valid if `pattern_type` is `"regex"`.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
str: The constructed YARA rule as a string.
|
|
88
|
+
"""
|
|
69
89
|
if not (modifier is None or modifier in YARA_REGEX_MODIFIERS):
|
|
70
90
|
raise TypeError(f"Modifier '{modifier}' is not one of {YARA_REGEX_MODIFIERS}")
|
|
71
91
|
|
|
@@ -73,6 +93,8 @@ def yara_rule_string(
|
|
|
73
93
|
pattern = f"/{pattern}/"
|
|
74
94
|
elif pattern_type == HEX:
|
|
75
95
|
pattern = f"{{{pattern}}}"
|
|
96
|
+
else:
|
|
97
|
+
raise ValueError(f"pattern_type must be either '{REGEX}' or '{HEX}'")
|
|
76
98
|
|
|
77
99
|
if modifier:
|
|
78
100
|
pattern += f" {modifier}"
|
|
@@ -81,7 +103,8 @@ def yara_rule_string(
|
|
|
81
103
|
rule_name=rule_name,
|
|
82
104
|
pattern_label=pattern_label,
|
|
83
105
|
pattern=pattern,
|
|
84
|
-
modifier='' if modifier is None else f" {modifier}"
|
|
106
|
+
modifier='' if modifier is None else f" {modifier}"
|
|
107
|
+
)
|
|
85
108
|
|
|
86
109
|
log.debug(f"Built YARA rule: \n{rule}")
|
|
87
110
|
return rule
|
|
@@ -89,18 +112,39 @@ def yara_rule_string(
|
|
|
89
112
|
|
|
90
113
|
def build_yara_rule(
|
|
91
114
|
pattern: str,
|
|
92
|
-
pattern_type:
|
|
115
|
+
pattern_type: PatternType = REGEX,
|
|
93
116
|
rule_name: str = YARALYZE,
|
|
94
117
|
pattern_label: Optional[str] = PATTERN,
|
|
95
|
-
modifier: Optional[
|
|
118
|
+
modifier: Optional[YaraModifierType] = None
|
|
96
119
|
) -> yara.Rule:
|
|
97
|
-
"""
|
|
120
|
+
"""
|
|
121
|
+
Build a compiled `yara.Rule` object.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
pattern (str): The string or regex pattern to match.
|
|
125
|
+
pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
|
|
126
|
+
rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
|
|
127
|
+
pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
|
|
128
|
+
modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
|
|
129
|
+
Only valid if `pattern_type` is `"regex"`.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
yara.Rule: Compiled YARA rule object.
|
|
133
|
+
"""
|
|
98
134
|
rule_string = yara_rule_string(pattern, pattern_type, rule_name, pattern_label, modifier)
|
|
99
135
|
return yara.compile(source=rule_string)
|
|
100
136
|
|
|
101
137
|
|
|
102
138
|
def safe_label(_label: str) -> str:
|
|
103
|
-
"""
|
|
139
|
+
"""
|
|
140
|
+
YARA rule and pattern names can only contain alphanumeric chars and underscores.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
_label (str): The label to sanitize.
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
str: A sanitized label safe for use in YARA rules.
|
|
147
|
+
"""
|
|
104
148
|
label = _label
|
|
105
149
|
|
|
106
150
|
for char, replacement in SAFE_LABEL_REPLACEMENTS.items():
|