yaralyzer 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- CHANGELOG.md +3 -0
- yaralyzer/__init__.py +1 -4
- yaralyzer/bytes_match.py +23 -24
- yaralyzer/config.py +13 -12
- yaralyzer/decoding/bytes_decoder.py +33 -25
- yaralyzer/decoding/decoding_attempt.py +55 -18
- yaralyzer/encoding_detection/character_encodings.py +9 -6
- yaralyzer/encoding_detection/encoding_assessment.py +26 -6
- yaralyzer/encoding_detection/encoding_detector.py +39 -10
- yaralyzer/helpers/bytes_helper.py +19 -18
- yaralyzer/helpers/file_helper.py +20 -13
- yaralyzer/helpers/rich_text_helper.py +10 -9
- yaralyzer/output/decoding_attempts_table.py +43 -9
- yaralyzer/output/file_export.py +23 -7
- yaralyzer/output/file_hashes_table.py +9 -8
- yaralyzer/output/regex_match_metrics.py +28 -6
- yaralyzer/output/rich_console.py +19 -17
- yaralyzer/util/argument_parser.py +11 -3
- yaralyzer/util/logging.py +31 -16
- yaralyzer/yara/yara_match.py +40 -17
- yaralyzer/yara/yara_rule_builder.py +55 -11
- yaralyzer/yaralyzer.py +90 -20
- {yaralyzer-1.0.8.dist-info → yaralyzer-1.0.9.dist-info}/METADATA +5 -6
- yaralyzer-1.0.9.dist-info/RECORD +32 -0
- yaralyzer-1.0.8.dist-info/RECORD +0 -32
- {yaralyzer-1.0.8.dist-info → yaralyzer-1.0.9.dist-info}/LICENSE +0 -0
- {yaralyzer-1.0.8.dist-info → yaralyzer-1.0.9.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.8.dist-info → yaralyzer-1.0.9.dist-info}/entry_points.txt +0 -0
|
@@ -218,9 +218,16 @@ YaralyzerConfig.set_argument_parser(parser)
|
|
|
218
218
|
|
|
219
219
|
def parse_arguments(args: Optional[Namespace] = None):
|
|
220
220
|
"""
|
|
221
|
-
Parse command line args. Most
|
|
222
|
-
If args are passed neither rules nor a regex need be provided as it is assumed
|
|
223
|
-
the constructor will instantiate a Yaralyzer object directly.
|
|
221
|
+
Parse command line args. Most arguments can also be communicated to the app by setting env vars.
|
|
222
|
+
If `args` are passed neither rules nor a regex need be provided as it is assumed
|
|
223
|
+
the constructor will instantiate a `Yaralyzer` object directly.
|
|
224
|
+
|
|
225
|
+
Args:
|
|
226
|
+
args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
|
|
227
|
+
Defaults to `None`.
|
|
228
|
+
|
|
229
|
+
Raises:
|
|
230
|
+
ArgumentError: If args are invalid.
|
|
224
231
|
"""
|
|
225
232
|
if '--version' in sys.argv:
|
|
226
233
|
print(f"yaralyzer {version('yaralyzer')}")
|
|
@@ -282,6 +289,7 @@ def parse_arguments(args: Optional[Namespace] = None):
|
|
|
282
289
|
|
|
283
290
|
|
|
284
291
|
def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
|
|
292
|
+
"""Get the basepath (directory + filename without extension) for exported files."""
|
|
285
293
|
file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
|
|
286
294
|
args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}" # noqa: E221
|
|
287
295
|
args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
|
yaralyzer/util/logging.py
CHANGED
|
@@ -1,28 +1,34 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Handle logging for `yaralyzer`.
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
There are two possible log sinks other than `STDOUT`:
|
|
5
|
+
|
|
6
|
+
1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
|
|
5
7
|
2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
|
|
6
8
|
|
|
7
|
-
The regular log file at APPLICATION_LOG_PATH is where the quite verbose application logs
|
|
9
|
+
The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
|
|
8
10
|
will be written if things ever need to get that formal. For now those logs are only accessible
|
|
9
|
-
on STDOUT with the
|
|
11
|
+
on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
|
|
10
12
|
needs/wants that sort of thing.
|
|
11
13
|
|
|
12
|
-
Logs are not normally ephemeral/not written
|
|
13
|
-
the YARALYZER_LOG_DIR env var. See
|
|
14
|
-
YARALYZER_LOG_DIR to a value.
|
|
14
|
+
Logs are normally ephemeral (not written to files) but can be configured to be written to files by setting
|
|
15
|
+
the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
|
|
16
|
+
of setting `YARALYZER_LOG_DIR` to a value.
|
|
17
|
+
|
|
18
|
+
* [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
|
|
15
19
|
|
|
16
|
-
https://
|
|
17
|
-
https://realpython.com/python-logging/
|
|
20
|
+
* [realpython.com/python-logging/](https://realpython.com/python-logging/)
|
|
18
21
|
|
|
19
22
|
Python log levels for reference:
|
|
23
|
+
|
|
24
|
+
```
|
|
20
25
|
CRITICAL 50
|
|
21
26
|
ERROR 40
|
|
22
27
|
WARNING 30
|
|
23
28
|
INFO 20
|
|
24
29
|
DEBUG 10
|
|
25
30
|
NOTSET 0
|
|
31
|
+
```
|
|
26
32
|
"""
|
|
27
33
|
import logging
|
|
28
34
|
import sys
|
|
@@ -37,13 +43,22 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
|
|
|
37
43
|
|
|
38
44
|
|
|
39
45
|
def configure_logger(log_label: str) -> logging.Logger:
|
|
40
|
-
"""
|
|
46
|
+
"""
|
|
47
|
+
Set up a file or stream `logger` depending on the configuration.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
log_label (str): The label for the `logger`, e.g. "run" or "invocation".
|
|
51
|
+
Actual name will be `"yaralyzer.{log_label}"`.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
logging.Logger: The configured `logger`.
|
|
55
|
+
"""
|
|
41
56
|
log_name = f"yaralyzer.{log_label}"
|
|
42
57
|
logger = logging.getLogger(log_name)
|
|
43
58
|
|
|
44
59
|
if YaralyzerConfig.LOG_DIR:
|
|
45
60
|
if not path.isdir(YaralyzerConfig.LOG_DIR) or not path.isabs(YaralyzerConfig.LOG_DIR):
|
|
46
|
-
raise
|
|
61
|
+
raise FileNotFoundError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
|
|
47
62
|
|
|
48
63
|
log_file_path = path.join(YaralyzerConfig.LOG_DIR, f"{log_name}.log")
|
|
49
64
|
log_formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
|
|
@@ -70,14 +85,14 @@ if YaralyzerConfig.LOG_DIR:
|
|
|
70
85
|
invocation_log.setLevel('INFO')
|
|
71
86
|
|
|
72
87
|
|
|
73
|
-
def log_and_print(msg: str, log_level='INFO'):
|
|
74
|
-
"""Both print and log (
|
|
88
|
+
def log_and_print(msg: str, log_level: str = 'INFO'):
|
|
89
|
+
"""Both print (to console) and log (to file) a string."""
|
|
75
90
|
log.log(logging.getLevelName(log_level), msg)
|
|
76
91
|
print(msg)
|
|
77
92
|
|
|
78
93
|
|
|
79
94
|
def log_current_config():
|
|
80
|
-
"""Write current state of YaralyzerConfig object to the logs."""
|
|
95
|
+
"""Write current state of `YaralyzerConfig` object to the logs."""
|
|
81
96
|
msg = f"{YaralyzerConfig.__name__} current attributes:\n"
|
|
82
97
|
config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
|
|
83
98
|
|
|
@@ -88,14 +103,14 @@ def log_current_config():
|
|
|
88
103
|
|
|
89
104
|
|
|
90
105
|
def log_invocation() -> None:
|
|
91
|
-
"""Log the command used to launch the yaralyzer to the invocation log."""
|
|
106
|
+
"""Log the command used to launch the `yaralyzer` to the invocation log."""
|
|
92
107
|
msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
|
|
93
108
|
log.info(msg)
|
|
94
109
|
invocation_log.info(msg)
|
|
95
110
|
|
|
96
111
|
|
|
97
112
|
def log_argparse_result(args, label: str):
|
|
98
|
-
"""Logs the result of argparse
|
|
113
|
+
"""Logs the result of `argparse`."""
|
|
99
114
|
args_dict = vars(args)
|
|
100
115
|
log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
|
|
101
116
|
|
yaralyzer/yara/yara_match.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Rich text decorator for YARA match dicts
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
2
|
+
Rich text decorator for YARA match dicts.
|
|
3
|
+
|
|
4
|
+
A YARA match is returned as a `dict` with this structure:
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
```
|
|
8
|
+
{
|
|
9
|
+
'tags': ['foo', 'bar'],
|
|
10
|
+
'matches': True,
|
|
11
|
+
'namespace': 'default',
|
|
12
|
+
'rule': 'my_rule',
|
|
13
|
+
'meta': {},
|
|
14
|
+
'strings': [
|
|
15
|
+
StringMatch1,
|
|
16
|
+
StringMatch2
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
```
|
|
15
20
|
"""
|
|
16
21
|
import re
|
|
17
22
|
from numbers import Number
|
|
@@ -30,11 +35,12 @@ from yaralyzer.output.rich_console import console_width, theme_colors_with_prefi
|
|
|
30
35
|
from yaralyzer.util.logging import log
|
|
31
36
|
|
|
32
37
|
MATCH_PADDING = (0, 0, 0, 1)
|
|
33
|
-
|
|
38
|
+
|
|
39
|
+
DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
|
|
34
40
|
DIGITS_REGEX = re.compile("^\\d+$")
|
|
35
41
|
HEX_REGEX = re.compile('^[0-9A-Fa-f]+$')
|
|
36
|
-
DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
|
|
37
42
|
MATCHER_VAR_REGEX = re.compile('\\$[a-z_]+')
|
|
43
|
+
URL_REGEX = re.compile('^https?:')
|
|
38
44
|
|
|
39
45
|
YARA_STRING_STYLES: Dict[re.Pattern, str] = {
|
|
40
46
|
URL_REGEX: 'yara.url',
|
|
@@ -50,14 +56,21 @@ RAW_YARA_THEME_TXT.justify = CENTER
|
|
|
50
56
|
|
|
51
57
|
|
|
52
58
|
class YaraMatch:
|
|
59
|
+
"""Rich text decorator for YARA match dicts."""
|
|
60
|
+
|
|
53
61
|
def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
|
|
62
|
+
"""
|
|
63
|
+
Args:
|
|
64
|
+
match (dict): The YARA match dict.
|
|
65
|
+
matched_against_bytes_label (Text): Label indicating what bytes were matched against.
|
|
66
|
+
"""
|
|
54
67
|
self.match = match
|
|
55
68
|
self.rule_name = match['rule']
|
|
56
69
|
self.label = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
|
|
57
70
|
self.label.append(self.rule_name, style='on bright_red bold').append("'!", style='siren')
|
|
58
71
|
|
|
59
72
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
60
|
-
"""Renders a
|
|
73
|
+
"""Renders a rich `Panel` showing the color highlighted raw YARA match info."""
|
|
61
74
|
yield Text("\n")
|
|
62
75
|
yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
|
|
63
76
|
yield RAW_YARA_THEME_TXT
|
|
@@ -65,7 +78,16 @@ class YaraMatch:
|
|
|
65
78
|
|
|
66
79
|
|
|
67
80
|
def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
68
|
-
"""
|
|
81
|
+
"""
|
|
82
|
+
Painful/hacky way of recursively coloring a YARA match dict.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
|
|
86
|
+
depth (int): Current recursion depth (used for indentation).
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
Text: The rich `Text` representation of the element.
|
|
90
|
+
"""
|
|
69
91
|
indent = Text((depth + 1) * INDENT_SPACES)
|
|
70
92
|
end_indent = Text(depth * INDENT_SPACES)
|
|
71
93
|
|
|
@@ -130,6 +152,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
|
130
152
|
|
|
131
153
|
|
|
132
154
|
def _yara_string(_string: str) -> Text:
|
|
155
|
+
"""Apply special styles to certain types of yara strings (e.g. URLs, numbers, hex, dates, matcher vars)."""
|
|
133
156
|
for regex in YARA_STRING_STYLES.keys():
|
|
134
157
|
if regex.match(_string):
|
|
135
158
|
return Text(_string, YARA_STRING_STYLES[regex])
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Builds bare bones YARA rules to match strings and regex patterns.
|
|
2
|
+
Builds bare bones YARA rules to match strings and regex patterns.
|
|
3
3
|
|
|
4
|
+
Example rule string:
|
|
5
|
+
|
|
6
|
+
```
|
|
4
7
|
rule Just_A_Piano_Man {
|
|
5
8
|
meta:
|
|
6
9
|
author = "Tim"
|
|
@@ -9,19 +12,23 @@ rule Just_A_Piano_Man {
|
|
|
9
12
|
condition:
|
|
10
13
|
$hilton_producer
|
|
11
14
|
}
|
|
15
|
+
```
|
|
12
16
|
"""
|
|
13
17
|
import re
|
|
14
|
-
from typing import Optional
|
|
18
|
+
from typing import Literal, Optional
|
|
15
19
|
|
|
16
20
|
import yara
|
|
17
21
|
|
|
18
22
|
from yaralyzer.config import YARALYZE
|
|
19
23
|
from yaralyzer.util.logging import log
|
|
20
24
|
|
|
25
|
+
PatternType = Literal['hex', 'regex']
|
|
26
|
+
YaraModifierType = Literal['ascii', 'fullword', 'nocase', 'wide']
|
|
27
|
+
|
|
21
28
|
HEX = 'hex'
|
|
29
|
+
PATTERN = 'pattern'
|
|
22
30
|
REGEX = 'regex'
|
|
23
31
|
RULE = 'rule'
|
|
24
|
-
PATTERN = 'pattern'
|
|
25
32
|
UNDERSCORE = '_'
|
|
26
33
|
YARA_REGEX_MODIFIERS = ['nocase', 'ascii', 'wide', 'fullword']
|
|
27
34
|
|
|
@@ -60,12 +67,25 @@ rule {rule_name} {{
|
|
|
60
67
|
|
|
61
68
|
def yara_rule_string(
|
|
62
69
|
pattern: str,
|
|
63
|
-
pattern_type:
|
|
70
|
+
pattern_type: PatternType = REGEX,
|
|
64
71
|
rule_name: str = YARALYZE,
|
|
65
72
|
pattern_label: Optional[str] = PATTERN,
|
|
66
|
-
modifier: Optional[
|
|
73
|
+
modifier: Optional[YaraModifierType] = None
|
|
67
74
|
) -> str:
|
|
68
|
-
"""
|
|
75
|
+
"""
|
|
76
|
+
Build a YARA rule string for a given `pattern`.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
pattern (str): The string or regex pattern to match.
|
|
80
|
+
pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
|
|
81
|
+
rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
|
|
82
|
+
pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
|
|
83
|
+
modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
|
|
84
|
+
Only valid if `pattern_type` is `"regex"`.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
str: The constructed YARA rule as a string.
|
|
88
|
+
"""
|
|
69
89
|
if not (modifier is None or modifier in YARA_REGEX_MODIFIERS):
|
|
70
90
|
raise TypeError(f"Modifier '{modifier}' is not one of {YARA_REGEX_MODIFIERS}")
|
|
71
91
|
|
|
@@ -73,6 +93,8 @@ def yara_rule_string(
|
|
|
73
93
|
pattern = f"/{pattern}/"
|
|
74
94
|
elif pattern_type == HEX:
|
|
75
95
|
pattern = f"{{{pattern}}}"
|
|
96
|
+
else:
|
|
97
|
+
raise ValueError(f"pattern_type must be either '{REGEX}' or '{HEX}'")
|
|
76
98
|
|
|
77
99
|
if modifier:
|
|
78
100
|
pattern += f" {modifier}"
|
|
@@ -81,7 +103,8 @@ def yara_rule_string(
|
|
|
81
103
|
rule_name=rule_name,
|
|
82
104
|
pattern_label=pattern_label,
|
|
83
105
|
pattern=pattern,
|
|
84
|
-
modifier='' if modifier is None else f" {modifier}"
|
|
106
|
+
modifier='' if modifier is None else f" {modifier}"
|
|
107
|
+
)
|
|
85
108
|
|
|
86
109
|
log.debug(f"Built YARA rule: \n{rule}")
|
|
87
110
|
return rule
|
|
@@ -89,18 +112,39 @@ def yara_rule_string(
|
|
|
89
112
|
|
|
90
113
|
def build_yara_rule(
|
|
91
114
|
pattern: str,
|
|
92
|
-
pattern_type:
|
|
115
|
+
pattern_type: PatternType = REGEX,
|
|
93
116
|
rule_name: str = YARALYZE,
|
|
94
117
|
pattern_label: Optional[str] = PATTERN,
|
|
95
|
-
modifier: Optional[
|
|
118
|
+
modifier: Optional[YaraModifierType] = None
|
|
96
119
|
) -> yara.Rule:
|
|
97
|
-
"""
|
|
120
|
+
"""
|
|
121
|
+
Build a compiled `yara.Rule` object.
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
pattern (str): The string or regex pattern to match.
|
|
125
|
+
pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
|
|
126
|
+
rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
|
|
127
|
+
pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
|
|
128
|
+
modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
|
|
129
|
+
Only valid if `pattern_type` is `"regex"`.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
yara.Rule: Compiled YARA rule object.
|
|
133
|
+
"""
|
|
98
134
|
rule_string = yara_rule_string(pattern, pattern_type, rule_name, pattern_label, modifier)
|
|
99
135
|
return yara.compile(source=rule_string)
|
|
100
136
|
|
|
101
137
|
|
|
102
138
|
def safe_label(_label: str) -> str:
|
|
103
|
-
"""
|
|
139
|
+
"""
|
|
140
|
+
YARA rule and pattern names can only contain alphanumeric chars.
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
_label (str): The label to sanitize.
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
str: A sanitized label safe for use in YARA rules.
|
|
147
|
+
"""
|
|
104
148
|
label = _label
|
|
105
149
|
|
|
106
150
|
for char, replacement in SAFE_LABEL_REPLACEMENTS.items():
|
yaralyzer/yaralyzer.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"""Main Yaralyzer class and alternate constructors."""
|
|
2
2
|
from os import path
|
|
3
|
-
from typing import Iterator, List, Optional, Tuple, Union
|
|
3
|
+
from typing import Callable, Iterator, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
import yara
|
|
6
6
|
from rich.console import Console, ConsoleOptions, RenderResult
|
|
7
7
|
from rich.padding import Padding
|
|
8
|
+
from rich.style import Style
|
|
8
9
|
from rich.text import Text
|
|
9
10
|
|
|
10
11
|
from yaralyzer.bytes_match import BytesMatch
|
|
@@ -26,7 +27,7 @@ YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't
|
|
|
26
27
|
# TODO: might be worth introducing a Scannable namedtuple or similar
|
|
27
28
|
class Yaralyzer:
|
|
28
29
|
"""
|
|
29
|
-
Central class that handles setting up / compiling rules and reading binary data from files as needed.
|
|
30
|
+
Central class that handles setting up / compiling YARA rules and reading binary data from files as needed.
|
|
30
31
|
|
|
31
32
|
Alternate constructors are provided depending on whether:
|
|
32
33
|
|
|
@@ -38,7 +39,18 @@ class Yaralyzer:
|
|
|
38
39
|
|
|
39
40
|
* YARA rules should be read from a directory of .yara files
|
|
40
41
|
|
|
41
|
-
The real action happens in the __rich__console__() dunder method.
|
|
42
|
+
The real action happens in the `__rich_console__()` dunder method.
|
|
43
|
+
|
|
44
|
+
Attributes:
|
|
45
|
+
bytes (bytes): The binary data to scan.
|
|
46
|
+
bytes_length (int): The length of the binary data.
|
|
47
|
+
scannable_label (str): A label for the binary data, typically the filename or a user-provided label.
|
|
48
|
+
rules (yara.Rules): The compiled YARA rules to use for scanning.
|
|
49
|
+
rules_label (str): A label for the ruleset, typically derived from filenames or user input.
|
|
50
|
+
highlight_style (str): The style to use for highlighting matches in the output.
|
|
51
|
+
non_matches (List[dict]): A list of YARA rules that did not match the binary data.
|
|
52
|
+
matches (List[YaraMatch]): A list of YaraMatch objects representing the matches found.
|
|
53
|
+
extraction_stats (RegexMatchMetrics): Metrics related to decoding attempts on matched data
|
|
42
54
|
"""
|
|
43
55
|
|
|
44
56
|
def __init__(
|
|
@@ -50,17 +62,22 @@ class Yaralyzer:
|
|
|
50
62
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
51
63
|
) -> None:
|
|
52
64
|
"""
|
|
53
|
-
Initialize a Yaralyzer instance for scanning binary data with YARA rules.
|
|
65
|
+
Initialize a `Yaralyzer` instance for scanning binary data with YARA rules.
|
|
54
66
|
|
|
55
67
|
Args:
|
|
56
|
-
rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string
|
|
68
|
+
rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string or a pre-compiled
|
|
69
|
+
`yara.Rules` object (strings will be compiled to an instance of `yara.Rules`).
|
|
57
70
|
rules_label (str): Label to identify the ruleset in output and logs.
|
|
58
|
-
scannable (Union[bytes, str]): The data to scan. If bytes
|
|
59
|
-
|
|
60
|
-
|
|
71
|
+
scannable (Union[bytes, str]): The data to scan. If it's `bytes` type then that data is scanned;
|
|
72
|
+
if it's a string it is treated as a file path to load bytes from.
|
|
73
|
+
scannable_label (Optional[str], optional): Label for the `scannable` arg data.
|
|
74
|
+
Required if `scannable` is `bytes`.
|
|
75
|
+
If `scannable` is a file path `scannable_label` will default to the file's basename.
|
|
76
|
+
highlight_style (str, optional): Style to use for highlighting matches in output.
|
|
77
|
+
Defaults to `YaralyzerConfig.HIGHLIGHT_STYLE`.
|
|
61
78
|
|
|
62
79
|
Raises:
|
|
63
|
-
TypeError: If scannable is bytes and scannable_label is not provided.
|
|
80
|
+
TypeError: If `scannable` is `bytes` and `scannable_label` is not provided.
|
|
64
81
|
"""
|
|
65
82
|
if 'args' not in vars(YaralyzerConfig):
|
|
66
83
|
YaralyzerConfig.set_default_args()
|
|
@@ -72,7 +89,7 @@ class Yaralyzer:
|
|
|
72
89
|
|
|
73
90
|
if isinstance(scannable, bytes):
|
|
74
91
|
if scannable_label is None:
|
|
75
|
-
raise TypeError("Must provide scannable_label arg when yaralyzing raw bytes")
|
|
92
|
+
raise TypeError("Must provide 'scannable_label' arg when yaralyzing raw bytes")
|
|
76
93
|
|
|
77
94
|
self.bytes: bytes = scannable
|
|
78
95
|
self.scannable_label: str = scannable_label
|
|
@@ -101,13 +118,26 @@ class Yaralyzer:
|
|
|
101
118
|
scannable: Union[bytes, str],
|
|
102
119
|
scannable_label: Optional[str] = None
|
|
103
120
|
) -> 'Yaralyzer':
|
|
104
|
-
"""
|
|
121
|
+
"""
|
|
122
|
+
Alternate constructor to load YARA rules from files and label rules with the filenames.
|
|
123
|
+
|
|
124
|
+
Args:
|
|
125
|
+
yara_rules_files (List[str]): List of file paths to YARA rules files.
|
|
126
|
+
scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
|
|
127
|
+
if `str`, it is treated as a file path to load bytes from.
|
|
128
|
+
scannable_label (Optional[str], optional): Label for the `scannable` data.
|
|
129
|
+
Required if `scannable` is `bytes`. If scannable is a file path, defaults to the file's basename.
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
TypeError: If `yara_rules_files` is not a list.
|
|
133
|
+
FileNotFoundError: If any file in `yara_rules_files` does not exist.
|
|
134
|
+
"""
|
|
105
135
|
if not isinstance(yara_rules_files, list):
|
|
106
136
|
raise TypeError(f"{yara_rules_files} is not a list")
|
|
107
137
|
|
|
108
138
|
for file in yara_rules_files:
|
|
109
139
|
if not path.exists(file):
|
|
110
|
-
raise
|
|
140
|
+
raise FileNotFoundError(f"'{file}' {YARA_FILE_DOES_NOT_EXIST_ERROR_MSG}")
|
|
111
141
|
|
|
112
142
|
filepaths_arg = {path.basename(file): file for file in yara_rules_files}
|
|
113
143
|
|
|
@@ -126,9 +156,21 @@ class Yaralyzer:
|
|
|
126
156
|
scannable: Union[bytes, str],
|
|
127
157
|
scannable_label: Optional[str] = None
|
|
128
158
|
) -> 'Yaralyzer':
|
|
129
|
-
"""
|
|
159
|
+
"""
|
|
160
|
+
Alternate constructor that will load all `.yara` files in the directories listed in `dirs`.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
dirs (List[str]): List of directories to search for `.yara` files.
|
|
164
|
+
scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
|
|
165
|
+
if `str`, it is treated as a file path to load bytes from.
|
|
166
|
+
scannable_label (Optional[str], optional): Label for the `scannable` data.
|
|
167
|
+
Required if `scannable` is `bytes`. If scannable is a file path, defaults to the file's basename.
|
|
168
|
+
|
|
169
|
+
Raises:
|
|
170
|
+
FileNotFoundError: If `dirs` is not a list of valid directories.
|
|
171
|
+
"""
|
|
130
172
|
if not (isinstance(dirs, list) and all(path.isdir(dir) for dir in dirs)):
|
|
131
|
-
raise
|
|
173
|
+
raise FileNotFoundError(f"'{dirs}' is not a list of valid directories")
|
|
132
174
|
|
|
133
175
|
rules_files = [path.join(dir, f) for dir in dirs for f in files_in_dir(dir)]
|
|
134
176
|
return cls.for_rules_files(rules_files, scannable, scannable_label)
|
|
@@ -144,7 +186,22 @@ class Yaralyzer:
|
|
|
144
186
|
pattern_label: Optional[str] = None,
|
|
145
187
|
regex_modifier: Optional[str] = None,
|
|
146
188
|
) -> 'Yaralyzer':
|
|
147
|
-
"""
|
|
189
|
+
"""
|
|
190
|
+
Alternate constructor taking regex pattern strings. Rules label defaults to the patterns joined by comma.
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
patterns (List[str]): List of regex or hex patterns to build rules from.
|
|
194
|
+
patterns_type (str): Either `"regex"` or `"hex"` to indicate the type of patterns provided.
|
|
195
|
+
scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
|
|
196
|
+
if `str`, it is treated as a file path to load bytes from.
|
|
197
|
+
scannable_label (Optional[str], optional): Label for the `scannable` data.
|
|
198
|
+
Required if `scannable` is `bytes`.
|
|
199
|
+
If scannable is a file path, defaults to the file's basename.
|
|
200
|
+
rules_label (Optional[str], optional): Label for the ruleset. Defaults to the patterns joined by comma.
|
|
201
|
+
pattern_label (Optional[str], optional): Label for each pattern in the YARA rules. Defaults to "pattern".
|
|
202
|
+
regex_modifier (Optional[str], optional): Optional regex modifier (e.g. "nocase", "ascii", "wide", etc).
|
|
203
|
+
Only valid if `patterns_type` is `"regex"`.
|
|
204
|
+
"""
|
|
148
205
|
rule_strings = []
|
|
149
206
|
|
|
150
207
|
for i, pattern in enumerate(patterns):
|
|
@@ -167,7 +224,12 @@ class Yaralyzer:
|
|
|
167
224
|
console.print(self)
|
|
168
225
|
|
|
169
226
|
def match_iterator(self) -> Iterator[Tuple[BytesMatch, BytesDecoder]]:
|
|
170
|
-
"""
|
|
227
|
+
"""
|
|
228
|
+
Iterator version of `yaralyze()`.
|
|
229
|
+
|
|
230
|
+
Yields:
|
|
231
|
+
Tuple[BytesMatch, BytesDecoder]: Match and decode data tuple.
|
|
232
|
+
"""
|
|
171
233
|
self.rules.match(data=self.bytes, callback=self._yara_callback)
|
|
172
234
|
|
|
173
235
|
for yara_match in self.matches:
|
|
@@ -181,8 +243,16 @@ class Yaralyzer:
|
|
|
181
243
|
|
|
182
244
|
self._print_non_matches()
|
|
183
245
|
|
|
184
|
-
def _yara_callback(self, data: dict):
|
|
185
|
-
"""
|
|
246
|
+
def _yara_callback(self, data: dict) -> Callable:
|
|
247
|
+
"""
|
|
248
|
+
Callback invoked by `yara-python` to handle matches and non-matches as they are discovered.
|
|
249
|
+
|
|
250
|
+
Args:
|
|
251
|
+
data (dict): Data provided when `yara-python` invokes the callback.
|
|
252
|
+
|
|
253
|
+
Returns:
|
|
254
|
+
Callable: Always returns `yara.CALLBACK_CONTINUE` to signal `yara-python` should continue processing.
|
|
255
|
+
"""
|
|
186
256
|
if data['matches']:
|
|
187
257
|
self.matches.append(YaraMatch(data, self._panel_text()))
|
|
188
258
|
else:
|
|
@@ -212,11 +282,11 @@ class Yaralyzer:
|
|
|
212
282
|
styles = [reverse_color(YARALYZER_THEME.styles[f"yara.{s}"]) for s in ('scanned', 'rules')]
|
|
213
283
|
return self.__text__(*styles)
|
|
214
284
|
|
|
215
|
-
def _filename_string(self):
|
|
285
|
+
def _filename_string(self) -> str:
|
|
216
286
|
"""The string to use when exporting this yaralyzer to SVG/HTML/etc."""
|
|
217
287
|
return str(self).replace('>', '').replace('<', '').replace(' ', '_')
|
|
218
288
|
|
|
219
|
-
def __text__(self, byte_style: str = 'yara.scanned', rule_style: str = 'yara.rules') -> Text:
|
|
289
|
+
def __text__(self, byte_style: Style | str = 'yara.scanned', rule_style: Style | str = 'yara.rules') -> Text:
|
|
220
290
|
"""Text representation of this YARA scan (__text__() was taken)."""
|
|
221
291
|
txt = Text('').append(self.scannable_label, style=byte_style or 'yara.scanned')
|
|
222
292
|
return txt.append(' scanned with <').append(self.rules_label, style=rule_style or 'yara.rules').append('>')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.9
|
|
4
4
|
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -78,7 +78,7 @@ YARA just tells you the byte position and the matched string but it can't tell y
|
|
|
78
78
|
|
|
79
79
|
Enter **The Yaralyzer**, which lets you quickly scan the regions around matches while also showing you what those regions would look like if they were forced into various character encodings.
|
|
80
80
|
|
|
81
|
-
|
|
81
|
+
**The Yaralyzer** isn't a malware reversing tool. It can't do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn't try to. It's more intended to give you a quick visual overview of suspect regions in the binary so you can hone in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).
|
|
82
82
|
|
|
83
83
|
# Installation
|
|
84
84
|
Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a marginally better solution as it guarantees any packages installed with it will be isolated from the rest of your local python environment. Of course if you don't really have a local python environment this is a moot point and you can feel free to install with `pip`/`pip3`.
|
|
@@ -86,6 +86,7 @@ Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a ma
|
|
|
86
86
|
pipx install yaralyzer
|
|
87
87
|
```
|
|
88
88
|
|
|
89
|
+
|
|
89
90
|
# Usage
|
|
90
91
|
Run `yaralyze -h` to see the command line options (screenshot below).
|
|
91
92
|
|
|
@@ -99,7 +100,7 @@ If you place a file called `.yaralyzer` in your home directory or the current wo
|
|
|
99
100
|
Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
|
|
100
101
|
|
|
101
102
|
### As A Library
|
|
102
|
-
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. It has a variety of constructors supporting:
|
|
103
|
+
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. Auto generated documentation for `Yaralyzer`'s various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/). It has a variety of [alternate constructors](https://michelcrypt4d4mus.github.io/yaralyzer/api/yaralyzer/) supporting:
|
|
103
104
|
|
|
104
105
|
1. Precompiled YARA rules
|
|
105
106
|
1. Creating a YARA rule from a string
|
|
@@ -108,7 +109,7 @@ Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyze
|
|
|
108
109
|
1. Scanning `bytes`
|
|
109
110
|
1. Scanning a file
|
|
110
111
|
|
|
111
|
-
Should you want to iterate over the `BytesMatch` (like a `re.Match` object for a YARA match) and `BytesDecoder` (tracks decoding attempt stats) objects
|
|
112
|
+
Should you want to iterate over the [`BytesMatch`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_match/) (like a `re.Match` object for a YARA match) and [`BytesDecoder`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_decoder/) (tracks decoding attempt stats) objects used by The Yaralyzer, you can do so like this:
|
|
112
113
|
|
|
113
114
|
```python
|
|
114
115
|
from yaralyzer.yaralyzer import Yaralyzer
|
|
@@ -119,8 +120,6 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
119
120
|
do_stuff()
|
|
120
121
|
```
|
|
121
122
|
|
|
122
|
-
#### API Documentation
|
|
123
|
-
Auto generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
|
|
124
123
|
|
|
125
124
|
# Example Output
|
|
126
125
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
.yaralyzer.example,sha256=z3_mk41xxm0Pr_8MGM7AKQG0xEFRtGcyJLboMuelRp4,3504
|
|
2
|
+
CHANGELOG.md,sha256=lepFLLmnoHWaac4ae49WqSbpqXXxge2S2mDvE2qbixE,3408
|
|
3
|
+
LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
4
|
+
yaralyzer/__init__.py,sha256=FHfzll5jfldsqx3pXVBPu9xwDqFKjEVfTL7dha9BYX8,2793
|
|
5
|
+
yaralyzer/bytes_match.py,sha256=ROMv9gK0R1bDP5IpheNyxQ44_oEJPkHn_XYwkoIYKdQ,10901
|
|
6
|
+
yaralyzer/config.py,sha256=uVT8Jjw6kViH_PvBQ0etaH3JXPWOIXgiaoAv3ompnJA,4558
|
|
7
|
+
yaralyzer/decoding/bytes_decoder.py,sha256=8uKmqXEchjhTFULrcIKk699bfbBJrwvr9A8GlzCq0Z0,10200
|
|
8
|
+
yaralyzer/decoding/decoding_attempt.py,sha256=gUroTUSgWrgD-EZH8t5vsdDk0DSPqHMt0ow947sSFok,10290
|
|
9
|
+
yaralyzer/encoding_detection/character_encodings.py,sha256=_b3Vk5abAcKVDZ7QQyrAMQODAgMjG54AjqxdSGSdaj0,5637
|
|
10
|
+
yaralyzer/encoding_detection/encoding_assessment.py,sha256=q7wa2rls5nXEioX9UqzaNk4TxdW5WKzXjQik9e9AHs4,3262
|
|
11
|
+
yaralyzer/encoding_detection/encoding_detector.py,sha256=9zV1ZA6D3z9t6-Bz2IhcmqufJ_7zGJ0Rzh2gn0fmaO8,6487
|
|
12
|
+
yaralyzer/helpers/bytes_helper.py,sha256=7l0EycirLsPl--BakAEH-P7ruAgGgu75zYEfiw0OwO4,10212
|
|
13
|
+
yaralyzer/helpers/dict_helper.py,sha256=rhyu-xlpl4yevXdLZUIgVwap0b57O9I3DNAEv8MfTlI,186
|
|
14
|
+
yaralyzer/helpers/file_helper.py,sha256=tjiwCr8EMFHHmX4R13J4Sba5xv0IWXhEGyWUvGvCSa8,1588
|
|
15
|
+
yaralyzer/helpers/list_helper.py,sha256=zX6VzJDbnyxuwQpth5Mc7k7yeJytqWPzpo1v5nXCMtE,394
|
|
16
|
+
yaralyzer/helpers/rich_text_helper.py,sha256=7h3MOORdfZ8vrfUJ5sei4GOMxyfTonxmzii_VhrJZ6U,4383
|
|
17
|
+
yaralyzer/helpers/string_helper.py,sha256=8XsvYlKn-fGhKihfJBOG6mqi5nV_8LM-IWgHzvkRgCc,933
|
|
18
|
+
yaralyzer/output/decoding_attempts_table.py,sha256=wQ3cyN9czZkC3cbwjgflSu0t4wDKGDIs5NPOE6UwBLk,5004
|
|
19
|
+
yaralyzer/output/file_export.py,sha256=iTlCYErquuy6tqBZ1_BQHxBk-6jZ2ihTnGe83HEI_5o,3300
|
|
20
|
+
yaralyzer/output/file_hashes_table.py,sha256=pKbIc1bHJIIorqk9R2gz3IhTxKJpYU1TioGgceyoxiI,2615
|
|
21
|
+
yaralyzer/output/regex_match_metrics.py,sha256=ZQjzePPXpq_g99KNQjHrRQ1N6u_OUxD32uf9xnqcOw8,4341
|
|
22
|
+
yaralyzer/output/rich_console.py,sha256=mQEK0hq2qyCzqebzNDmNTqG2O8pqwBKs_UFIC0DEvxM,5124
|
|
23
|
+
yaralyzer/util/argument_parser.py,sha256=ZOsBf5xkAWHFSWPbZt7_OdkYHIY3RIjtK1QIXOj2g6U,13281
|
|
24
|
+
yaralyzer/util/logging.py,sha256=aBvpNukwZTGOgzm_zpwWzTWFrptThk-g2cqi8D4Fkmo,4616
|
|
25
|
+
yaralyzer/yara/yara_match.py,sha256=BwWbVgYYCybT9TMhWgkT5vA54C9XJ7fAmGf6JKncjhA,5845
|
|
26
|
+
yaralyzer/yara/yara_rule_builder.py,sha256=PeuhPtO4FvXJoTegQr0NXwGpX7wxPfGzAO1tMozaZd8,4535
|
|
27
|
+
yaralyzer/yaralyzer.py,sha256=CLczlTW2ppyoChkPIGvQWwAo-5F0LG_rMEJpCy4cucg,13813
|
|
28
|
+
yaralyzer-1.0.9.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
29
|
+
yaralyzer-1.0.9.dist-info/METADATA,sha256=sN9ZZxRsjj79m5miQ535kers7OVuSYLcvB6Uuu8COqY,11255
|
|
30
|
+
yaralyzer-1.0.9.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
31
|
+
yaralyzer-1.0.9.dist-info/entry_points.txt,sha256=7LnLJrNTfql0vuctjRWwp_ZD-BYvtv9ENVipdjuT7XI,136
|
|
32
|
+
yaralyzer-1.0.9.dist-info/RECORD,,
|