yaralyzer 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .yaralyzer.example +65 -0
- CHANGELOG.md +128 -0
- LICENSE +674 -0
- yaralyzer/__init__.py +76 -0
- yaralyzer/bytes_match.py +276 -0
- yaralyzer/config.py +126 -0
- yaralyzer/decoding/bytes_decoder.py +207 -0
- yaralyzer/decoding/decoding_attempt.py +222 -0
- yaralyzer/encoding_detection/character_encodings.py +197 -0
- yaralyzer/encoding_detection/encoding_assessment.py +83 -0
- yaralyzer/encoding_detection/encoding_detector.py +145 -0
- yaralyzer/helpers/bytes_helper.py +268 -0
- yaralyzer/helpers/dict_helper.py +8 -0
- yaralyzer/helpers/file_helper.py +49 -0
- yaralyzer/helpers/list_helper.py +16 -0
- yaralyzer/helpers/rich_text_helper.py +150 -0
- yaralyzer/helpers/string_helper.py +34 -0
- yaralyzer/output/decoding_attempts_table.py +82 -0
- yaralyzer/output/decoding_table_row.py +60 -0
- yaralyzer/output/file_export.py +111 -0
- yaralyzer/output/file_hashes_table.py +82 -0
- yaralyzer/output/regex_match_metrics.py +97 -0
- yaralyzer/output/rich_console.py +114 -0
- yaralyzer/util/argument_parser.py +297 -0
- yaralyzer/util/logging.py +135 -0
- yaralyzer/yara/error.py +90 -0
- yaralyzer/yara/yara_match.py +160 -0
- yaralyzer/yara/yara_rule_builder.py +164 -0
- yaralyzer/yaralyzer.py +304 -0
- yaralyzer-1.0.11.dist-info/LICENSE +674 -0
- yaralyzer-1.0.11.dist-info/METADATA +151 -0
- yaralyzer-1.0.11.dist-info/RECORD +34 -0
- yaralyzer-1.0.11.dist-info/WHEEL +4 -0
- yaralyzer-1.0.11.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""Argument parsing for yaralyzer CLI tool."""
|
|
2
|
+
import logging
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from argparse import ArgumentError, ArgumentParser, Namespace
|
|
6
|
+
from collections import namedtuple
|
|
7
|
+
from importlib.metadata import version
|
|
8
|
+
from os import getcwd, path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from rich_argparse_plus import RichHelpFormatterPlus
|
|
12
|
+
|
|
13
|
+
from yaralyzer.config import YaralyzerConfig
|
|
14
|
+
from yaralyzer.encoding_detection.encoding_detector import CONFIDENCE_SCORE_RANGE, EncodingDetector
|
|
15
|
+
from yaralyzer.helpers.file_helper import timestamp_for_filename
|
|
16
|
+
from yaralyzer.helpers.string_helper import comma_join
|
|
17
|
+
from yaralyzer.output import rich_console
|
|
18
|
+
from yaralyzer.yara.yara_rule_builder import YARA_REGEX_MODIFIERS
|
|
19
|
+
from yaralyzer.util.logging import log, log_argparse_result, log_current_config, log_invocation
|
|
20
|
+
from yaralyzer.yaralyzer import Yaralyzer
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# NamedTuple to keep our argument selection orderly
OutputSection = namedtuple('OutputSection', ['argument', 'method'])

# A pattern label must consist solely of word characters (enforced in parse_arguments())
YARA_PATTERN_LABEL_REGEX = re.compile('^\\w+$')
# argparse destinations that each supply YARA rules or patterns
YARA_RULES_ARGS = ['yara_rules_files', 'yara_rules_dirs', 'hex_patterns', 'regex_patterns']
DESCRIPTION = "Get a good hard colorful look at all the byte sequences that make up a YARA rule match. "

# Shown at the bottom of the --help output
EPILOG = "* Values for various config options can be set permanently by a .yaralyzer file in your home directory; " + \
         "see the documentation for details.\n" + \
         "* A registry of previous yaralyzer invocations will be inscribed to a file if the " + \
         f"{YaralyzerConfig.LOG_DIR_ENV_VAR} environment variable is configured."
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Positional args, version, help, etc
RichHelpFormatterPlus.choose_theme('prince')
parser = ArgumentParser(description=DESCRIPTION, epilog=EPILOG, formatter_class=RichHelpFormatterPlus)
parser.add_argument('--version', help='show version number and exit', action='store_true')
parser.add_argument('file_to_scan_path', help='file to scan', metavar='FILE')

# Where the YARA rules come from: rules files, rules directories, or one off regex / hex patterns
source = parser.add_argument_group(
    title='YARA RULES',
    description="Load YARA rules from preconfigured files or use one off YARA regular expression strings",
)

source.add_argument(
    '--yara-file', '-Y',
    dest='yara_rules_files',
    metavar='FILE',
    action='append',
    help='path to a YARA rules file to check against (can be supplied more than once)',
)

source.add_argument(
    '--rule-dir', '-dir',
    dest='yara_rules_dirs',
    metavar='DIR',
    action='append',
    help='directory with yara rules files (all files are used, can be supplied more than once)',
)

source.add_argument(
    '--regex-pattern', '-re',
    dest='regex_patterns',
    metavar='PATTERN',
    action='append',
    help='build a YARA rule from PATTERN and run it (can be supplied more than once for boolean OR)',
)

source.add_argument(
    '--hex-pattern', '-hex',
    dest='hex_patterns',
    metavar='HEX_STRING',
    action='append',
    help='build a YARA rule from HEX_STRING and run it (can be supplied more than once for boolean OR)',
)

source.add_argument(
    '--patterns-label', '-rpl',
    metavar='STRING',
    help='supplying an optional STRING to label your YARA patterns makes it easier to scan results',
)

source.add_argument(
    '--regex-modifier', '-mod',
    choices=YARA_REGEX_MODIFIERS,
    metavar='MODIFIER',
    help=f"optional modifier keyword for YARA regexes ({comma_join(YARA_REGEX_MODIFIERS)})",
)
|
|
78
|
+
|
|
79
|
+
# Fine tuning: options that control how much is decoded / displayed and the YARA engine limits
tuning = parser.add_argument_group(
    'FINE TUNING',
    "Tune various aspects of the analyses and visualizations to your needs. As an example setting "
    "a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can "
    "dramatically improve run times and only occasionally leads to a fatal lack of insight.")

tuning.add_argument('--maximize-width', action='store_true',
                    help="maximize the display width to fill the terminal")

tuning.add_argument('--surrounding-bytes',
                    help="number of bytes to display/decode before and after YARA match start positions",
                    default=YaralyzerConfig.DEFAULT_SURROUNDING_BYTES,
                    metavar='N',
                    type=int)

tuning.add_argument('--suppress-decodes-table', action='store_true',
                    help='suppress decodes table entirely (including hex/raw output)')

tuning.add_argument('--suppress-decoding-attempts', action='store_true',
                    help='suppress decode attempts for matched bytes (only hex/raw output will be shown)')

tuning.add_argument('--min-decode-length',
                    help='suppress decode attempts for quoted byte sequences shorter than N',
                    default=YaralyzerConfig.DEFAULT_MIN_DECODE_LENGTH,
                    metavar='N',
                    type=int)

tuning.add_argument('--max-decode-length',
                    help='suppress decode attempts for quoted byte sequences longer than N',
                    default=YaralyzerConfig.DEFAULT_MAX_DECODE_LENGTH,
                    metavar='N',
                    type=int)

tuning.add_argument('--suppress-chardet', action='store_true',
                    help="suppress the display of the full table of chardet's encoding likelihood scores")

tuning.add_argument('--min-chardet-bytes',
                    help="minimum number of bytes to run chardet.detect() and the decodings it suggests",
                    default=YaralyzerConfig.DEFAULT_MIN_CHARDET_BYTES,
                    metavar='N',
                    type=int)

# Help text previously read "decection" (typo)
tuning.add_argument('--min-chardet-table-confidence',
                    help="minimum chardet confidence to display the encoding name/score in the character "
                         "detection scores table",
                    default=YaralyzerConfig.DEFAULT_MIN_CHARDET_TABLE_CONFIDENCE,
                    metavar='PCT_CONFIDENCE',
                    type=int)

# The two thresholds below default to the current EncodingDetector class attributes and are
# restricted to CONFIDENCE_SCORE_RANGE; parse_arguments() writes the chosen values back.
tuning.add_argument('--force-display-threshold',
                    help="encodings with chardet confidence below this number will neither be displayed nor "
                         "decoded in the decodings table",
                    default=EncodingDetector.force_display_threshold,
                    metavar='PCT_CONFIDENCE',
                    type=int,
                    choices=CONFIDENCE_SCORE_RANGE)

tuning.add_argument('--force-decode-threshold',
                    help="extremely high (AKA 'above this number') confidence scores from chardet.detect() "
                         "as to the likelihood some bytes were written with a particular encoding will cause "
                         "the yaralyzer to attempt decoding those bytes in that encoding even if it is not a "
                         "configured encoding",
                    default=EncodingDetector.force_decode_threshold,
                    metavar='PCT_CONFIDENCE',
                    type=int,
                    choices=CONFIDENCE_SCORE_RANGE)

tuning.add_argument('--max-match-length',
                    help="max bytes YARA will return for a match",
                    default=YaralyzerConfig.DEFAULT_MAX_MATCH_LENGTH,
                    metavar='N',
                    type=int)

tuning.add_argument('--yara-stack-size',
                    help="YARA matching engine internal stack size",
                    default=YaralyzerConfig.DEFAULT_YARA_STACK_SIZE,
                    metavar='N',
                    type=int)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Export options: each --export-* flag stores its own format name as a constant when selected
export = parser.add_argument_group(
    title='FILE EXPORT',
    description=(
        "Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file "
        "formats in parallel. Writes files to the current directory if --output-dir is not provided. "
        "Filenames are expansions of the scanned filename though you can use --file-prefix to make your "
        "filenames more unique and beautiful to their beholder."
    ),
)

export.add_argument(
    '-svg', '--export-svg',
    help='export analysis to SVG images',
    const='svg',
    action='store_const',
)

export.add_argument(
    '-txt', '--export-txt',
    help='export analysis to ANSI colored text files',
    const='txt',
    action='store_const',
)

export.add_argument(
    '-html', '--export-html',
    help='export analysis to styled html files',
    const='html',
    action='store_const',
)

export.add_argument(
    '-json', '--export-json',
    help='export analysis to JSON files',
    const='json',
    action='store_const',
)

export.add_argument(
    '-out', '--output-dir',
    help='write files to OUTPUT_DIR instead of current dir, does nothing if not exporting a file',
    metavar='OUTPUT_DIR',
)

export.add_argument(
    '-pfx', '--file-prefix',
    help='optional string to use as the prefix for exported files of any kind',
    metavar='PREFIX',
)

export.add_argument(
    '-sfx', '--file-suffix',
    help='optional string to use as the suffix for exported files of any kind',
    metavar='SUFFIX',
)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# Debugging
debug = parser.add_argument_group(title='DEBUG', description='Debugging/interactive options.')

debug.add_argument(
    '-I', '--interact',
    help='drop into interactive python REPL when parsing is complete',
    action='store_true',
)

debug.add_argument(
    '-D', '--debug',
    help='show verbose debug log output',
    action='store_true',
)

debug.add_argument(
    '-L', '--log-level',
    choices=['DEBUG', 'INFO', 'WARN', 'ERROR'],
    help='set the log level',
)

# Hand the fully built parser to the config class
YaralyzerConfig.set_argument_parser(parser)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def parse_arguments(args: Optional[Namespace] = None):
    """
    Parse command line args. Most arguments can also be communicated to the app by setting env vars.
    If `args` are passed neither rules nor a regex need be provided as it is assumed
    the constructor will instantiate a `Yaralyzer` object directly.

    Side effects: may adjust the application log level, the console width, and the
    `EncodingDetector` thresholds, and hands the final args to `YaralyzerConfig.set_args()`.

    Args:
        args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
            Defaults to `None`.

    Returns:
        Namespace: The parsed args, augmented with `standalone_mode` and `invoked_at_str`.

    Raises:
        ArgumentError: If args are invalid.
    """
    # Handled before argparse runs so that --version works without the required FILE argument
    if '--version' in sys.argv:
        print(f"yaralyzer {version('yaralyzer')}")
        sys.exit()

    # Hacky way to adjust arg parsing based on whether yaralyzer is used as a library vs. CLI tool
    used_as_library = args is not None
    args = args or parser.parse_args()
    log_argparse_result(args, 'RAW')
    args.standalone_mode = not used_as_library
    args.invoked_at_str = timestamp_for_filename()

    if args.debug:
        log.setLevel(logging.DEBUG)

        if args.log_level and args.log_level != 'DEBUG':
            log.warning("Ignoring --log-level option as debug mode means log level is DEBUG")
    elif args.log_level:
        log.setLevel(args.log_level)

    # Which of the rule/pattern sources were actually supplied
    yara_rules_args = [arg for arg in YARA_RULES_ARGS if vars(args)[arg] is not None]

    if used_as_library:
        # Library callers are not required to supply rules here (see docstring)
        pass
    elif len(yara_rules_args) > 1:
        raise ArgumentError(None, "Cannot mix rules files, rules dirs, and regex patterns (for now).")
    elif len(yara_rules_args) == 0:
        raise ArgumentError(None, "You must provide either a YARA rules file or a regex pattern")
    else:
        log_invocation()

    if args.maximize_width:
        rich_console.console.width = max(rich_console.console_width_possibilities())

    if args.patterns_label and not YARA_PATTERN_LABEL_REGEX.match(args.patterns_label):
        raise ArgumentError(None, 'Pattern can only include alphanumeric chars and underscore')

    # chardet.detect() action thresholds
    # Compare against None (not truthiness) so an explicit threshold of 0 is not silently discarded
    if args.force_decode_threshold is not None:
        EncodingDetector.force_decode_threshold = args.force_decode_threshold

    if args.force_display_threshold is not None:
        EncodingDetector.force_display_threshold = args.force_display_threshold

    # File export options
    if args.export_html or args.export_json or args.export_svg or args.export_txt:
        args.output_dir = args.output_dir or getcwd()
    elif args.output_dir:
        log.warning('--output-dir provided but no export option was chosen')

    YaralyzerConfig.set_args(args)

    if not used_as_library:
        log_argparse_result(args, 'parsed')
        log_current_config()
        log_argparse_result(YaralyzerConfig.args, 'with_env_vars')

    return args
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
    """
    Build the base path (output directory + extension-less filename) for exported files.

    Also stores the filename portion, without the trailing timestamp, on `args.output_basename`.
    """
    prefix = (args.file_prefix + '_') if args.file_prefix else ''
    basename = f"{prefix}{yaralyzer._filename_string()}__maxdecode{YaralyzerConfig.args.max_decode_length}"

    if args.file_suffix:
        basename += '_' + args.file_suffix

    args.output_basename = basename
    return path.join(args.output_dir, f"{basename}__at_{args.invoked_at_str}")
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Handle logging for `yaralyzer`.
|
|
3
|
+
|
|
4
|
+
There are two possible log sinks other than `STDOUT`:
|
|
5
|
+
|
|
6
|
+
1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
|
|
7
|
+
2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
|
|
8
|
+
|
|
9
|
+
The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
|
|
10
|
+
will be written if things ever need to get that formal. For now those logs are only accessible
|
|
11
|
+
on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
|
|
12
|
+
needs/wants that sort of thing.
|
|
13
|
+
|
|
14
|
+
Logs are normally ephemeral (not written to files) but can be configured to persist by setting
|
|
15
|
+
the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
|
|
16
|
+
of setting `YARALYZER_LOG_DIR` to a value.
|
|
17
|
+
|
|
18
|
+
* [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
|
|
19
|
+
|
|
20
|
+
* [realpython.com/python-logging/](https://realpython.com/python-logging/)
|
|
21
|
+
|
|
22
|
+
Python log levels for reference:
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
CRITICAL 50
|
|
26
|
+
ERROR 40
|
|
27
|
+
WARNING 30
|
|
28
|
+
INFO 20
|
|
29
|
+
DEBUG 10
|
|
30
|
+
NOTSET 0
|
|
31
|
+
```
|
|
32
|
+
"""
|
|
33
|
+
import logging
|
|
34
|
+
import sys
|
|
35
|
+
from os import path
|
|
36
|
+
from typing import Union
|
|
37
|
+
|
|
38
|
+
from rich.logging import RichHandler
|
|
39
|
+
|
|
40
|
+
from yaralyzer.config import YaralyzerConfig
|
|
41
|
+
|
|
42
|
+
ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def configure_logger(log_label: str) -> logging.Logger:
    """
    Build the `"yaralyzer.{log_label}"` logger, writing to a file or only to the console.

    When `YaralyzerConfig.LOG_DIR` is set the logger gets a file handler (everything) plus a
    rich console handler limited to warnings; otherwise it gets a single rich console handler.

    Args:
        log_label (str): The label for the `logger`, e.g. "run" or "invocation".

    Returns:
        logging.Logger: The configured `logger`, set to `YaralyzerConfig.LOG_LEVEL`.

    Raises:
        FileNotFoundError: If `YaralyzerConfig.LOG_DIR` is set but is not an existing absolute directory.
    """
    log_name = f"yaralyzer.{log_label}"
    logger = logging.getLogger(log_name)
    log_dir = YaralyzerConfig.LOG_DIR

    if not log_dir:
        logger.addHandler(RichHandler(rich_tracebacks=True))
    else:
        if not (path.isdir(log_dir) and path.isabs(log_dir)):
            raise FileNotFoundError(f"Log dir '{log_dir}' doesn't exist or is not absolute")

        file_handler = logging.FileHandler(path.join(log_dir, f"{log_name}.log"))
        file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        logger.addHandler(file_handler)

        # The console handler is only for printing warnings
        console_handler = RichHandler(rich_tracebacks=True)
        console_handler.setLevel('WARN')
        logger.addHandler(console_handler)

    logger.setLevel(YaralyzerConfig.LOG_LEVEL)
    return logger
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# See comment at top. 'log' is the standard application log, 'invocation_log' is a history of yaralyzer runs
# Both loggers are created at import time and are named 'yaralyzer.run' and 'yaralyzer.invocation'.
log = configure_logger('run')
invocation_log = configure_logger('invocation')

# If we're logging to files make sure invocation_log has the right level
# (this overrides the YaralyzerConfig.LOG_LEVEL that configure_logger() just applied,
# so the INFO level records written by log_invocation() are emitted)
if YaralyzerConfig.LOG_DIR:
    invocation_log.setLevel('INFO')
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def log_and_print(msg: str, log_level: str = 'INFO'):
    """Send `msg` to the application log at `log_level` and also print it to the console."""
    level = logging.getLevelName(log_level)
    log.log(level, msg)
    print(msg)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def log_current_config():
    """Log every non-dunder attribute of `YaralyzerConfig`, sorted by name, as one INFO message."""
    attributes = {name: value for name, value in vars(YaralyzerConfig).items() if not name.startswith('__')}
    rows = [f" {name: >35} {attributes[name]}\n" for name in sorted(attributes)]
    log.info(f"{YaralyzerConfig.__name__} current attributes:\n" + ''.join(rows))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def log_invocation() -> None:
    """Record the command line that launched `yaralyzer` in both the application and invocation logs."""
    command_line = ' '.join(sys.argv)
    msg = f"THE INVOCATION: '{command_line}'"

    for logger in (log, invocation_log):
        logger.info(msg)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def log_argparse_result(args, label: str):
    """Log a table of option name, value type, and value for everything in `args`, titled with `label`."""
    parsed = vars(args)
    header = ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')

    # One row per option, sorted by option name
    rows = [
        ARGPARSE_LOG_FORMAT.format(name, type(parsed[name]).__name__, str(parsed[name]))
        for name in sorted(parsed)
    ]

    log_msg = f'{label} argparse results:\n' + header + ''.join(rows) + "\n"
    invocation_log.info(log_msg)
    log.info(log_msg)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def set_log_level(level: Union[str, int]) -> None:
    """Apply `level` to each handler attached to the application log, then to the log itself."""
    for handler in log.handlers:
        handler.setLevel(level)

    log.setLevel(level)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
# Suppress annoying chardet library logs: raise these chardet submodule loggers to WARNING
for submodule in ('universaldetector', 'charsetprober', 'codingstatemachine'):
    logging.getLogger('chardet.' + submodule).setLevel(logging.WARNING)
|
yaralyzer/yara/error.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import yara
|
|
4
|
+
|
|
5
|
+
# Captures the numeric code at the very end of an "internal error: N" message
INTERNAL_ERROR_REGEX = re.compile(r"internal error: (\d+)$")
YARA_ERRORS_REPO_PATH = 'master/libyara/include/yara/error.h'
YARA_ERRORS_RAW_URL = f"https://raw.githubusercontent.com/VirusTotal/yara/refs/heads/{YARA_ERRORS_REPO_PATH}"
YARA_ERRORS_URL = f"https://github.com/VirusTotal/yara/blob/{YARA_ERRORS_REPO_PATH}"

# Extracted from YARA_ERRORS_RAW_URL
# Maps numeric YARA error codes to their names. Code 5 has no entry in this table.
YARA_ERROR_CODES = {
    0: 'SUCCESS',
    # This table previously also listed 1: 'INSUFICIENT_MEMORY' (presumably a legacy misspelled alias).
    # A duplicate dict key is silently overwritten, so only the spelling below was ever in effect.
    1: 'INSUFFICIENT_MEMORY',
    2: 'COULD_NOT_ATTACH_TO_PROCESS',
    3: 'COULD_NOT_OPEN_FILE',
    4: 'COULD_NOT_MAP_FILE',
    6: 'INVALID_FILE',
    7: 'CORRUPT_FILE',
    8: 'UNSUPPORTED_FILE_VERSION',
    9: 'INVALID_REGULAR_EXPRESSION',
    10: 'INVALID_HEX_STRING',
    11: 'SYNTAX_ERROR',
    12: 'LOOP_NESTING_LIMIT_EXCEEDED',
    13: 'DUPLICATED_LOOP_IDENTIFIER',
    14: 'DUPLICATED_IDENTIFIER',
    15: 'DUPLICATED_TAG_IDENTIFIER',
    16: 'DUPLICATED_META_IDENTIFIER',
    17: 'DUPLICATED_STRING_IDENTIFIER',
    18: 'UNREFERENCED_STRING',
    19: 'UNDEFINED_STRING',
    20: 'UNDEFINED_IDENTIFIER',
    21: 'MISPLACED_ANONYMOUS_STRING',
    22: 'INCLUDES_CIRCULAR_REFERENCE',
    23: 'INCLUDE_DEPTH_EXCEEDED',
    24: 'WRONG_TYPE',
    25: 'EXEC_STACK_OVERFLOW',
    26: 'SCAN_TIMEOUT',
    27: 'TOO_MANY_SCAN_THREADS',
    28: 'CALLBACK_ERROR',
    29: 'INVALID_ARGUMENT',
    30: 'TOO_MANY_MATCHES',
    31: 'INTERNAL_FATAL_ERROR',
    32: 'NESTED_FOR_OF_LOOP',
    33: 'INVALID_FIELD_NAME',
    34: 'UNKNOWN_MODULE',
    35: 'NOT_A_STRUCTURE',
    36: 'NOT_INDEXABLE',
    37: 'NOT_A_FUNCTION',
    38: 'INVALID_FORMAT',
    39: 'TOO_MANY_ARGUMENTS',
    40: 'WRONG_ARGUMENTS',
    41: 'WRONG_RETURN_TYPE',
    42: 'DUPLICATED_STRUCTURE_MEMBER',
    43: 'EMPTY_STRING',
    44: 'DIVISION_BY_ZERO',
    45: 'REGULAR_EXPRESSION_TOO_LARGE',
    46: 'TOO_MANY_RE_FIBERS',
    47: 'COULD_NOT_READ_PROCESS_MEMORY',
    48: 'INVALID_EXTERNAL_VARIABLE_TYPE',
    49: 'REGULAR_EXPRESSION_TOO_COMPLEX',
    50: 'INVALID_MODULE_NAME',
    51: 'TOO_MANY_STRINGS',
    52: 'INTEGER_OVERFLOW',
    53: 'CALLBACK_REQUIRED',
    54: 'INVALID_OPERAND',
    55: 'COULD_NOT_READ_FILE',
    56: 'DUPLICATED_EXTERNAL_VARIABLE',
    57: 'INVALID_MODULE_DATA',
    58: 'WRITING_FILE',
    59: 'INVALID_MODIFIER',
    60: 'DUPLICATED_MODIFIER',
    61: 'BLOCK_NOT_READY',
    62: 'INVALID_PERCENTAGE',
    63: 'IDENTIFIER_MATCHES_WILDCARD',
    64: 'INVALID_VALUE',
    65: 'TOO_SLOW_SCANNING',
    66: 'UNKNOWN_ESCAPE_SEQUENCE',
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def yara_error_msg(exception: yara.Error) -> str:
    """
    Build a human readable message for a YARA exception.

    If the exception text ends with "internal error: N" the numeric code is translated to its
    name via `YARA_ERROR_CODES`; otherwise the exception text is passed through as is.

    Args:
        exception (yara.Error): The exception raised by YARA.

    Returns:
        str: The formatted error message.
    """
    internal_error_match = INTERNAL_ERROR_REGEX.search(str(exception))

    if internal_error_match is None:
        return f"YARA error: {exception}"

    error_code = int(internal_error_match.group(1))
    # Codes missing from the table (e.g. 5) must not turn error reporting into a KeyError
    error_msg = YARA_ERROR_CODES.get(error_code, 'UNKNOWN')
    return f"Internal YARA error! (code: {error_code}, type: {error_msg})"
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Rich text decorator for YARA match dicts.
|
|
3
|
+
|
|
4
|
+
A YARA match is returned as a `dict` with this structure:
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
```
|
|
8
|
+
{
|
|
9
|
+
'tags': ['foo', 'bar'],
|
|
10
|
+
'matches': True,
|
|
11
|
+
'namespace': 'default',
|
|
12
|
+
'rule': 'my_rule',
|
|
13
|
+
'meta': {},
|
|
14
|
+
'strings': [
|
|
15
|
+
StringMatch1,
|
|
16
|
+
StringMatch2
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
```
|
|
20
|
+
"""
|
|
21
|
+
import re
|
|
22
|
+
from numbers import Number
|
|
23
|
+
from typing import Any, Dict
|
|
24
|
+
|
|
25
|
+
from rich.console import Console, ConsoleOptions, RenderResult
|
|
26
|
+
from rich.padding import Padding
|
|
27
|
+
from rich.panel import Panel
|
|
28
|
+
from rich.text import Text
|
|
29
|
+
from yara import StringMatch
|
|
30
|
+
|
|
31
|
+
from yaralyzer.helpers.bytes_helper import clean_byte_string
|
|
32
|
+
from yaralyzer.helpers.rich_text_helper import CENTER
|
|
33
|
+
from yaralyzer.helpers.string_helper import INDENT_SPACES
|
|
34
|
+
from yaralyzer.output.rich_console import console_width, theme_colors_with_prefix
|
|
35
|
+
from yaralyzer.util.logging import log
|
|
36
|
+
|
|
37
|
+
# Padding applied around the panels rendered for a match
MATCH_PADDING = (0, 0, 0, 1)

# Patterns used to pick a style for string values (applied with re.match, i.e. anchored at the start)
DATE_REGEX = re.compile(r'\d{4}-\d{2}-\d{2}')
DIGITS_REGEX = re.compile(r'^\d+$')
HEX_REGEX = re.compile(r'^[0-9A-Fa-f]+$')
MATCHER_VAR_REGEX = re.compile(r'\$[a-z_]+')
URL_REGEX = re.compile(r'^https?:')

# Checked in insertion order; the first pattern that matches decides the style
YARA_STRING_STYLES: Dict[re.Pattern, str] = {
    URL_REGEX: 'yara.url',
    DIGITS_REGEX: 'yara.number',
    HEX_REGEX: 'yara.hex',
    DATE_REGEX: 'yara.date',
    MATCHER_VAR_REGEX: 'yara.match_var',
}

# Theme color names with the leading 'yara' prefix and its one character separator stripped off
RAW_YARA_THEME_COLORS = [color[len('yara') + 1:] for color in theme_colors_with_prefix('yara')]
RAW_YARA_THEME_TXT = Text('\nColor Code: ') + Text(' ').join(RAW_YARA_THEME_COLORS)
RAW_YARA_THEME_TXT.justify = CENTER
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class YaraMatch:
    """Wraps a YARA match `dict` so it can be rendered by a rich console."""

    def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
        """
        Args:
            match (dict): The YARA match dict.
            matched_against_bytes_label (Text): Label indicating what bytes were matched against.
        """
        self.match = match
        self.rule_name = match['rule']

        # Work on a copy so the caller's label is left untouched
        label = matched_against_bytes_label.copy()
        label.append(" matched rule: '", style='matched_rule')
        label.append(self.rule_name, style='on bright_red bold')
        label.append("'!", style='siren')
        self.label = label

    def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
        """Yield a header panel, the color legend, and a panel with the color highlighted raw YARA match."""
        yield Text("\n")
        header_panel = Panel(self.label, expand=False, style="on color(251) reverse")
        yield Padding(header_panel, MATCH_PADDING)
        yield RAW_YARA_THEME_TXT
        details_panel = Panel(_rich_yara_match(self.match))
        yield Padding(details_panel, MATCH_PADDING)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _rich_yara_match(element: Any, depth: int = 0) -> Text:
    """
    Painful/hacky way of recursively coloring a YARA match dict.

    Strings, bytes, booleans, and numbers are rendered as single styled values. Lists/tuples and
    dicts are rendered recursively, one nesting level deeper each time. The 'matches' and 'rule'
    keys of any dict are omitted from the output.

    Args:
        element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
        depth (int): Current recursion depth (used for indentation).

    Returns:
        Text: The rich `Text` representation of the element.
    """
    indent = Text((depth + 1) * INDENT_SPACES)
    end_indent = Text(depth * INDENT_SPACES)

    if isinstance(element, str):
        txt = _yara_string(element)
    elif isinstance(element, bytes):
        txt = Text(clean_byte_string(element), style='bytes')
    elif isinstance(element, bool):
        # Must be tested before Number: bool is a subclass of int, so testing Number first
        # would swallow booleans and this branch would never run.
        txt = Text(str(element), style='red' if not element else 'green')
    elif isinstance(element, Number):
        txt = Text(str(element), style='bright_cyan')
    elif isinstance(element, (list, tuple)):
        if len(element) == 0:
            txt = Text('[]', style='white')
        else:
            if isinstance(element[0], StringMatch):
                # In yara-python 4.3.0 the StringMatch type was introduced so we just make it look like
                # the old list of tuples format (see: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0)
                match_tuples = [
                    (match.identifier, match_instance.offset, match_instance.matched_data)
                    for match in element
                    for match_instance in match.instances
                ]

                return _rich_yara_match(match_tuples, depth)

            # Rough width of the list if it were printed on a single line
            total_length = sum([len(str(e)) for e in element]) + ((len(element) - 1) * 2) + len(indent) + 2
            elements_txt = [_rich_yara_match(e, depth + 1) for e in element]
            list_txt = Text('[', style='white')

            # Put one element per line when the list is too wide for the console or has more than 3 elements
            if total_length > console_width() or len(element) > 3:
                join_txt = Text(f"\n{indent}")
                list_txt.append(join_txt).append(Text(f",{join_txt}").join(elements_txt))
                list_txt += Text(f'\n{end_indent}]', style='white')
            else:
                list_txt += Text(', ').join(elements_txt) + Text(']')

            return list_txt
    elif isinstance(element, dict):
        element = {k: v for k, v in element.items() if k not in ['matches', 'rule']}

        if len(element) == 0:
            return Text('{}')

        txt = Text('{\n', style='white')

        for i, k in enumerate(element.keys()):
            v = element[k]
            txt += indent + Text(f"{k}: ", style='yara.key') + _rich_yara_match(v, depth + 1)

            # Comma after every entry except the last
            if (i + 1) < len(element.keys()):
                txt.append(",\n")
            else:
                txt.append("\n")

        txt += end_indent + Text('}', style='white')
    else:
        log.warning(f"Unknown yara return of type {type(element)}: {element}")
        txt = indent + Text(str(element))

    return txt
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _yara_string(_string: str) -> Text:
    """Style `_string` per the first `YARA_STRING_STYLES` pattern it matches, else as 'yara.string'."""
    for pattern, style_name in YARA_STRING_STYLES.items():
        if pattern.match(_string):
            return Text(_string, style_name)

    return Text(_string, style='yara.string')
|