yaralyzer 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ """Argument parsing for yaralyzer CLI tool."""
2
+ import logging
3
+ import re
4
+ import sys
5
+ from argparse import ArgumentError, ArgumentParser, Namespace
6
+ from collections import namedtuple
7
+ from importlib.metadata import version
8
+ from os import getcwd, path
9
+ from typing import Optional
10
+
11
+ from rich_argparse_plus import RichHelpFormatterPlus
12
+
13
+ from yaralyzer.config import YaralyzerConfig
14
+ from yaralyzer.encoding_detection.encoding_detector import CONFIDENCE_SCORE_RANGE, EncodingDetector
15
+ from yaralyzer.helpers.file_helper import timestamp_for_filename
16
+ from yaralyzer.helpers.string_helper import comma_join
17
+ from yaralyzer.output import rich_console
18
+ from yaralyzer.yara.yara_rule_builder import YARA_REGEX_MODIFIERS
19
+ from yaralyzer.util.logging import log, log_argparse_result, log_current_config, log_invocation
20
+ from yaralyzer.yaralyzer import Yaralyzer
21
+
22
+
23
# Named tuple with `argument` and `method` fields, used to keep our argument selection orderly
OutputSection = namedtuple('OutputSection', ['argument', 'method'])

# A patterns label may only contain word characters (checked in parse_arguments())
YARA_PATTERN_LABEL_REGEX = re.compile('^\\w+$')
# argparse `dest` names of the mutually exclusive options that can supply YARA rules
YARA_RULES_ARGS = ['yara_rules_files', 'yara_rules_dirs', 'hex_patterns', 'regex_patterns']
DESCRIPTION = "Get a good hard colorful look at all the byte sequences that make up a YARA rule match. "

# Shown at the bottom of the --help output. Adjacent literals are concatenated by the parser.
EPILOG = (
    "* Values for various config options can be set permanently by a .yaralyzer file in your home directory; "
    "see the documentation for details.\n"
    "* A registry of previous yaralyzer invocations will be inscribed to a file if the "
    f"{YaralyzerConfig.LOG_DIR_ENV_VAR} environment variable is configured."
)
34
+
35
+
36
# Base parser: help theme, description/epilog, the version flag and the positional FILE argument
RichHelpFormatterPlus.choose_theme('prince')

parser = ArgumentParser(
    formatter_class=RichHelpFormatterPlus,
    description=DESCRIPTION,
    epilog=EPILOG,
)

parser.add_argument(
    '--version',
    action='store_true',
    help='show version number and exit',
)

parser.add_argument(
    'file_to_scan_path',
    metavar='FILE',
    help='file to scan',
)

# Options that determine where the YARA rules come from
source = parser.add_argument_group(
    'YARA RULES',
    "Load YARA rules from preconfigured files or use one off YARA regular expression strings",
)

source.add_argument(
    '--yara-file', '-Y',
    dest='yara_rules_files',
    metavar='FILE',
    action='append',
    help='path to a YARA rules file to check against (can be supplied more than once)',
)

source.add_argument(
    '--rule-dir', '-dir',
    dest='yara_rules_dirs',
    metavar='DIR',
    action='append',
    help='directory with yara rules files (all files are used, can be supplied more than once)',
)

source.add_argument(
    '--regex-pattern', '-re',
    dest='regex_patterns',
    metavar='PATTERN',
    action='append',
    help='build a YARA rule from PATTERN and run it (can be supplied more than once for boolean OR)',
)

source.add_argument(
    '--hex-pattern', '-hex',
    dest='hex_patterns',
    metavar='HEX_STRING',
    action='append',
    help='build a YARA rule from HEX_STRING and run it (can be supplied more than once for boolean OR)',
)

source.add_argument(
    '--patterns-label', '-rpl',
    metavar='STRING',
    help='supplying an optional STRING to label your YARA patterns makes it easier to scan results',
)

source.add_argument(
    '--regex-modifier', '-mod',
    metavar='MODIFIER',
    choices=YARA_REGEX_MODIFIERS,
    help=f"optional modifier keyword for YARA regexes ({comma_join(YARA_REGEX_MODIFIERS)})",
)
78
+
79
# Fine tuning: knobs for decoding, chardet and the YARA engine. Defaults come from YaralyzerConfig /
# EncodingDetector as they are at import time.
tuning = parser.add_argument_group(
    'FINE TUNING',
    "Tune various aspects of the analyses and visualizations to your needs. As an example setting "
    "a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can "
    "dramatically improve run times and only occasionally leads to a fatal lack of insight.",
)

tuning.add_argument(
    '--maximize-width',
    action='store_true',
    help="maximize the display width to fill the terminal",
)

tuning.add_argument(
    '--surrounding-bytes',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_SURROUNDING_BYTES,
    help="number of bytes to display/decode before and after YARA match start positions",
)

tuning.add_argument(
    '--suppress-decodes-table',
    action='store_true',
    help='suppress decodes table entirely (including hex/raw output)',
)

tuning.add_argument(
    '--suppress-decoding-attempts',
    action='store_true',
    help='suppress decode attempts for matched bytes (only hex/raw output will be shown)',
)

tuning.add_argument(
    '--min-decode-length',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_MIN_DECODE_LENGTH,
    help='suppress decode attempts for quoted byte sequences shorter than N',
)

tuning.add_argument(
    '--max-decode-length',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_MAX_DECODE_LENGTH,
    help='suppress decode attempts for quoted byte sequences longer than N',
)

tuning.add_argument(
    '--suppress-chardet',
    action='store_true',
    help="suppress the display of the full table of chardet's encoding likelihood scores",
)

tuning.add_argument(
    '--min-chardet-bytes',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_MIN_CHARDET_BYTES,
    help="minimum number of bytes to run chardet.detect() and the decodings it suggests",
)

tuning.add_argument(
    '--min-chardet-table-confidence',
    metavar='PCT_CONFIDENCE',
    type=int,
    default=YaralyzerConfig.DEFAULT_MIN_CHARDET_TABLE_CONFIDENCE,
    help="minimum chardet confidence to display the encoding name/score in the character "
         "detection scores table",
)

tuning.add_argument(
    '--force-display-threshold',
    metavar='PCT_CONFIDENCE',
    type=int,
    choices=CONFIDENCE_SCORE_RANGE,
    default=EncodingDetector.force_display_threshold,
    help="encodings with chardet confidence below this number will neither be displayed nor "
         "decoded in the decodings table",
)

tuning.add_argument(
    '--force-decode-threshold',
    metavar='PCT_CONFIDENCE',
    type=int,
    choices=CONFIDENCE_SCORE_RANGE,
    default=EncodingDetector.force_decode_threshold,
    help="extremely high (AKA 'above this number') confidence scores from chardet.detect() "
         "as to the likelihood some bytes were written with a particular encoding will cause "
         "the yaralyzer to attempt decoding those bytes in that encoding even if it is not a "
         "configured encoding",
)

tuning.add_argument(
    '--max-match-length',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_MAX_MATCH_LENGTH,
    help="max bytes YARA will return for a match",
)

tuning.add_argument(
    '--yara-stack-size',
    metavar='N',
    type=int,
    default=YaralyzerConfig.DEFAULT_YARA_STACK_SIZE,
    help="YARA matching engine internal stack size",
)
158
+
159
+
160
# Export options: each format flag stores its own name as a constant when chosen (None otherwise)
export = parser.add_argument_group(
    'FILE EXPORT',
    "Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file "
    "formats in parallel. Writes files to the current directory if --output-dir is not provided. "
    "Filenames are expansions of the scanned filename though you can use --file-prefix to make your "
    "filenames more unique and beautiful to their beholder.",
)

export.add_argument(
    '-svg', '--export-svg',
    const='svg',
    action='store_const',
    help='export analysis to SVG images',
)

export.add_argument(
    '-txt', '--export-txt',
    const='txt',
    action='store_const',
    help='export analysis to ANSI colored text files',
)

export.add_argument(
    '-html', '--export-html',
    const='html',
    action='store_const',
    help='export analysis to styled html files',
)

export.add_argument(
    '-json', '--export-json',
    const='json',
    action='store_const',
    help='export analysis to JSON files',
)

export.add_argument(
    '-out', '--output-dir',
    help='write files to OUTPUT_DIR instead of current dir, does nothing if not exporting a file',
    metavar='OUTPUT_DIR',
)

export.add_argument(
    '-pfx', '--file-prefix',
    help='optional string to use as the prefix for exported files of any kind',
    metavar='PREFIX',
)

export.add_argument(
    '-sfx', '--file-suffix',
    help='optional string to use as the suffix for exported files of any kind',
    metavar='SUFFIX',
)
199
+
200
+
201
# Debugging / interactive options
debug = parser.add_argument_group('DEBUG', 'Debugging/interactive options.')

debug.add_argument(
    '-I', '--interact',
    help='drop into interactive python REPL when parsing is complete',
    action='store_true',
)

debug.add_argument(
    '-D', '--debug',
    help='show verbose debug log output',
    action='store_true',
)

debug.add_argument(
    '-L', '--log-level',
    help='set the log level',
    choices=['DEBUG', 'INFO', 'WARN', 'ERROR'],
)

# Hand the fully built parser to the config class
YaralyzerConfig.set_argument_parser(parser)
217
+
218
+
219
def parse_arguments(args: Optional[Namespace] = None) -> Namespace:
    """
    Parse command line args. Most arguments can also be communicated to the app by setting env vars.
    If `args` are passed neither rules nor a regex need be provided as it is assumed
    the constructor will instantiate a `Yaralyzer` object directly.

    Besides returning the args this function has side effects: it may change the level of the
    application `log`, widen the rich console, override the `EncodingDetector` thresholds, and it
    hands the final args to `YaralyzerConfig.set_args()`.

    Args:
        args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
            Defaults to `None`.

    Returns:
        Namespace: The args, with `standalone_mode` and `invoked_at_str` added (and `output_dir`
            defaulted to the current directory when an export option was chosen).

    Raises:
        ArgumentError: If args are invalid.
        SystemExit: If `--version` is present in `sys.argv` (after printing the version).
    """
    if '--version' in sys.argv:
        print(f"yaralyzer {version('yaralyzer')}")
        sys.exit()

    # Hacky way to adjust arg parsing based on whether yaralyzer is used as a library vs. CLI tool
    used_as_library = args is not None
    args = args or parser.parse_args()
    log_argparse_result(args, 'RAW')
    args.standalone_mode = not used_as_library
    args.invoked_at_str = timestamp_for_filename()

    if args.debug:
        log.setLevel(logging.DEBUG)

        if args.log_level and args.log_level != 'DEBUG':
            log.warning("Ignoring --log-level option as debug mode means log level is DEBUG")
    elif args.log_level:
        log.setLevel(args.log_level)

    yara_rules_args = [arg for arg in YARA_RULES_ARGS if vars(args)[arg] is not None]

    # Rule source validation only applies to CLI use; library callers build their own Yaralyzer
    if used_as_library:
        pass
    elif len(yara_rules_args) > 1:
        raise ArgumentError(None, "Cannot mix rules files, rules dirs, and regex patterns (for now).")
    elif len(yara_rules_args) == 0:
        raise ArgumentError(None, "You must provide either a YARA rules file or a regex pattern")
    else:
        log_invocation()

    if args.maximize_width:
        rich_console.console.width = max(rich_console.console_width_possibilities())

    if args.patterns_label and not YARA_PATTERN_LABEL_REGEX.match(args.patterns_label):
        raise ArgumentError(None, 'Pattern can only include alphanumeric chars and underscore')

    # chardet.detect() action thresholds. Compare against None instead of relying on truthiness
    # so that an explicitly requested threshold of 0 is honored rather than silently ignored.
    if args.force_decode_threshold is not None:
        EncodingDetector.force_decode_threshold = args.force_decode_threshold

    if args.force_display_threshold is not None:
        EncodingDetector.force_display_threshold = args.force_display_threshold

    # File export options
    if args.export_html or args.export_json or args.export_svg or args.export_txt:
        args.output_dir = args.output_dir or getcwd()
    elif args.output_dir:
        log.warning('--output-dir provided but no export option was chosen')

    YaralyzerConfig.set_args(args)

    if not used_as_library:
        log_argparse_result(args, 'parsed')
        log_current_config()
        log_argparse_result(YaralyzerConfig.args, 'with_env_vars')

    return args
289
+
290
+
291
def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
    """
    Build the base path (directory + filename without extension) for exported files.

    The basename is stored on `args.output_basename` as a side effect. The returned path is that
    basename plus an `__at_<invocation timestamp>` tail, joined onto `args.output_dir`.
    """
    prefix = f"{args.file_prefix}_" if args.file_prefix else ''
    basename = prefix + yaralyzer._filename_string()
    basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"

    if args.file_suffix:
        basename += '_' + args.file_suffix

    args.output_basename = basename
    timestamped_name = f"{basename}__at_{args.invoked_at_str}"
    return path.join(args.output_dir, timestamped_name)
@@ -0,0 +1,135 @@
1
+ """
2
+ Handle logging for `yaralyzer`.
3
+
4
+ There are two possible log sinks other than `STDOUT`:
5
+
6
+ 1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
7
+ 2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
8
+
9
+ The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
10
+ will be written if things ever need to get that formal. For now those logs are only accessible
11
+ on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
12
+ needs/wants that sort of thing.
13
+
14
+ Logs are normally ephemeral (not written to files) but can be configured to be written to files by setting
15
+ the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
16
+ of setting `YARALYZER_LOG_DIR` to a value.
17
+
18
+ * [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
19
+
20
+ * [realpython.com/python-logging/](https://realpython.com/python-logging/)
21
+
22
+ Python log levels for reference:
23
+
24
+ ```
25
+ CRITICAL 50
26
+ ERROR 40
27
+ WARNING 30
28
+ INFO 20
29
+ DEBUG 10
30
+ NOTSET 0
31
+ ```
32
+ """
33
+ import logging
34
+ import sys
35
+ from os import path
36
+ from typing import Union
37
+
38
+ from rich.logging import RichHandler
39
+
40
+ from yaralyzer.config import YaralyzerConfig
41
+
42
+ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
43
+
44
+
45
def configure_logger(log_label: str) -> logging.Logger:
    """
    Build the `yaralyzer.{log_label}` logger, attaching file and/or console handlers.

    Without `YaralyzerConfig.LOG_DIR` the logger only gets a `RichHandler`. With it, the logger
    writes to `{LOG_DIR}/yaralyzer.{log_label}.log` and additionally gets a `RichHandler`
    limited to warnings and above.

    Args:
        log_label (str): The label for the `logger`, e.g. "run" or "invocation".

    Returns:
        logging.Logger: The configured `logger`, with its level set to `YaralyzerConfig.LOG_LEVEL`.

    Raises:
        FileNotFoundError: If `YaralyzerConfig.LOG_DIR` is set but is not an existing absolute directory.
    """
    log_name = f"yaralyzer.{log_label}"
    logger = logging.getLogger(log_name)
    log_dir = YaralyzerConfig.LOG_DIR

    if not log_dir:
        # Console-only logging
        logger.addHandler(RichHandler(rich_tracebacks=True))
    else:
        if not (path.isdir(log_dir) and path.isabs(log_dir)):
            raise FileNotFoundError(f"Log dir '{log_dir}' doesn't exist or is not absolute")

        file_handler = logging.FileHandler(path.join(log_dir, f"{log_name}.log"))
        file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        logger.addHandler(file_handler)

        # The console handler is only for printing warnings when a log file is in use
        console_handler = RichHandler(rich_tracebacks=True)
        console_handler.setLevel('WARN')
        logger.addHandler(console_handler)

    logger.setLevel(YaralyzerConfig.LOG_LEVEL)
    return logger
77
+
78
+
79
# 'log' is the standard application log; 'invocation_log' is a history of yaralyzer runs
# (see the module docstring).
log = configure_logger('run')
invocation_log = configure_logger('invocation')

# When logging to files the invocation log must be at INFO level
if YaralyzerConfig.LOG_DIR:
    invocation_log.setLevel('INFO')
86
+
87
+
88
def log_and_print(msg: str, log_level: str = 'INFO'):
    """
    Send `msg` to the application log at `log_level` and also print it to the console.

    Args:
        msg (str): The message to emit.
        log_level (str): Name of a logging level (e.g. 'INFO'). Defaults to 'INFO'.
    """
    # logging.getLevelName() maps a registered level name to its numeric value
    numeric_level = logging.getLevelName(log_level)
    log.log(numeric_level, msg)
    print(msg)
92
+
93
+
94
def log_current_config():
    """Log every non-dunder attribute of the `YaralyzerConfig` class, sorted by name, at INFO level."""
    header = f"{YaralyzerConfig.__name__} current attributes:\n"

    attributes = {
        name: value
        for name, value in vars(YaralyzerConfig).items()
        if not name.startswith('__')
    }

    rows = [f" {name: >35} {attributes[name]}\n" for name in sorted(attributes)]
    log.info(header + ''.join(rows))
103
+
104
+
105
def log_invocation() -> None:
    """Record the command line (`sys.argv`) in both the application log and the invocation log."""
    command_line = ' '.join(sys.argv)
    msg = f"THE INVOCATION: '{command_line}'"

    for logger in (log, invocation_log):
        logger.info(msg)
110
+
111
+
112
def log_argparse_result(args, label: str):
    """
    Log a table of the `argparse` results (option name, value type, value), sorted by option name.

    Args:
        args: Object whose `vars()` are the parsed options (e.g. an `argparse.Namespace`).
        label (str): Prefix for the table's title line.
    """
    options = vars(args)
    title = f'{label} argparse results:\n'
    header_row = ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')

    option_rows = [
        ARGPARSE_LOG_FORMAT.format(name, type(options[name]).__name__, str(options[name]))
        for name in sorted(options)
    ]

    log_msg = title + header_row + ''.join(option_rows) + "\n"
    invocation_log.info(log_msg)
    log.info(log_msg)
125
+
126
+
127
def set_log_level(level: Union[str, int]) -> None:
    """
    Apply `level` to every handler of the application log and then to the log itself.

    Args:
        level (Union[str, int]): A logging level name or numeric value.
    """
    targets = [*log.handlers, log]

    for target in targets:
        target.setLevel(level)
131
+
132
+
133
# Quiet the chardet library: only warnings and above from these submodules get through
for submodule in ('universaldetector', 'charsetprober', 'codingstatemachine'):
    chardet_logger = logging.getLogger(f"chardet.{submodule}")
    chardet_logger.setLevel(logging.WARNING)
@@ -0,0 +1,90 @@
1
+ import re
2
+
3
+ import yara
4
+
5
# Matches the numeric code at the end of YARA's "internal error: N" exception messages
INTERNAL_ERROR_REGEX = re.compile(r"internal error: (\d+)$")
YARA_ERRORS_REPO_PATH = 'master/libyara/include/yara/error.h'
YARA_ERRORS_RAW_URL = f"https://raw.githubusercontent.com/VirusTotal/yara/refs/heads/{YARA_ERRORS_REPO_PATH}"
YARA_ERRORS_URL = f"https://github.com/VirusTotal/yara/blob/{YARA_ERRORS_REPO_PATH}"

# Extracted from YARA_ERRORS_RAW_URL. There is no entry for code 5.
# NOTE(review): this table used to list code 1 twice (a misspelled 'INSUFICIENT_MEMORY' followed by
# 'INSUFFICIENT_MEMORY'); a dict keeps only the last value for a key so only the correctly spelled
# name was ever reachable. Upstream presumably keeps the misspelling as an alias - verify if needed.
YARA_ERROR_CODES = {
    0: 'SUCCESS',
    1: 'INSUFFICIENT_MEMORY',
    2: 'COULD_NOT_ATTACH_TO_PROCESS',
    3: 'COULD_NOT_OPEN_FILE',
    4: 'COULD_NOT_MAP_FILE',
    6: 'INVALID_FILE',
    7: 'CORRUPT_FILE',
    8: 'UNSUPPORTED_FILE_VERSION',
    9: 'INVALID_REGULAR_EXPRESSION',
    10: 'INVALID_HEX_STRING',
    11: 'SYNTAX_ERROR',
    12: 'LOOP_NESTING_LIMIT_EXCEEDED',
    13: 'DUPLICATED_LOOP_IDENTIFIER',
    14: 'DUPLICATED_IDENTIFIER',
    15: 'DUPLICATED_TAG_IDENTIFIER',
    16: 'DUPLICATED_META_IDENTIFIER',
    17: 'DUPLICATED_STRING_IDENTIFIER',
    18: 'UNREFERENCED_STRING',
    19: 'UNDEFINED_STRING',
    20: 'UNDEFINED_IDENTIFIER',
    21: 'MISPLACED_ANONYMOUS_STRING',
    22: 'INCLUDES_CIRCULAR_REFERENCE',
    23: 'INCLUDE_DEPTH_EXCEEDED',
    24: 'WRONG_TYPE',
    25: 'EXEC_STACK_OVERFLOW',
    26: 'SCAN_TIMEOUT',
    27: 'TOO_MANY_SCAN_THREADS',
    28: 'CALLBACK_ERROR',
    29: 'INVALID_ARGUMENT',
    30: 'TOO_MANY_MATCHES',
    31: 'INTERNAL_FATAL_ERROR',
    32: 'NESTED_FOR_OF_LOOP',
    33: 'INVALID_FIELD_NAME',
    34: 'UNKNOWN_MODULE',
    35: 'NOT_A_STRUCTURE',
    36: 'NOT_INDEXABLE',
    37: 'NOT_A_FUNCTION',
    38: 'INVALID_FORMAT',
    39: 'TOO_MANY_ARGUMENTS',
    40: 'WRONG_ARGUMENTS',
    41: 'WRONG_RETURN_TYPE',
    42: 'DUPLICATED_STRUCTURE_MEMBER',
    43: 'EMPTY_STRING',
    44: 'DIVISION_BY_ZERO',
    45: 'REGULAR_EXPRESSION_TOO_LARGE',
    46: 'TOO_MANY_RE_FIBERS',
    47: 'COULD_NOT_READ_PROCESS_MEMORY',
    48: 'INVALID_EXTERNAL_VARIABLE_TYPE',
    49: 'REGULAR_EXPRESSION_TOO_COMPLEX',
    50: 'INVALID_MODULE_NAME',
    51: 'TOO_MANY_STRINGS',
    52: 'INTEGER_OVERFLOW',
    53: 'CALLBACK_REQUIRED',
    54: 'INVALID_OPERAND',
    55: 'COULD_NOT_READ_FILE',
    56: 'DUPLICATED_EXTERNAL_VARIABLE',
    57: 'INVALID_MODULE_DATA',
    58: 'WRITING_FILE',
    59: 'INVALID_MODIFIER',
    60: 'DUPLICATED_MODIFIER',
    61: 'BLOCK_NOT_READY',
    62: 'INVALID_PERCENTAGE',
    63: 'IDENTIFIER_MATCHES_WILDCARD',
    64: 'INVALID_VALUE',
    65: 'TOO_SLOW_SCANNING',
    66: 'UNKNOWN_ESCAPE_SEQUENCE',
}
80
+
81
+
82
def yara_error_msg(exception: yara.Error) -> str:
    """
    Build a human readable message for a YARA exception.

    If the exception text ends with "internal error: N" the numeric code is translated to its
    name via `YARA_ERROR_CODES`. Codes missing from that table are reported as 'UNKNOWN' instead
    of raising a `KeyError` that would mask the original error.

    Args:
        exception (yara.Error): The exception raised by YARA.

    Returns:
        str: The formatted error message.
    """
    internal_error_match = INTERNAL_ERROR_REGEX.search(str(exception))

    if internal_error_match is None:
        return f"YARA error: {exception}"

    error_code = int(internal_error_match.group(1))
    error_msg = YARA_ERROR_CODES.get(error_code, 'UNKNOWN')
    return f"Internal YARA error! (code: {error_code}, type: {error_msg})"
@@ -0,0 +1,160 @@
1
+ """
2
+ Rich text decorator for YARA match dicts.
3
+
4
+ A YARA match is returned as a `dict` with this structure:
5
+
6
+ Example:
7
+ ```
8
+ {
9
+ 'tags': ['foo', 'bar'],
10
+ 'matches': True,
11
+ 'namespace': 'default',
12
+ 'rule': 'my_rule',
13
+ 'meta': {},
14
+ 'strings': [
15
+ StringMatch1,
16
+ StringMatch2
17
+ ]
18
+ }
19
+ ```
20
+ """
21
+ import re
22
+ from numbers import Number
23
+ from typing import Any, Dict
24
+
25
+ from rich.console import Console, ConsoleOptions, RenderResult
26
+ from rich.padding import Padding
27
+ from rich.panel import Panel
28
+ from rich.text import Text
29
+ from yara import StringMatch
30
+
31
+ from yaralyzer.helpers.bytes_helper import clean_byte_string
32
+ from yaralyzer.helpers.rich_text_helper import CENTER
33
+ from yaralyzer.helpers.string_helper import INDENT_SPACES
34
+ from yaralyzer.output.rich_console import console_width, theme_colors_with_prefix
35
+ from yaralyzer.util.logging import log
36
+
37
# Padding tuple handed to rich's Padding around the match panels
MATCH_PADDING = (0, 0, 0, 1)

# Patterns used to pick a style for strings found in a YARA match
DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
DIGITS_REGEX = re.compile("^\\d+$")
HEX_REGEX = re.compile('^[0-9A-Fa-f]+$')
MATCHER_VAR_REGEX = re.compile('\\$[a-z_]+')
URL_REGEX = re.compile('^https?:')

# Insertion order matters: _yara_string() uses the style of the first pattern that matches
YARA_STRING_STYLES: Dict[re.Pattern, str] = {
    URL_REGEX: 'yara.url',
    DIGITS_REGEX: 'yara.number',
    HEX_REGEX: 'yara.hex',
    DATE_REGEX: 'yara.date',
    MATCHER_VAR_REGEX: 'yara.match_var',
}

# Theme color entries with the leading 'yara' prefix and its one-character separator sliced off
RAW_YARA_THEME_COLORS = [color[len('yara.'):] for color in theme_colors_with_prefix('yara')]
RAW_YARA_THEME_TXT = Text('\nColor Code: ') + Text(' ').join(RAW_YARA_THEME_COLORS)
RAW_YARA_THEME_TXT.justify = CENTER
56
+
57
+
58
class YaraMatch:
    """Wraps a YARA match dict so it can be rendered as rich text."""

    def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
        """
        Args:
            match (dict): The YARA match dict (must contain a 'rule' key).
            matched_against_bytes_label (Text): Label indicating what bytes were matched against.
                It is copied, not modified.
        """
        self.match = match
        self.rule_name = match['rule']

        # Build "<label> matched rule: '<rule name>'!" on a copy of the caller's label
        label = matched_against_bytes_label.copy()
        label.append(" matched rule: '", style='matched_rule')
        label.append(self.rule_name, style='on bright_red bold')
        label.append("'!", style='siren')
        self.label = label

    def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
        """Yield the renderables for a match: header panel, color legend, then the styled match info."""
        yield Text("\n")
        header_panel = Panel(self.label, expand=False, style="on color(251) reverse")
        yield Padding(header_panel, MATCH_PADDING)
        yield RAW_YARA_THEME_TXT
        match_panel = Panel(_rich_yara_match(self.match))
        yield Padding(match_panel, MATCH_PADDING)
78
+
79
+
80
def _rich_yara_match(element: Any, depth: int = 0) -> Text:
    """
    Painful/hacky way of recursively coloring a YARA match dict.

    Args:
        element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
        depth (int): Current recursion depth (used for indentation).

    Returns:
        Text: The rich `Text` representation of the element.
    """
    indent = Text((depth + 1) * INDENT_SPACES)
    end_indent = Text(depth * INDENT_SPACES)

    if isinstance(element, str):
        txt = _yara_string(element)
    elif isinstance(element, bytes):
        txt = Text(clean_byte_string(element), style='bytes')
    elif isinstance(element, bool):
        # Must come before the Number check: bool is a subclass of int, so testing Number first
        # made this branch unreachable and booleans were styled like plain numbers.
        txt = Text(str(element), style='green' if element else 'red')
    elif isinstance(element, Number):
        txt = Text(str(element), style='bright_cyan')
    elif isinstance(element, (list, tuple)):
        if len(element) == 0:
            txt = Text('[]', style='white')
        else:
            if isinstance(element[0], StringMatch):
                # In yara-python 4.3.0 the StringMatch type was introduced so we just make it look like
                # the old list of tuples format (see: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0)
                match_tuples = [
                    (match.identifier, match_instance.offset, match_instance.matched_data)
                    for match in element
                    for match_instance in match.instances
                ]

                return _rich_yara_match(match_tuples, depth)

            # Estimated width if rendered on one line: items + ", " separators + indent + brackets
            total_length = sum(len(str(e)) for e in element) + ((len(element) - 1) * 2) + len(indent) + 2
            elements_txt = [_rich_yara_match(e, depth + 1) for e in element]
            list_txt = Text('[', style='white')

            # Long lists (too wide, or more than 3 items) get one item per line
            if total_length > console_width() or len(element) > 3:
                join_txt = Text(f"\n{indent}")
                list_txt.append(join_txt).append(Text(f",{join_txt}").join(elements_txt))
                list_txt += Text(f'\n{end_indent}]', style='white')
            else:
                list_txt += Text(', ').join(elements_txt) + Text(']')

            return list_txt
    elif isinstance(element, dict):
        # 'matches' and 'rule' are never rendered
        element = {k: v for k, v in element.items() if k not in ['matches', 'rule']}

        if len(element) == 0:
            return Text('{}')

        txt = Text('{\n', style='white')
        last_index = len(element) - 1

        for i, (k, v) in enumerate(element.items()):
            txt += indent + Text(f"{k}: ", style='yara.key') + _rich_yara_match(v, depth + 1)
            # Every entry but the last is followed by a comma
            txt.append(",\n" if i < last_index else "\n")

        txt += end_indent + Text('}', style='white')
    else:
        log.warning(f"Unknown yara return of type {type(element)}: {element}")
        txt = indent + Text(str(element))

    return txt
152
+
153
+
154
def _yara_string(_string: str) -> Text:
    """
    Style a YARA string according to the first `YARA_STRING_STYLES` pattern matching its start.

    Strings matching none of the patterns get the generic 'yara.string' style.
    """
    for pattern, style_name in YARA_STRING_STYLES.items():
        if pattern.match(_string):
            return Text(_string, style_name)

    return Text(_string, style='yara.string')
+
160
+ return Text(_string, style='yara.string')