yaralyzer 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- CHANGELOG.md +10 -0
- yaralyzer/__init__.py +5 -2
- yaralyzer/bytes_match.py +145 -52
- yaralyzer/config.py +18 -6
- yaralyzer/decoding/bytes_decoder.py +34 -15
- yaralyzer/decoding/decoding_attempt.py +10 -9
- yaralyzer/encoding_detection/character_encodings.py +40 -40
- yaralyzer/encoding_detection/encoding_assessment.py +10 -4
- yaralyzer/encoding_detection/encoding_detector.py +17 -13
- yaralyzer/helpers/bytes_helper.py +113 -16
- yaralyzer/helpers/dict_helper.py +1 -2
- yaralyzer/helpers/file_helper.py +3 -3
- yaralyzer/helpers/list_helper.py +1 -0
- yaralyzer/helpers/rich_text_helper.py +13 -11
- yaralyzer/helpers/string_helper.py +1 -1
- yaralyzer/output/file_export.py +2 -1
- yaralyzer/output/file_hashes_table.py +34 -6
- yaralyzer/output/regex_match_metrics.py +13 -10
- yaralyzer/output/rich_console.py +18 -3
- yaralyzer/util/argument_parser.py +11 -10
- yaralyzer/util/logging.py +6 -6
- yaralyzer/yara/yara_match.py +1 -1
- yaralyzer/yara/yara_rule_builder.py +16 -17
- yaralyzer/yaralyzer.py +66 -51
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/METADATA +12 -7
- yaralyzer-1.0.8.dist-info/RECORD +32 -0
- yaralyzer-1.0.6.dist-info/RECORD +0 -32
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/LICENSE +0 -0
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Methods for
|
|
2
|
+
Methods for computing and displaying various file hashes.
|
|
3
3
|
"""
|
|
4
4
|
import hashlib
|
|
5
5
|
from collections import namedtuple
|
|
@@ -14,11 +14,21 @@ BytesInfo = namedtuple('BytesInfo', ['size', 'md5', 'sha1', 'sha256'])
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def bytes_hashes_table(
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
"""
|
|
17
|
+
bytes_or_bytes_info: Union[bytes, BytesInfo],
|
|
18
|
+
title: Optional[str] = None,
|
|
19
|
+
title_justify: str = LEFT
|
|
20
|
+
) -> Table:
|
|
21
|
+
"""
|
|
22
|
+
Build a Rich Table displaying the size, MD5, SHA1, and SHA256 hashes of a byte sequence.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
bytes_or_bytes_info (Union[bytes, BytesInfo]): The bytes to hash, or a BytesInfo namedtuple with precomputed values.
|
|
26
|
+
title (Optional[str], optional): Optional title for the table. Defaults to None.
|
|
27
|
+
title_justify (str, optional): Justification for the table title. Defaults to LEFT.
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Table: A Rich Table object with the size and hash values.
|
|
31
|
+
"""
|
|
22
32
|
if isinstance(bytes_or_bytes_info, bytes):
|
|
23
33
|
bytes_info = compute_file_hashes(bytes_or_bytes_info)
|
|
24
34
|
else:
|
|
@@ -40,6 +50,15 @@ def bytes_hashes_table(
|
|
|
40
50
|
|
|
41
51
|
|
|
42
52
|
def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
53
|
+
"""
|
|
54
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for a given byte sequence.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
_bytes (bytes): The bytes to hash.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
BytesInfo: Namedtuple containing size, md5, sha1, and sha256 values.
|
|
61
|
+
"""
|
|
43
62
|
return BytesInfo(
|
|
44
63
|
size=len(_bytes),
|
|
45
64
|
md5=hashlib.md5(_bytes).hexdigest().upper(),
|
|
@@ -49,5 +68,14 @@ def compute_file_hashes(_bytes: bytes) -> BytesInfo:
|
|
|
49
68
|
|
|
50
69
|
|
|
51
70
|
def compute_file_hashes_for_file(file_path) -> BytesInfo:
|
|
71
|
+
"""
|
|
72
|
+
Compute the size, MD5, SHA1, and SHA256 hashes for the contents of a file.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
file_path (str): Path to the file to hash.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
BytesInfo: Namedtuple containing size, md5, sha1, and sha256 values for the file contents.
|
|
79
|
+
"""
|
|
52
80
|
with open(file_path, 'rb') as file:
|
|
53
81
|
return compute_file_hashes(file.read())
|
|
@@ -1,13 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Class to measure what we enounter as we iterate over every single match of a relatively simple byte level regex
|
|
3
|
-
(e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data
|
|
4
|
-
|
|
5
|
-
Things like how much many of our matched bytes were we able to decode easily vs. by force vs. not at all,
|
|
6
|
-
were some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
|
|
7
|
-
that way?
|
|
8
|
-
|
|
9
|
-
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
10
|
-
"""
|
|
1
|
+
"""RegexMatchMetrics class."""
|
|
11
2
|
from collections import defaultdict
|
|
12
3
|
|
|
13
4
|
from yaralyzer.decoding.bytes_decoder import BytesDecoder
|
|
@@ -15,6 +6,18 @@ from yaralyzer.util.logging import log
|
|
|
15
6
|
|
|
16
7
|
|
|
17
8
|
class RegexMatchMetrics:
|
|
9
|
+
"""
|
|
10
|
+
Class to measure what we enounter as we iterate over every single match of a relatively simple byte level regex.
|
|
11
|
+
|
|
12
|
+
(e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data.
|
|
13
|
+
|
|
14
|
+
Things like how much many of our matched bytes were we able to decode easily vs. by force vs. not at all,
|
|
15
|
+
were some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
|
|
16
|
+
that way?
|
|
17
|
+
|
|
18
|
+
TODO: use @dataclass decorator https://realpython.com/python-data-classes/
|
|
19
|
+
"""
|
|
20
|
+
|
|
18
21
|
def __init__(self) -> None:
|
|
19
22
|
self.match_count = 0
|
|
20
23
|
self.bytes_matched = 0
|
yaralyzer/output/rich_console.py
CHANGED
|
@@ -81,12 +81,13 @@ YARALYZER_THEME = Theme(YARALYZER_THEME_DICT)
|
|
|
81
81
|
|
|
82
82
|
|
|
83
83
|
def console_width_possibilities():
|
|
84
|
+
"""Returns a list of possible console widths, the first being the current terminal width."""
|
|
84
85
|
# Subtract 2 from terminal cols just as a precaution in case things get weird
|
|
85
86
|
return [get_terminal_size().columns - 2, DEFAULT_CONSOLE_WIDTH]
|
|
86
87
|
|
|
87
88
|
|
|
88
89
|
def console_width() -> int:
|
|
89
|
-
"""Current width set in console obj"""
|
|
90
|
+
"""Current width set in console obj."""
|
|
90
91
|
return console._width or 40
|
|
91
92
|
|
|
92
93
|
|
|
@@ -104,7 +105,7 @@ console = Console(theme=YARALYZER_THEME, color_system='256', highlight=False, wi
|
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
def console_print_with_fallback(_string, style=None) -> None:
|
|
107
|
-
"""Fallback to regular print() if there's a Markup issue"""
|
|
108
|
+
"""Fallback to regular print() if there's a Markup issue."""
|
|
108
109
|
try:
|
|
109
110
|
console.print(_string, style=style)
|
|
110
111
|
except MarkupError:
|
|
@@ -113,15 +114,29 @@ def console_print_with_fallback(_string, style=None) -> None:
|
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
def theme_colors_with_prefix(prefix: str) -> List[Text]:
|
|
117
|
+
"""Return a list of (name, style) Text objects for all styles in the theme that start with 'prefix'."""
|
|
116
118
|
return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
|
|
117
119
|
|
|
118
120
|
|
|
119
121
|
def print_fatal_error_and_exit(error_message: str) -> None:
|
|
122
|
+
"""Print a fatal error message in a panel and exit."""
|
|
120
123
|
console.line(1)
|
|
121
124
|
print_header_panel(error_message, style='bold red reverse')
|
|
122
125
|
console.line(1)
|
|
123
126
|
exit()
|
|
124
127
|
|
|
125
128
|
|
|
126
|
-
def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0,2)) -> None:
|
|
129
|
+
def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0, 2)) -> None:
|
|
130
|
+
"""
|
|
131
|
+
Print a headline inside a styled Rich Panel to the console.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
headline (str): The text to display as the panel's headline.
|
|
135
|
+
style (str): The style to apply to the panel (e.g., color, bold, reverse).
|
|
136
|
+
expand (bool, optional): Whether the panel should expand to the full console width. Defaults to True.
|
|
137
|
+
padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to (0, 2).
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
None
|
|
141
|
+
"""
|
|
127
142
|
console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
"""Argument parsing for yaralyzer CLI tool."""
|
|
1
2
|
import logging
|
|
2
3
|
import re
|
|
3
4
|
import sys
|
|
@@ -78,8 +79,8 @@ source.add_argument('--regex-modifier', '-mod',
|
|
|
78
79
|
# Fine tuning
|
|
79
80
|
tuning = parser.add_argument_group(
|
|
80
81
|
'FINE TUNING',
|
|
81
|
-
"Tune various aspects of the analyses and visualizations to your needs. As an example setting " +
|
|
82
|
-
"a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can " +
|
|
82
|
+
"Tune various aspects of the analyses and visualizations to your needs. As an example setting " +
|
|
83
|
+
"a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can " +
|
|
83
84
|
"dramatically improve run times and only occasionally leads to a fatal lack of insight.")
|
|
84
85
|
|
|
85
86
|
tuning.add_argument('--maximize-width', action='store_true',
|
|
@@ -119,14 +120,14 @@ tuning.add_argument('--min-chardet-bytes',
|
|
|
119
120
|
type=int)
|
|
120
121
|
|
|
121
122
|
tuning.add_argument('--min-chardet-table-confidence',
|
|
122
|
-
help="minimum chardet confidence to display the encoding name/score in the character " +
|
|
123
|
+
help="minimum chardet confidence to display the encoding name/score in the character " +
|
|
123
124
|
"decection scores table",
|
|
124
125
|
default=YaralyzerConfig.DEFAULT_MIN_CHARDET_TABLE_CONFIDENCE,
|
|
125
126
|
metavar='PCT_CONFIDENCE',
|
|
126
127
|
type=int)
|
|
127
128
|
|
|
128
129
|
tuning.add_argument('--force-display-threshold',
|
|
129
|
-
help="encodings with chardet confidence below this number will neither be displayed nor " +
|
|
130
|
+
help="encodings with chardet confidence below this number will neither be displayed nor " +
|
|
130
131
|
"decoded in the decodings table",
|
|
131
132
|
default=EncodingDetector.force_display_threshold,
|
|
132
133
|
metavar='PCT_CONFIDENCE',
|
|
@@ -134,9 +135,9 @@ tuning.add_argument('--force-display-threshold',
|
|
|
134
135
|
choices=CONFIDENCE_SCORE_RANGE)
|
|
135
136
|
|
|
136
137
|
tuning.add_argument('--force-decode-threshold',
|
|
137
|
-
help="extremely high (AKA 'above this number') confidence scores from chardet.detect() " +
|
|
138
|
-
"as to the likelihood some bytes were written with a particular encoding will cause " +
|
|
139
|
-
"the yaralyzer to attempt decoding those bytes in that encoding even if it is not a " +
|
|
138
|
+
help="extremely high (AKA 'above this number') confidence scores from chardet.detect() " +
|
|
139
|
+
"as to the likelihood some bytes were written with a particular encoding will cause " +
|
|
140
|
+
"the yaralyzer to attempt decoding those bytes in that encoding even if it is not a " +
|
|
140
141
|
"configured encoding",
|
|
141
142
|
default=EncodingDetector.force_decode_threshold,
|
|
142
143
|
metavar='PCT_CONFIDENCE',
|
|
@@ -159,8 +160,8 @@ tuning.add_argument('--yara-stack-size',
|
|
|
159
160
|
# Export options
|
|
160
161
|
export = parser.add_argument_group(
|
|
161
162
|
'FILE EXPORT',
|
|
162
|
-
"Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " +
|
|
163
|
-
"formats in parallel. Writes files to the current directory if --output-dir is not provided. " +
|
|
163
|
+
"Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " +
|
|
164
|
+
"formats in parallel. Writes files to the current directory if --output-dir is not provided. " +
|
|
164
165
|
"Filenames are expansions of the scanned filename though you can use --file-prefix to make your " +
|
|
165
166
|
"filenames more unique and beautiful to their beholder.")
|
|
166
167
|
|
|
@@ -282,7 +283,7 @@ def parse_arguments(args: Optional[Namespace] = None):
|
|
|
282
283
|
|
|
283
284
|
def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
|
|
284
285
|
file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
|
|
285
|
-
args.output_basename
|
|
286
|
+
args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}" # noqa: E221
|
|
286
287
|
args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
|
|
287
288
|
args.output_basename += ('_' + args.file_suffix) if args.file_suffix else ''
|
|
288
289
|
return path.join(args.output_dir, args.output_basename + f"__at_{args.invoked_at_str}")
|
yaralyzer/util/logging.py
CHANGED
|
@@ -26,7 +26,7 @@ Python log levels for reference:
|
|
|
26
26
|
"""
|
|
27
27
|
import logging
|
|
28
28
|
import sys
|
|
29
|
-
from os import
|
|
29
|
+
from os import path
|
|
30
30
|
from typing import Union
|
|
31
31
|
|
|
32
32
|
from rich.logging import RichHandler
|
|
@@ -37,7 +37,7 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
def configure_logger(log_label: str) -> logging.Logger:
|
|
40
|
-
"""Set up a file or stream logger depending on the configuration"""
|
|
40
|
+
"""Set up a file or stream logger depending on the configuration."""
|
|
41
41
|
log_name = f"yaralyzer.{log_label}"
|
|
42
42
|
logger = logging.getLogger(log_name)
|
|
43
43
|
|
|
@@ -71,13 +71,13 @@ if YaralyzerConfig.LOG_DIR:
|
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def log_and_print(msg: str, log_level='INFO'):
|
|
74
|
-
"""Both print and log (at INFO level) a string"""
|
|
74
|
+
"""Both print and log (at INFO level) a string."""
|
|
75
75
|
log.log(logging.getLevelName(log_level), msg)
|
|
76
76
|
print(msg)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
def log_current_config():
|
|
80
|
-
"""Write current state of YaralyzerConfig object to the logs"""
|
|
80
|
+
"""Write current state of YaralyzerConfig object to the logs."""
|
|
81
81
|
msg = f"{YaralyzerConfig.__name__} current attributes:\n"
|
|
82
82
|
config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
|
|
83
83
|
|
|
@@ -88,14 +88,14 @@ def log_current_config():
|
|
|
88
88
|
|
|
89
89
|
|
|
90
90
|
def log_invocation() -> None:
|
|
91
|
-
"""Log the command used to launch the yaralyzer to the invocation log"""
|
|
91
|
+
"""Log the command used to launch the yaralyzer to the invocation log."""
|
|
92
92
|
msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
|
|
93
93
|
log.info(msg)
|
|
94
94
|
invocation_log.info(msg)
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
def log_argparse_result(args, label: str):
|
|
98
|
-
"""Logs the result of argparse"""
|
|
98
|
+
"""Logs the result of argparse."""
|
|
99
99
|
args_dict = vars(args)
|
|
100
100
|
log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
|
|
101
101
|
|
yaralyzer/yara/yara_match.py
CHANGED
|
@@ -97,7 +97,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
|
97
97
|
list_txt = Text('[', style='white')
|
|
98
98
|
|
|
99
99
|
if total_length > console_width() or len(element) > 3:
|
|
100
|
-
join_txt = Text(f"\n{indent}"
|
|
100
|
+
join_txt = Text(f"\n{indent}")
|
|
101
101
|
list_txt.append(join_txt).append(Text(f",{join_txt}").join(elements_txt))
|
|
102
102
|
list_txt += Text(f'\n{end_indent}]', style='white')
|
|
103
103
|
else:
|
|
@@ -4,12 +4,11 @@ Builds bare bones YARA rules to match strings and regex patterns. Example rule s
|
|
|
4
4
|
rule Just_A_Piano_Man {
|
|
5
5
|
meta:
|
|
6
6
|
author = "Tim"
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
strings:
|
|
8
|
+
$hilton_producer = /Scott.*Storch/
|
|
9
|
+
condition:
|
|
10
|
+
$hilton_producer
|
|
11
11
|
}
|
|
12
|
-
|
|
13
12
|
"""
|
|
14
13
|
import re
|
|
15
14
|
from typing import Optional
|
|
@@ -60,12 +59,12 @@ rule {rule_name} {{
|
|
|
60
59
|
|
|
61
60
|
|
|
62
61
|
def yara_rule_string(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
62
|
+
pattern: str,
|
|
63
|
+
pattern_type: str = REGEX,
|
|
64
|
+
rule_name: str = YARALYZE,
|
|
65
|
+
pattern_label: Optional[str] = PATTERN,
|
|
66
|
+
modifier: Optional[str] = None
|
|
67
|
+
) -> str:
|
|
69
68
|
"""Build a YARA rule string for a given pattern"""
|
|
70
69
|
if not (modifier is None or modifier in YARA_REGEX_MODIFIERS):
|
|
71
70
|
raise TypeError(f"Modifier '{modifier}' is not one of {YARA_REGEX_MODIFIERS}")
|
|
@@ -89,12 +88,12 @@ def yara_rule_string(
|
|
|
89
88
|
|
|
90
89
|
|
|
91
90
|
def build_yara_rule(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
91
|
+
pattern: str,
|
|
92
|
+
pattern_type: str = REGEX,
|
|
93
|
+
rule_name: str = YARALYZE,
|
|
94
|
+
pattern_label: Optional[str] = PATTERN,
|
|
95
|
+
modifier: Optional[str] = None
|
|
96
|
+
) -> yara.Rule:
|
|
98
97
|
"""Build a compiled YARA rule"""
|
|
99
98
|
rule_string = yara_rule_string(pattern, pattern_type, rule_name, pattern_label, modifier)
|
|
100
99
|
return yara.compile(source=rule_string)
|
yaralyzer/yaralyzer.py
CHANGED
|
@@ -1,15 +1,5 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Central class that handles setting up / compiling rules and reading binary data from files as needed.
|
|
3
|
-
Alternate constructors are provided depending on whether:
|
|
4
|
-
1. YARA rules are already compiled
|
|
5
|
-
2. YARA rules should be compiled from a string
|
|
6
|
-
3. YARA rules should be read from a file
|
|
7
|
-
4. YARA rules should be read from a directory of .yara files
|
|
8
|
-
|
|
9
|
-
The real action happens in the __rich__console__() dunder method.
|
|
10
|
-
"""
|
|
1
|
+
"""Main Yaralyzer class and alternate constructors."""
|
|
11
2
|
from os import path
|
|
12
|
-
from sys import exit
|
|
13
3
|
from typing import Iterator, List, Optional, Tuple, Union
|
|
14
4
|
|
|
15
5
|
import yara
|
|
@@ -35,19 +25,42 @@ YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't
|
|
|
35
25
|
|
|
36
26
|
# TODO: might be worth introducing a Scannable namedtuple or similar
|
|
37
27
|
class Yaralyzer:
|
|
28
|
+
"""
|
|
29
|
+
Central class that handles setting up / compiling rules and reading binary data from files as needed.
|
|
30
|
+
|
|
31
|
+
Alternate constructors are provided depending on whether:
|
|
32
|
+
|
|
33
|
+
* YARA rules are already compiled
|
|
34
|
+
|
|
35
|
+
* YARA rules should be compiled from a string
|
|
36
|
+
|
|
37
|
+
* YARA rules should be read from a file
|
|
38
|
+
|
|
39
|
+
* YARA rules should be read from a directory of .yara files
|
|
40
|
+
|
|
41
|
+
The real action happens in the __rich__console__() dunder method.
|
|
42
|
+
"""
|
|
43
|
+
|
|
38
44
|
def __init__(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
self,
|
|
46
|
+
rules: Union[str, yara.Rules],
|
|
47
|
+
rules_label: str,
|
|
48
|
+
scannable: Union[bytes, str],
|
|
49
|
+
scannable_label: Optional[str] = None,
|
|
50
|
+
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
51
|
+
) -> None:
|
|
46
52
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
53
|
+
Initialize a Yaralyzer instance for scanning binary data with YARA rules.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string (YARA rule source) or a pre-compiled yara.Rules object. If a string is provided, it will be compiled.
|
|
57
|
+
rules_label (str): Label to identify the ruleset in output and logs.
|
|
58
|
+
scannable (Union[bytes, str]): The data to scan. If bytes, raw data is scanned; if str, it is treated as a file path to load bytes from.
|
|
59
|
+
scannable_label (Optional[str], optional): Label for the scannable data. Required if scannable is bytes. If scannable is a file path, defaults to the file's basename.
|
|
60
|
+
highlight_style (str, optional): Style to use for highlighting matches in output. Defaults to YaralyzerConfig.HIGHLIGHT_STYLE.
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
TypeError: If scannable is bytes and scannable_label is not provided.
|
|
51
64
|
"""
|
|
52
65
|
if 'args' not in vars(YaralyzerConfig):
|
|
53
66
|
YaralyzerConfig.set_default_args()
|
|
@@ -83,12 +96,12 @@ class Yaralyzer:
|
|
|
83
96
|
|
|
84
97
|
@classmethod
|
|
85
98
|
def for_rules_files(
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
"""Alternate constructor
|
|
99
|
+
cls,
|
|
100
|
+
yara_rules_files: List[str],
|
|
101
|
+
scannable: Union[bytes, str],
|
|
102
|
+
scannable_label: Optional[str] = None
|
|
103
|
+
) -> 'Yaralyzer':
|
|
104
|
+
"""Alternate constructor to load yara rules from files and label rules with the filenames."""
|
|
92
105
|
if not isinstance(yara_rules_files, list):
|
|
93
106
|
raise TypeError(f"{yara_rules_files} is not a list")
|
|
94
107
|
|
|
@@ -108,12 +121,12 @@ class Yaralyzer:
|
|
|
108
121
|
|
|
109
122
|
@classmethod
|
|
110
123
|
def for_rules_dirs(
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
"""Alternate constructor that will load all .yara files in yara_rules_dir"""
|
|
124
|
+
cls,
|
|
125
|
+
dirs: List[str],
|
|
126
|
+
scannable: Union[bytes, str],
|
|
127
|
+
scannable_label: Optional[str] = None
|
|
128
|
+
) -> 'Yaralyzer':
|
|
129
|
+
"""Alternate constructor that will load all .yara files in yara_rules_dir."""
|
|
117
130
|
if not (isinstance(dirs, list) and all(path.isdir(dir) for dir in dirs)):
|
|
118
131
|
raise TypeError(f"'{dirs}' is not a list of valid directories")
|
|
119
132
|
|
|
@@ -122,16 +135,16 @@ class Yaralyzer:
|
|
|
122
135
|
|
|
123
136
|
@classmethod
|
|
124
137
|
def for_patterns(
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
"""Constructor taking regex pattern strings. Rules label defaults to patterns joined by comma"""
|
|
138
|
+
cls,
|
|
139
|
+
patterns: List[str],
|
|
140
|
+
patterns_type: str,
|
|
141
|
+
scannable: Union[bytes, str],
|
|
142
|
+
scannable_label: Optional[str] = None,
|
|
143
|
+
rules_label: Optional[str] = None,
|
|
144
|
+
pattern_label: Optional[str] = None,
|
|
145
|
+
regex_modifier: Optional[str] = None,
|
|
146
|
+
) -> 'Yaralyzer':
|
|
147
|
+
"""Constructor taking regex pattern strings. Rules label defaults to patterns joined by comma."""
|
|
135
148
|
rule_strings = []
|
|
136
149
|
|
|
137
150
|
for i, pattern in enumerate(patterns):
|
|
@@ -150,7 +163,7 @@ class Yaralyzer:
|
|
|
150
163
|
return cls(rules_string, rules_label, scannable, scannable_label)
|
|
151
164
|
|
|
152
165
|
def yaralyze(self) -> None:
|
|
153
|
-
"""Use YARA to find matches and then force decode them"""
|
|
166
|
+
"""Use YARA to find matches and then force decode them."""
|
|
154
167
|
console.print(self)
|
|
155
168
|
|
|
156
169
|
def match_iterator(self) -> Iterator[Tuple[BytesMatch, BytesDecoder]]:
|
|
@@ -169,6 +182,7 @@ class Yaralyzer:
|
|
|
169
182
|
self._print_non_matches()
|
|
170
183
|
|
|
171
184
|
def _yara_callback(self, data: dict):
|
|
185
|
+
"""YARA callback to handle matches and non-matches as they are discovered."""
|
|
172
186
|
if data['matches']:
|
|
173
187
|
self.matches.append(YaraMatch(data, self._panel_text()))
|
|
174
188
|
else:
|
|
@@ -177,7 +191,7 @@ class Yaralyzer:
|
|
|
177
191
|
return yara.CALLBACK_CONTINUE
|
|
178
192
|
|
|
179
193
|
def _print_non_matches(self) -> None:
|
|
180
|
-
"""Print info about the YARA rules that didn't match the bytes"""
|
|
194
|
+
"""Print info about the YARA rules that didn't match the bytes."""
|
|
181
195
|
if len(self.non_matches) == 0:
|
|
182
196
|
return
|
|
183
197
|
|
|
@@ -186,7 +200,7 @@ class Yaralyzer:
|
|
|
186
200
|
# Only show the non matches if there were valid ones, otherwise just show the number
|
|
187
201
|
if len(self.matches) == 0:
|
|
188
202
|
non_match_desc = f" did not match any of the {len(self.non_matches)} yara rules"
|
|
189
|
-
console.print(dim_if(self.__text__()
|
|
203
|
+
console.print(dim_if(self.__text__() + Text(non_match_desc, style='grey'), True))
|
|
190
204
|
return
|
|
191
205
|
|
|
192
206
|
non_match_desc = f" did not match the other {len(self.non_matches)} yara rules"
|
|
@@ -194,21 +208,21 @@ class Yaralyzer:
|
|
|
194
208
|
console.print(Padding(Text(', ', 'white').join(non_matches_text), (0, 0, 1, 4)))
|
|
195
209
|
|
|
196
210
|
def _panel_text(self) -> Text:
|
|
197
|
-
"""Inverted colors for the panel at the top of the match section of the output"""
|
|
211
|
+
"""Inverted colors for the panel at the top of the match section of the output."""
|
|
198
212
|
styles = [reverse_color(YARALYZER_THEME.styles[f"yara.{s}"]) for s in ('scanned', 'rules')]
|
|
199
213
|
return self.__text__(*styles)
|
|
200
214
|
|
|
201
215
|
def _filename_string(self):
|
|
202
|
-
"""The string to use when exporting this yaralyzer to SVG/HTML/etc"""
|
|
216
|
+
"""The string to use when exporting this yaralyzer to SVG/HTML/etc."""
|
|
203
217
|
return str(self).replace('>', '').replace('<', '').replace(' ', '_')
|
|
204
218
|
|
|
205
219
|
def __text__(self, byte_style: str = 'yara.scanned', rule_style: str = 'yara.rules') -> Text:
|
|
206
|
-
"""Text representation of this YARA scan (__text__() was taken)"""
|
|
220
|
+
"""Text representation of this YARA scan (__text__() was taken)."""
|
|
207
221
|
txt = Text('').append(self.scannable_label, style=byte_style or 'yara.scanned')
|
|
208
222
|
return txt.append(' scanned with <').append(self.rules_label, style=rule_style or 'yara.rules').append('>')
|
|
209
223
|
|
|
210
224
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
211
|
-
"""Does the stuff. TODO: not the best place to put the core logic"""
|
|
225
|
+
"""Does the stuff. TODO: not the best place to put the core logic."""
|
|
212
226
|
yield bytes_hashes_table(self.bytes, self.scannable_label)
|
|
213
227
|
|
|
214
228
|
for _bytes_match, bytes_decoder in self.match_iterator():
|
|
@@ -216,4 +230,5 @@ class Yaralyzer:
|
|
|
216
230
|
yield attempt
|
|
217
231
|
|
|
218
232
|
def __str__(self) -> str:
|
|
233
|
+
"""Plain text (no rich colors) representation of the scan for display."""
|
|
219
234
|
return self.__text__().plain
|
|
@@ -1,31 +1,33 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 1.0.
|
|
4
|
-
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream
|
|
3
|
+
Version: 1.0.8
|
|
4
|
+
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
7
|
-
Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,visualization,yara
|
|
7
|
+
Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
|
|
8
8
|
Author: Michel de Cryptadamus
|
|
9
9
|
Author-email: michel@cryptadamus.com
|
|
10
|
-
Requires-Python: >=3.
|
|
10
|
+
Requires-Python: >=3.10,<4.0
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
13
|
Classifier: Intended Audience :: Information Technology
|
|
14
14
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
15
15
|
Classifier: Programming Language :: Python
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
21
|
Classifier: Topic :: Artistic Software
|
|
21
22
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
22
23
|
Classifier: Topic :: Security
|
|
23
24
|
Requires-Dist: chardet (>=5.0.0,<6.0.0)
|
|
24
|
-
Requires-Dist: python-dotenv (>=
|
|
25
|
+
Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
|
|
25
26
|
Requires-Dist: rich (>=14.1.0,<15.0.0)
|
|
26
27
|
Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
|
|
27
28
|
Requires-Dist: yara-python (>=4.5.4,<5.0.0)
|
|
28
|
-
Project-URL:
|
|
29
|
+
Project-URL: Changelog, https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md
|
|
30
|
+
Project-URL: Documentation, https://michelcrypt4d4mus.github.io/yaralyzer/
|
|
29
31
|
Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
|
|
30
32
|
Description-Content-Type: text/markdown
|
|
31
33
|
|
|
@@ -117,6 +119,9 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
117
119
|
do_stuff()
|
|
118
120
|
```
|
|
119
121
|
|
|
122
|
+
#### API Documentation
|
|
123
|
+
Auto generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
|
|
124
|
+
|
|
120
125
|
# Example Output
|
|
121
126
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
122
127
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
.yaralyzer.example,sha256=z3_mk41xxm0Pr_8MGM7AKQG0xEFRtGcyJLboMuelRp4,3504
|
|
2
|
+
CHANGELOG.md,sha256=3PiqI0fAHynELKgzQFf0R7iQAEH47tXR6duTWiZ69nQ,3296
|
|
3
|
+
LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
4
|
+
yaralyzer/__init__.py,sha256=gqqIH5jb-rR9UFi-kETJmHlY4L-RGdMFahS2j24TKXU,2795
|
|
5
|
+
yaralyzer/bytes_match.py,sha256=gCpRhHQyudH2Hqxdc5HVKcZ-E499kN-ckScG3i9S_lo,10934
|
|
6
|
+
yaralyzer/config.py,sha256=CNpTnNaJBuDcS4jK-EOekiYtA3FIGa27SPzZn0YVDis,4483
|
|
7
|
+
yaralyzer/decoding/bytes_decoder.py,sha256=vdJrGTYzR842fwY9nVqVu4pIujia3R9skwSTlNCUXaY,9859
|
|
8
|
+
yaralyzer/decoding/decoding_attempt.py,sha256=lO7ihuvkxZZ16Nl5KS6WTb4FfmpbMB-ogGVw6OeUDw8,8564
|
|
9
|
+
yaralyzer/encoding_detection/character_encodings.py,sha256=KklTVt9YpPtMYUp-XjCk32M2te1k1yJW12QpytkyRd4,5465
|
|
10
|
+
yaralyzer/encoding_detection/encoding_assessment.py,sha256=SszGxFXGdoAI35Ba2bjSTLoTg6mhTyVZKd-h3qQVqjo,2505
|
|
11
|
+
yaralyzer/encoding_detection/encoding_detector.py,sha256=f43Db4kFuqBNtKdzwEYlxpX4BXY3yhcINwX10PF3bMw,4991
|
|
12
|
+
yaralyzer/helpers/bytes_helper.py,sha256=JV0xUMEf1HZO7VTts1G5lyJy7aon6OF6RarHyzi8HTA,10155
|
|
13
|
+
yaralyzer/helpers/dict_helper.py,sha256=rhyu-xlpl4yevXdLZUIgVwap0b57O9I3DNAEv8MfTlI,186
|
|
14
|
+
yaralyzer/helpers/file_helper.py,sha256=iieakYcZ4xgoKdy1CbHpYdqmUopwLpcfXdq2V0QVJpo,1258
|
|
15
|
+
yaralyzer/helpers/list_helper.py,sha256=zX6VzJDbnyxuwQpth5Mc7k7yeJytqWPzpo1v5nXCMtE,394
|
|
16
|
+
yaralyzer/helpers/rich_text_helper.py,sha256=b5Pong_mmUlwFYfM91Dvbfsg6GdQG-Lw9fCXSQMAy2I,4266
|
|
17
|
+
yaralyzer/helpers/string_helper.py,sha256=8XsvYlKn-fGhKihfJBOG6mqi5nV_8LM-IWgHzvkRgCc,933
|
|
18
|
+
yaralyzer/output/decoding_attempts_table.py,sha256=x6AViJqAj7ept92OXWl9-PVk8MyBSyYt62mUgJjsP7U,4040
|
|
19
|
+
yaralyzer/output/file_export.py,sha256=5voZi5nujj1YBYo3WinxiPvArrDJrJHC8o1ogPvUPdA,2970
|
|
20
|
+
yaralyzer/output/file_hashes_table.py,sha256=xHk18Xs6Kx6Wf15Y7MUyC5Ndjf-qvzpv8Kd64v2KsVo,2563
|
|
21
|
+
yaralyzer/output/regex_match_metrics.py,sha256=_3G4xhK0os1Ll8oNS9alaJciWRtdUFxu2oGYsNUHLy4,3070
|
|
22
|
+
yaralyzer/output/rich_console.py,sha256=2GHMgFYZrpZ_Q-8AAiAgQXEHqvGSE0L_1BwOZXw_dw0,5009
|
|
23
|
+
yaralyzer/util/argument_parser.py,sha256=XRAS6eraCdrGZRK1ybKppR3Tr7razbg4MnMD2vDq8Po,12971
|
|
24
|
+
yaralyzer/util/logging.py,sha256=BKVDDIy1eh3vdZ4aoYdemukCatxB2kYNuMd0RnYDbT8,4244
|
|
25
|
+
yaralyzer/yara/yara_match.py,sha256=F_1tn1ynbTwzOWSblis02DlVunn-vY3IPX8QjJhukMs,5118
|
|
26
|
+
yaralyzer/yara/yara_rule_builder.py,sha256=P7NPzMMz03V1rDH3PMwb3VAbpBFD-oLNkiCIJAtLa7A,2990
|
|
27
|
+
yaralyzer/yaralyzer.py,sha256=FJsyThTDWhROfQm7FoWpCJ0plfFh5xq5HgikJJ_2t2w,10209
|
|
28
|
+
yaralyzer-1.0.8.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
29
|
+
yaralyzer-1.0.8.dist-info/METADATA,sha256=onnGZCtkMOq5mlgYB8UkB_0woJu8FtN7fR52w8Zyxu0,11107
|
|
30
|
+
yaralyzer-1.0.8.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
31
|
+
yaralyzer-1.0.8.dist-info/entry_points.txt,sha256=7LnLJrNTfql0vuctjRWwp_ZD-BYvtv9ENVipdjuT7XI,136
|
|
32
|
+
yaralyzer-1.0.8.dist-info/RECORD,,
|