yaralyzer 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ """
2
+ Functions to export Yaralyzer results to various file formats.
3
+ """
4
+ import json
5
+ import time
6
+ from os import path
7
+ from typing import Callable, Optional
8
+
9
+ from rich.terminal_theme import TerminalTheme
10
+
11
+ from yaralyzer.util.logging import log_and_print
12
+ from yaralyzer.yaralyzer import Yaralyzer
13
+
14
# Terminal palette handed to Rich's export methods (see _EXPORT_KWARGS below).
# This one just swaps white for black in DEFAULT_TERMINAL_THEME.
YARALYZER_TERMINAL_THEME = TerminalTheme(
    background=(0, 0, 0),
    foreground=(255, 255, 255),
    # The 8 standard ANSI colors
    normal=[
        (0, 0, 0),        # black
        (128, 0, 0),      # red
        (0, 128, 0),      # green
        (128, 128, 0),    # yellow
        (0, 0, 128),      # blue
        (128, 0, 128),    # magenta
        (0, 128, 128),    # cyan
        (192, 192, 192),  # white
    ],
    # The 8 "bright" ANSI colors
    bright=[
        (128, 128, 128),  # bright black
        (255, 0, 0),      # bright red
        (0, 255, 0),      # bright green
        (255, 255, 0),    # bright yellow
        (0, 0, 255),      # bright blue
        (255, 0, 255),    # bright magenta
        (0, 255, 255),    # bright cyan
        (255, 255, 255),  # bright white
    ],
)
39
+
40
# Maps the name of each supported export method to the keyword arguments that are always
# passed to it. Private to this module: go through invoke_rich_export() instead of reading it.
_EXPORT_KWARGS = {
    'save_html': {'inline_styles': True, 'theme': YARALYZER_TERMINAL_THEME},
    'save_svg': {'theme': YARALYZER_TERMINAL_THEME},
    'save_text': {'styles': True},
}
54
+
55
+
56
def export_json(yaralyzer: Yaralyzer, output_basepath: Optional[str]) -> str:
    """
    Write the YARA scan results held by `yaralyzer` to a JSON file.

    Args:
        yaralyzer (Yaralyzer): The `Yaralyzer` object whose matches should be exported.
        output_basepath (Optional[str]): Output path without a file extension. A falsy value
            means the file is written as 'yara_matches.json'.

    Returns:
        str: Path of the JSON file that was written.
    """
    basepath = output_basepath or 'yara_matches'
    output_path = f"{basepath}.json"

    # Collect every match up front; the decoder half of each pair is not needed for the export.
    matches_data = []

    for bytes_match, _bytes_decoder in yaralyzer.match_iterator():
        matches_data.append(bytes_match.to_json())

    with open(output_path, 'w') as json_file:
        json.dump(matches_data, json_file, indent=4)

    log_and_print(f"YARA matches exported to JSON file: '{output_path}'")
    return output_path
79
+
80
+
81
def invoke_rich_export(export_method: Callable, output_file_basepath: str) -> str:
    """
    Announce the export, perform the export, and announce completion.

    Args:
        export_method (Callable): Usually a `Rich.console.save_whatever()` method. Its `__name__`
            must be one of the keys of `_EXPORT_KWARGS`.
        output_file_basepath (str): Path to write output to. Should have no file extension.

    Returns:
        str: Path data was exported to.

    Raises:
        RuntimeError: If `export_method.__name__` is not a key of `_EXPORT_KWARGS`.
    """
    method_name = export_method.__name__

    # Reject unsupported methods before deriving anything from the name.
    if method_name not in _EXPORT_KWARGS:
        raise RuntimeError(f"{method_name} is not a valid Rich.console export method!")

    # 'save_text' writes a .txt file; every other method uses the part after the last underscore
    # as its extension (e.g. 'save_svg' -> 'svg').
    extname = 'txt' if method_name == 'save_text' else method_name.split('_')[-1]
    output_file_path = f"{output_file_basepath}.{extname}"

    # Build a fresh dict so the shared module-level defaults are never mutated. Rich's export
    # methods take `clear` to control whether the recorded output is discarded after saving.
    kwargs = {**_EXPORT_KWARGS[method_name], 'clear': False}

    if 'svg' in method_name:
        kwargs['title'] = path.basename(output_file_path)

    # Invoke it
    log_and_print(f"Invoking Rich.console.{method_name}('{output_file_path}') with kwargs: '{kwargs}'...")
    start_time = time.perf_counter()
    export_method(output_file_path, **kwargs)
    elapsed_time = time.perf_counter() - start_time
    # '.2f' gives two decimal places ('02f' was only a minimum width and printed six decimals).
    log_and_print(f"'{output_file_path}' written in {elapsed_time:.2f} seconds")
    return output_file_path
@@ -0,0 +1,82 @@
1
+ """
2
+ Methods for computing and displaying various file hashes.
3
+ """
4
+ import hashlib
5
+ from collections import namedtuple
6
+ from typing import Optional, Union
7
+
8
+ from rich.table import Column, Table
9
+
10
+ from yaralyzer.helpers.rich_text_helper import LEFT, size_text
11
+ from yaralyzer.output.rich_console import GREY
12
+
13
# Immutable record holding the size of a byte sequence together with its hex digests.
BytesInfo = namedtuple('BytesInfo', 'size md5 sha1 sha256')
14
+
15
+
16
def bytes_hashes_table(
    bytes_or_bytes_info: Union[bytes, BytesInfo],
    title: Optional[str] = None,
    title_justify: str = LEFT
) -> Table:
    """
    Build a Rich `Table` showing the size plus the MD5, SHA1, and SHA256 digests of some bytes.

    Args:
        bytes_or_bytes_info (Union[bytes, BytesInfo]): Raw `bytes` (hashed here) or a `BytesInfo`
            namedtuple whose values are used as is.
        title (Optional[str], optional): If truthy, the table is titled " <title> Bytes Info".
            Defaults to `None` (no title).
        title_justify (str, optional): Justification for the table title. Defaults to `LEFT`.

    Returns:
        Table: Two column table whose header row carries the size and whose rows are the digests.
    """
    bytes_info = bytes_or_bytes_info

    if isinstance(bytes_info, bytes):
        bytes_info = compute_file_hashes(bytes_info)

    table_title = f" {title} Bytes Info" if title else None
    # The size is displayed as the header of the second column rather than as a row of its own.
    size_column = Column(size_text(bytes_info.size))
    table = Table('Size', size_column, title=table_title, title_style=GREY, title_justify=title_justify)

    digest_rows = (
        ('MD5', bytes_info.md5),
        ('SHA1', bytes_info.sha1),
        ('SHA256', bytes_info.sha256),
    )

    for hash_name, hash_value in digest_rows:
        table.add_row(hash_name, hash_value)

    values_column = table.columns[1]
    values_column.style = 'orange3'
    values_column.header_style = 'bright_cyan'
    return table
51
+
52
+
53
def compute_file_hashes(_bytes: bytes) -> BytesInfo:
    """
    Measure a byte sequence and hash it with MD5, SHA1, and SHA256.

    Args:
        _bytes (bytes): The `bytes` to hash.

    Returns:
        BytesInfo: `BytesInfo` namedtuple with the length of `_bytes` and the three digests
            as upper case hex strings.
    """
    def _upper_hex_digest(hash_constructor) -> str:
        return hash_constructor(_bytes).hexdigest().upper()

    return BytesInfo(
        size=len(_bytes),
        md5=_upper_hex_digest(hashlib.md5),
        sha1=_upper_hex_digest(hashlib.sha1),
        sha256=_upper_hex_digest(hashlib.sha256),
    )
69
+
70
+
71
def compute_file_hashes_for_file(file_path) -> BytesInfo:
    """
    Read a file in binary mode and compute the size and hashes of its contents.

    Args:
        file_path (str): Path to the file to hash.

    Returns:
        BytesInfo: `BytesInfo` namedtuple containing size, md5, sha1, and sha256 values for the file contents.
    """
    # The whole file is read into memory before hashing.
    with open(file_path, 'rb') as binary_file:
        file_bytes = binary_file.read()
        return compute_file_hashes(file_bytes)
@@ -0,0 +1,97 @@
1
+ """
2
+ `RegexMatchMetrics` class.
3
+ """
4
+ from collections import defaultdict
5
+
6
+ from yaralyzer.decoding.bytes_decoder import BytesDecoder
7
+ from yaralyzer.util.logging import log
8
+
9
+
10
class RegexMatchMetrics:
    """
    Class to measure what we encounter as we iterate over all matches of a relatively simple byte level regex.

    Things like how many of our matched bytes we were able to decode easily vs. by force vs. not at all,
    and whether some encodings have a higher pct of success than others (indicating part of our mystery
    data might be encoded that way).

    Example:
        "Find bytes between quotes" against a relatively large pool of close to random encrypted binary data.

    Attributes:
        match_count (int): Total number of matches found.
        bytes_matched (int): Total number of bytes matched across all matches.
        matches_decoded (int): Number of matches where we were able to decode at least some of the matched bytes.
        easy_decode_count (int): Number of matches where we were able to decode the matched bytes without forcing.
        forced_decode_count (int): Number of matches where we were only able to decode the matched bytes by forcing.
        undecodable_count (int): Number of matches where we were unable to decode any of the matched bytes.
        skipped_matches_lengths (defaultdict): Dictionary mapping lengths of skipped matches to their counts.
        bytes_match_objs (list): List of `BytesMatch` objects for all matches encountered.
        per_encoding_stats (defaultdict): Dictionary mapping encoding names to their respective `RegexMatchMetrics`.

    Note:
        `tally_match()` only increments `matches_decoded`, `easy_decode_count`, `forced_decode_count` and
        `undecodable_count` on the entries of `per_encoding_stats`, never on the object it is called on.

    TODO: use @dataclass decorator https://realpython.com/python-data-classes/
    """

    def __init__(self) -> None:
        self.match_count = 0
        self.bytes_matched = 0
        self.matches_decoded = 0
        self.easy_decode_count = 0
        self.forced_decode_count = 0
        self.undecodable_count = 0
        self.skipped_matches_lengths = defaultdict(int)
        self.bytes_match_objs = []  # Keep a copy of all matches in memory
        self.per_encoding_stats = defaultdict(RegexMatchMetrics)

    def num_matches_skipped_for_being_empty(self) -> int:
        """Number of matches skipped for being empty (0 length)."""
        # .get() instead of [0]: indexing a defaultdict would insert the key, so merely reading
        # this number (e.g. through str()) would change the object and break later == checks.
        return self.skipped_matches_lengths.get(0, 0)

    def num_matches_skipped_for_being_too_big(self) -> int:
        """Number of matches skipped for being too big to decode."""
        return sum(count for length, count in self.skipped_matches_lengths.items() if length > 0)

    def tally_match(self, decoder: "BytesDecoder") -> None:
        """
        Tally statistics from a `BytesDecoder` after it has processed a match.

        Args:
            decoder (BytesDecoder): The `BytesDecoder` that processed a match.
        """
        bytes_match = decoder.bytes_match
        log.debug(f"Tallying {bytes_match} ({len(decoder.decodings)} decodings)")
        self.match_count += 1
        self.bytes_matched += bytes_match.match_length
        self.bytes_match_objs.append(bytes_match)

        if not bytes_match.is_decodable():
            self.skipped_matches_lengths[bytes_match.match_length] += 1

        for decoding_attempt in decoder.decodings:
            log.debug(f"Tallying decoding for {decoding_attempt.encoding}")
            encoding_stats = self.per_encoding_stats[decoding_attempt.encoding]

            if decoding_attempt.failed_to_decode:
                encoding_stats.undecodable_count += 1
                continue

            encoding_stats.match_count += 1
            encoding_stats.matches_decoded += 1

            # Forced vs. easy is only tallied for decodings that succeeded.
            if decoding_attempt.was_force_decoded:
                encoding_stats.forced_decode_count += 1
            else:
                encoding_stats.easy_decode_count += 1

    def __eq__(self, other):
        """Two metrics objects are equal when all of their attributes are equal."""
        if not isinstance(other, RegexMatchMetrics):
            # Let Python try the reflected comparison / fall back to identity instead of raising.
            return NotImplemented

        return vars(self) == vars(other)

    def __str__(self):
        return (
            f"<matches: {self.match_count}, "
            f"bytes: {self.bytes_matched}, "
            f"decoded: {self.matches_decoded}, "
            f"too_big: {self.num_matches_skipped_for_being_too_big()}, "
            f"empty: {self.num_matches_skipped_for_being_empty()}, "
            f"undecodable: {self.undecodable_count}>"
        )
@@ -0,0 +1,114 @@
1
+ """
2
+ Variables and methods for working with Rich text output.
3
+ """
4
+ from shutil import get_terminal_size
5
+ from typing import List
6
+
7
+ from rich.console import Console
8
+ from rich.errors import MarkupError
9
+ from rich.style import Style
10
+ from rich.text import Text
11
+ from rich.theme import Theme
12
+
13
+ from yaralyzer.config import is_env_var_set_and_not_false, is_invoked_by_pytest
14
+
15
# Colors (Rich style strings)
ALERT_STYLE = 'error'  # Name of the 'error' entry in the console theme

# Shades used when printing bytes
BYTES = 'color(100) dim'
BYTES_NO_DIM = 'color(100)'
BYTES_BRIGHTEST = 'color(220)'
BYTES_BRIGHTER = 'orange1'
BYTES_HIGHLIGHT = 'color(136)'

DANGER_HEADER = 'color(88) on white'  # Red

# Greys
DARK_GREY = 'color(236)'
GREY = 'color(241)'
GREY_ADDRESS = 'color(238)'

PEACH = 'color(215)'
27
+
28
# Style name -> style definition for the theme used by the main console.
YARALYZER_THEME_DICT = {
    # --- colors ---
    'dark_orange': 'color(58)',
    'grey': GREY,
    'grey.dark': DARK_GREY,
    'grey.dark_italic': f"{DARK_GREY} italic",
    'grey.darker_italic': 'color(8) dim italic',
    'grey.darkest': 'color(235) dim',
    'grey.light': 'color(248)',
    'off_white': 'color(245)',
    'zero_bytes': 'color(20)',

    # --- data types ---
    'encoding': 'color(158) underline bold',
    'encoding.header': 'color(158) bold',
    'encoding.language': 'dark_green italic',
    'number': 'cyan',
    'regex': 'color(218) dim',
    'no_attempt': "color(60) dim italic",

    # --- design elements ---
    'decode.section_header': 'color(100) reverse',
    'decode.subheading': PEACH,
    'decode.subheading_2': 'color(215) dim italic',
    'decode.table_header': 'color(101) bold',
    'headline': 'bold white underline',

    # --- bytes ---
    'ascii': 'color(58)',
    'ascii_unprintable': 'color(131)',
    'bytes': BYTES,
    'bytes.title_dim': 'orange1 dim',
    'bytes.title': BYTES_BRIGHTER,
    'bytes.decoded': BYTES_BRIGHTEST,

    # --- yara ---
    'matched_rule': 'on bright_black bold',
    'yara.key': DARK_GREY,
    'yara.match_var': 'color(156) italic',
    'yara.string': 'white',
    'yara.date': 'color(216)',
    'yara.url': 'color(220)',
    'yara.int': 'color(45)',
    'yara.hex': 'color(98)',
    'yara.scanned': Style(color='yellow', underline=True, bold=True),
    'yara.rules': Style(color='color(135)', underline=True, bold=True),

    # --- error log events ---
    'error': 'bright_red',
}

YARALYZER_THEME = Theme(YARALYZER_THEME_DICT)

# Width used under pytest, and one of the two candidates considered otherwise.
DEFAULT_CONSOLE_WIDTH = 160
77
+
78
+
79
def console_width_possibilities():
    """Return the candidate console widths: the terminal width (less a margin), then the default."""
    terminal_columns = get_terminal_size().columns
    # Subtract 2 from terminal cols just as a precaution in case things get weird
    return [terminal_columns - 2, DEFAULT_CONSOLE_WIDTH]
83
+
84
+
85
# Under pytest the width is pinned to the default. Otherwise take the widest candidate when
# YARALYZER_MAXIMIZE_WIDTH is set (can also be changed with the --maximize-width option) and
# the narrowest candidate when it is not.
if is_invoked_by_pytest():
    CONSOLE_WIDTH = DEFAULT_CONSOLE_WIDTH
else:
    CONSOLE_WIDTH = (
        max(console_width_possibilities())
        if is_env_var_set_and_not_false('YARALYZER_MAXIMIZE_WIDTH')
        else min(console_width_possibilities())
    )

# Many bytes take 4 chars to print (e.g. '\xcc') so this is the max bytes we can safely print in a line
CONSOLE_PRINT_BYTE_WIDTH = int(CONSOLE_WIDTH / 4.0)

console = Console(
    theme=YARALYZER_THEME,
    color_system='256',
    highlight=False,
    width=CONSOLE_WIDTH,
)
96
+
97
+
98
def console_print_with_fallback(_string: Text | str, style=None) -> None:
    """
    `rich.console.print()` with fallback to regular `print()` if there's a Rich Markup issue.

    Args:
        _string (Text | str): What to print.
        style: Optional style passed through to `console.print()`. Defaults to `None`.
    """
    try:
        console.print(_string, style=style)
    except MarkupError:
        # Use a literal color for the warning: there is no 'warn' entry in YARALYZER_THEME_DICT,
        # so requesting that style here could itself fail while we are handling the first failure.
        console.print(
            "Hit a bracket issue with rich.console printing, defaulting to plain print",
            style='bright_yellow'
        )
        print(_string.plain if isinstance(_string, Text) else _string)
105
+
106
+
107
def console_width() -> int:
    """Width set on the `console` object, or 40 when that value is unset or zero."""
    configured_width = console._width

    if configured_width:
        return configured_width

    return 40
110
+
111
+
112
def theme_colors_with_prefix(prefix: str) -> List[Text]:
    """Return one `Text` per theme style whose name starts with `prefix`: the name, rendered in that style."""
    matching_styles = []

    for style_name, style in YARALYZER_THEME.styles.items():
        if style_name.startswith(prefix):
            matching_styles.append(Text(style_name, style))

    return matching_styles