yaralyzer 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ """
2
+ Helper methods to work with bytes.
3
+ """
4
+ import re
5
+ from io import StringIO
6
+ from sys import byteorder
7
+
8
+ from rich.console import Console
9
+ from rich.markup import escape
10
+ from rich.padding import Padding
11
+ from rich.text import Text
12
+
13
+ from yaralyzer.bytes_match import BytesMatch
14
+ from yaralyzer.config import YaralyzerConfig
15
+ from yaralyzer.encoding_detection.character_encodings import NEWLINE_BYTE, encoding_width
16
+ from yaralyzer.helpers.rich_text_helper import newline_join
17
+ from yaralyzer.output.rich_console import (BYTES, BYTES_BRIGHTER, BYTES_BRIGHTEST,
18
+ BYTES_HIGHLIGHT, GREY, console, console_width)
19
+ from yaralyzer.util.logging import log
20
+
21
# Number of bytes shown in each visual group of the hex / ascii preview.
HEX_CHARS_PER_GROUP = 8
# 35 for first 3 cols, 5 for in between hex and ascii
SUBTABLE_MAX_WIDTH = console_width() - 35 - 5
# Width budgeted for one group: 3 chars per byte in the hex view plus 1 char per byte in the
# ascii view, plus 4 for padding between groups.
HEX_UNIT_LENGTH = (HEX_CHARS_PER_GROUP * 3) + HEX_CHARS_PER_GROUP + 4
# Never let this drop to 0: on a very narrow console the integer division yields 0, which would be
# used as a range() step (ValueError) and as a wrap width of 0 by the hex/ascii view builders.
HEX_GROUPS_PER_LINE = max(1, SUBTABLE_MAX_WIDTH // HEX_UNIT_LENGTH)
HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE
26
+
27
+
28
def get_bytes_before_and_after_match(_bytes: bytes, match: re.Match, num_before=None, num_after=None) -> bytes:
    """
    Return the matched bytes together with some context on either side of the match.

    Thin wrapper that unpacks the span of `match` and delegates to `get_bytes_surrounding_range()`.

    Args:
        _bytes (bytes): The full byte sequence the match was found in.
        match (re.Match): The regex `Match` object locating the bytes of interest.
        num_before (int, optional): Number of bytes before the match to include. Passed through unchanged.
        num_after (int, optional): Number of bytes after the match to include. Passed through unchanged.

    Returns:
        bytes: The surrounding bytes including the match.
    """
    match_start, match_end = match.span()
    return get_bytes_surrounding_range(_bytes, match_start, match_end, num_before, num_after)
43
+
44
+
45
def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num_before=None, num_after=None) -> bytes:
    """
    Get bytes surrounding a specified range in a byte sequence.

    The returned slice is clamped to the bounds of `_bytes`, so asking for more context than
    exists simply returns everything up to the start / end of the sequence.

    Args:
        _bytes (bytes): The full byte sequence.
        start_idx (int): Start index of the range.
        end_idx (int): End index of the range.
        num_before (int, optional): Number of bytes before the range. Defaults to the configured
            `surrounding_bytes` value. An explicit `0` means "no bytes before".
        num_after (int, optional): Number of bytes after the range. Defaults to `num_before` if that
            was provided, otherwise to the configured `surrounding_bytes` value. An explicit `0`
            means "no bytes after".

    Returns:
        bytes: The surrounding bytes including the range.
    """
    # Compare against None (not truthiness) so that an explicit 0 is honored instead of being
    # silently replaced by the default.
    if num_after is None:
        num_after = num_before if num_before is not None else YaralyzerConfig.args.surrounding_bytes

    if num_before is None:
        num_before = YaralyzerConfig.args.surrounding_bytes

    slice_start = max(start_idx - num_before, 0)
    slice_end = min(end_idx + num_after, len(_bytes))
    return _bytes[slice_start:slice_end]
64
+
65
+
66
def clean_byte_string(bytes_array: bytes) -> str:
    r"""
    Render bytes as a string and strip the surrounding b'' / b"" wrapper Python adds.

    The bytes are written through a throwaway rich `Console` backed by an in-memory buffer
    and the captured output is then unwrapped.

    Args:
        bytes_array (bytes): The bytes to convert.

    Returns:
        str: String representation of the bytes without the leading b' / b" and trailing quote.

    Raises:
        RuntimeError: If the captured output does not start with b' or b".
    """
    capture = StringIO()
    Console(file=capture).out(bytes_array, end='')
    rendered = capture.getvalue()

    # Python picks single or double quotes depending on the content, so try both wrappers.
    for quote in ("'", '"'):
        wrapper_start = 'b' + quote

        if rendered.startswith(wrapper_start):
            return rendered.removeprefix(wrapper_start).removesuffix(quote)

    raise RuntimeError(f"Unexpected byte string {rendered}")
89
+
90
+
91
def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
    """
    Return a rich `Text` object of raw bytes, highlighting the matched bytes.

    Args:
        _bytes (bytes): The full byte sequence.
        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.

    Returns:
        Text: Rich Text object with the match highlighted, or the whole string in the
            non-highlight style if the match could not be located in the string representation.
    """
    surrounding_bytes_str = clean_byte_string(_bytes)
    highlighted_bytes_str = clean_byte_string(bytes_match.bytes)
    highlight_idx = _find_str_rep_of_bytes(surrounding_bytes_str, highlighted_bytes_str, bytes_match)

    # The lookup returns -1 when the match can't be found. Slicing with -1 would drop / duplicate
    # characters, so show the bytes without any highlighting instead.
    if highlight_idx < 0:
        return Text(surrounding_bytes_str, style=GREY)

    highlight_end_idx = highlight_idx + len(highlighted_bytes_str)
    txt = Text(surrounding_bytes_str[:highlight_idx], style=GREY)
    txt.append(surrounding_bytes_str[highlight_idx:highlight_end_idx], style=bytes_match.highlight_style)
    txt.append(surrounding_bytes_str[highlight_end_idx:], style=GREY)
    return txt
112
+
113
+
114
def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
    """
    Build the hexadecimal view of some bytes with the matched bytes highlighted.

    The hex string is wrapped into lines and each line is wrapped again into groups that
    are joined with a single space `Text` separator.

    Args:
        _bytes (bytes): The full byte sequence.
        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.

    Returns:
        Text: Rich Text object with highlighted match in hex view.
    """
    # Each byte occupies 3 chars in the hex string: two hex digits and the separating space.
    chars_per_byte = 3
    hex_txt = hex_text(_bytes)

    hex_txt.stylize(
        bytes_match.highlight_style,
        bytes_match.highlight_start_idx * chars_per_byte,
        bytes_match.highlight_end_idx * chars_per_byte,
    )

    rows = []

    for line in hex_txt.wrap(console, HEX_CHARS_PER_LINE * chars_per_byte):
        groups = line.wrap(console, HEX_CHARS_PER_GROUP * chars_per_byte)
        rows.append(Text(' ').join(groups))

    return newline_join(rows)
131
+
132
+
133
def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
    """
    Return an ASCII view of raw bytes, highlighting the matched bytes.

    Printable ASCII bytes (32-126) are shown as their character; every other byte is shown
    as '*'. Bytes outside the highlighted range are rendered in dim colors.

    Args:
        _bytes (bytes): The full byte sequence.
        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.

    Returns:
        Text: Rich Text object with highlighted match in ASCII view.
    """
    txt = Text('', style=BYTES)

    for i, b in enumerate(_bytes):
        # `highlight_end_idx` is treated as exclusive here so this view highlights exactly the same
        # bytes as hex_view_of_raw_bytes(), which uses it as the exclusive end of the stylized range.
        # NOTE(review): assumes BytesMatch.highlight_end_idx is start + match length - confirm.
        if bytes_match.highlight_start_idx <= i < bytes_match.highlight_end_idx:
            printable_style = None
            unprintable_style = None
        else:
            printable_style = 'color(246)'
            unprintable_style = 'color(234)'

        if b < 32:
            txt.append('*', style=unprintable_style or BYTES_BRIGHTER)
        elif b < 127:
            # Bytes 32-126 are single printable ASCII characters.
            txt.append(chr(b), printable_style or BYTES_BRIGHTEST)
        elif b <= 160:
            txt.append('*', style=unprintable_style or BYTES_HIGHLIGHT)
        else:
            txt.append('*', style=unprintable_style or BYTES)

    # Chop into groups of bytes, then lay out a fixed number of groups per line.
    segments = [txt[i:i + HEX_CHARS_PER_GROUP] for i in range(0, len(txt), HEX_CHARS_PER_GROUP)]

    lines = [
        Text(' ').join(segments[i:i + HEX_GROUPS_PER_LINE])
        for i in range(0, len(segments), HEX_GROUPS_PER_LINE)
    ]

    return newline_join(lines)
173
+
174
+
175
def hex_text(_bytes: bytes) -> Text:
    """
    Wrap the hex string for some bytes in a rich `Text` object.

    Args:
        _bytes (bytes): The bytes to convert.

    Returns:
        Text: The output of `hex_string()` styled with the `GREY` style.
    """
    hex_repr = hex_string(_bytes)
    return Text(hex_repr, style=GREY)
186
+
187
+
188
def hex_string(_bytes: bytes) -> str:
    """
    Format bytes as space separated, zero padded, lowercase two digit hex values.

    Args:
        _bytes (bytes): The bytes to convert.

    Returns:
        str: Hex representation such as '00 ff 0a' (empty string for empty input).
    """
    return ' '.join(f"{byte_value:02x}" for byte_value in _bytes)
199
+
200
+
201
def print_bytes(bytes_array: bytes, style: str | None = None, indent: int = 0) -> None:
    """
    Print a string representation of some bytes to the console, one console line per
    newline-delimited chunk of the input.

    Args:
        bytes_array (bytes): The bytes to print.
        style (str, optional): Style to use for printing. Defaults to 'bytes'.
        indent (int, optional): Left padding applied to every printed line. Defaults to 0.
    """
    print_style = style or 'bytes'
    left_padding_only = (0, 0, 0, indent)

    for line in bytes_array.split(NEWLINE_BYTE):
        # Escape the text so square brackets in the bytes aren't interpreted as rich markup.
        printable = escape(clean_byte_string(line))
        console.print(Padding(printable, left_padding_only), style=print_style)
212
+
213
+
214
def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
    """
    Trim trailing bytes so the length is a multiple of the encoding's character width.
    For example, for utf-16 this means truncating to a multiple of 2, for utf-32 to a multiple of 4.

    Input is returned untouched when the encoding is single-byte, when the input is no longer
    than one character, or when the length is already an exact multiple of the width.

    Args:
        _bytes (bytes): The bytes to truncate.
        encoding (str): The encoding to consider.

    Returns:
        bytes: Truncated bytes.
    """
    char_width = encoding_width(encoding)
    num_bytes = len(_bytes)
    num_extra_bytes = num_bytes % char_width
    needs_truncation = char_width > 1 and num_bytes > char_width and num_extra_bytes != 0
    return _bytes[:-num_extra_bytes] if needs_truncation else _bytes
234
+
235
+
236
def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
    r"""
    Locate the string form of the matched bytes inside the string form of the surrounding bytes.

    Both strings are printable representations of binary data, which can be longer than the
    data itself (an escaped byte can take 4 chars), so byte offsets can't be used directly.

    Args:
        surrounding_bytes_str (str): String representation of the full byte sequence.
        highlighted_bytes_str (str): String representation of the matched bytes.
        highlighted_bytes (BytesMatch): The BytesMatch object; its `highlight_start_idx` is where
            the first search begins.

    Returns:
        int: The index in the surrounding string where the highlighted bytes start, or -1 if not found.
            If the first search fails the index refers to a copy of the surrounding string in which
            escaped single quotes were unescaped, so it may be off by a few chars.
    """
    # Start the search at the byte offset of the match: no char before that can belong to the match
    # and skipping them avoids false positives when looking for only 1 or 2 bytes. It's a lower
    # bound only, since many bytes take 4 chars to print.
    first_pass_idx = surrounding_bytes_str.find(highlighted_bytes_str, highlighted_bytes.highlight_start_idx)

    if first_pass_idx != -1:
        return first_pass_idx

    # TODO: Somehow \' and ' don't always come out the same :(
    log.info("Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
             "Highlighting may be off by a few chars,")

    unescaped_surrounding_str = surrounding_bytes_str.replace("\\'", "'")
    retry_idx = unescaped_surrounding_str.find(highlighted_bytes_str)

    if retry_idx == -1:
        log.warning(f"Failed to find\n{highlighted_bytes_str}\nin surrounding bytes:\n{unescaped_surrounding_str}")
        log.warning("Highlighting will not work on this decoded string.")

    return retry_idx
@@ -0,0 +1,8 @@
1
+ """
2
+ Help with dicts.
3
+ """
4
+
5
+
6
def get_dict_key_by_value(_dict: dict, value):
    """
    Reverse lookup: return the first key (in insertion order) whose value equals `value`.

    Raises:
        ValueError: If no entry in `_dict` has that value.
    """
    keys_in_order = tuple(_dict)
    position = list(_dict.values()).index(value)
    return keys_in_order[position]
@@ -0,0 +1,49 @@
1
+ """
2
+ Helper methods to work with files.
3
+ """
4
+ from datetime import datetime
5
+ from os import listdir, path
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+
9
+
10
+ def files_in_dir(dir: Path | str, with_extname: Optional[str] = None) -> List[str]:
11
+ """
12
+ Returns paths for all non dot files in `dir` (optionally filtered to only those ending in 'with_extname').
13
+
14
+ Args:
15
+ dir (str): Directory to list files from.
16
+ with_extname (Optional[str], optional): If set, only return files with this extension. Defaults to None.
17
+
18
+ Returns:
19
+ List[str]: List of file paths.
20
+ """
21
+ files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
22
+ files = [file for file in files if not path.isdir(file)]
23
+
24
+ if with_extname:
25
+ return files_with_extname(files, with_extname)
26
+ else:
27
+ return files
28
+
29
+
30
def files_with_extname(files: List[str], extname: str) -> List[str]:
    """Filter `files` down to the ones whose name ends with a dot followed by `extname`."""
    dotted_extension = f".{extname}"
    return list(filter(lambda file_name: file_name.endswith(dotted_extension), files))
33
+
34
+
35
+ def load_binary_data(file_path: Path | str) -> bytes:
36
+ """Load and return the raw `bytes` from a file."""
37
+ with open(file_path, 'rb') as f:
38
+ return f.read()
39
+
40
+
41
+ def load_file(file_path: Path | str) -> str:
42
+ """Load and return the text contents of a file."""
43
+ with open(file_path, 'r') as f:
44
+ return f.read()
45
+
46
+
47
def timestamp_for_filename() -> str:
    """Format the current local time so it can be embedded in a file name (e.g. 2024-01-31T23.59.58)."""
    right_now = datetime.now()
    return format(right_now, "%Y-%m-%dT%H.%M.%S")
@@ -0,0 +1,16 @@
1
+ """
2
+ Help with lists.
3
+ """
4
+
5
+
6
def flatten(a):
    """
    Recursively flatten nested lists into a single flat list. Only `list` instances are
    expanded; other containers (tuples etc.) are kept as individual elements.

    From https://www.geeksforgeeks.org/python/python-flatten-list-to-individual-elements/
    """
    flattened = []

    for element in a:
        if not isinstance(element, list):
            flattened.append(element)
            continue

        # Nested list: splice in its (recursively flattened) contents.
        flattened += flatten(element)

    return flattened
@@ -0,0 +1,150 @@
1
+ """
2
+ Methods to handle turning various objects into Rich text/table/etc representations
3
+
4
+ [Rich color names](https://rich.readthedocs.io/en/stable/appendix/colors.html)
5
+ TODO: interesting colors # row_styles[0] = 'reverse bold on color(144)' <-
6
+ """
7
+ from sys import exit
8
+ from typing import List, Optional, Union
9
+
10
+ from rich import box
11
+ from rich.columns import Columns
12
+ from rich.panel import Panel
13
+ from rich.style import Style
14
+ from rich.text import Text
15
+
16
+ from yaralyzer.output.rich_console import BYTES_BRIGHTEST, BYTES_HIGHLIGHT, YARALYZER_THEME_DICT, console
17
+ from yaralyzer.util.logging import log
18
+
19
# String constants
CENTER = 'center'
FOLD = 'fold'
LEFT = 'left'
MIDDLE = 'middle'
RIGHT = 'right'

# Color meter related constants. Make even sized buckets color coded from blue (cold) to green (go)
METER_COLORS = [82, 85, 71, 60, 67, 30, 24, 16][::-1]
# Width of one color bucket on a 0-100 scale (slightly widened by 0.1).
METER_INTERVAL = (100 / len(METER_COLORS)) + 0.1
# Color meter extra style thresholds (these are assuming a scale of 0-100)
UNDERLINE_CONFIDENCE_THRESHOLD = 90
BOLD_CONFIDENCE_THRESHOLD = 60
DIM_COUNTRY_THRESHOLD = 25

# For the table shown by running yaralyzer_show_color_theme
MAX_THEME_COL_SIZE = 35

# Text object defaults mostly for table entries
NO_DECODING_ERRORS_MSG = Text('No', style='green4 dim')
DECODING_ERRORS_MSG = Text('Yes', style='dark_red dim')
40
+
41
+
42
def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
    """
    Return a `Text` version of `txt` with the 'dim' style applied when `is_dim` is set.

    A `Text` argument is copied (the caller's object is not modified) and `style` is not used
    for it; a plain string is wrapped in a new `Text` using `style` (or no style).
    """
    if isinstance(txt, Text):
        result = txt.copy()
    else:
        result = Text(txt, style=style or '')

    if is_dim:
        result.stylize('dim')

    return result
50
+
51
+
52
def meter_style(meter_pct: float | int) -> str:
    """
    Build a rich style string for coloring numbers between 0 and 100 (AKA pcts).
    Closer to 100 means greener, closer to 0.0 means bluer.

    Values outside of 0-100 are logged as a warning and colored like the nearest valid value
    (0 or 100) instead of raising an IndexError or wrapping around to the wrong end of the
    color list.

    Args:
        meter_pct (float | int): The percentage to pick a style for.

    Returns:
        str: Style of the form 'color(N)', with ' bold' and ' underline' appended when `meter_pct`
            exceeds the respective confidence thresholds.
    """
    if meter_pct > 100 or meter_pct < 0:
        log.warning(f"Invalid meter_pct: {meter_pct}")

    # Clamp before bucketing so an out of range value can't produce an invalid / negative index.
    clamped_pct = min(max(meter_pct, 0), 100)
    bucket = min(int(clamped_pct / METER_INTERVAL), len(METER_COLORS) - 1)
    style = f"color({METER_COLORS[bucket]})"

    if meter_pct > BOLD_CONFIDENCE_THRESHOLD:
        style += ' bold'
    if meter_pct > UNDERLINE_CONFIDENCE_THRESHOLD:
        style += ' underline'

    return style
66
+
67
+
68
def na_txt(style: Union[str, Style] = 'white'):
    """Build the standard 'N/A' `Text` placeholder (for table cells and such) in the given style."""
    not_applicable = Text('N/A', style=style)
    return not_applicable
71
+
72
+
73
def newline_join(texts: List[Text]) -> Text:
    """Combine `Text` objects into a single `Text`, separating consecutive items with a newline."""
    line_break = Text("\n")
    return line_break.join(texts)
76
+
77
+
78
def prefix_with_style(_str: str, style: str, root_style: Optional[Union[Style, str]] = None) -> Text:
    """
    Return `_str` styled with `style` but attached to an empty `Text` whose base style is
    `root_style` (default 'white'), so text appended later doesn't inherit `style`
    (underline or whatever would otherwise last forever).
    """
    plain_root = Text('', style=root_style or 'white')
    styled_part = Text(_str, style)
    return plain_root + styled_part
81
+
82
+
83
def print_fatal_error_and_exit(error_message: str) -> None:
    """
    Print a fatal error message in a `Panel` and exit with a non-zero status.

    Args:
        error_message (str): The error message to display.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    console.line(1)
    print_header_panel(error_message, style='bold bright_red', expand=False)
    console.line(1)
    # Exit with a failure status: a bare exit() reports success (status 0) to the calling
    # shell / script even though a fatal error just occurred.
    exit(1)
94
+
95
+
96
def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple | None = None) -> None:
    """
    Print `headline` to the console inside a double-edged Rich `Panel`.

    Args:
        headline (str): The text to display as the panel's headline.
        style (str): The style to apply to the panel (e.g., color, bold, reverse).
        expand (bool, optional): Whether the panel should expand to the full console width. Defaults to `True`.
        padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to `(0, 2)`.
    """
    panel_padding = padding or (0, 2)
    header_panel = Panel(headline, box=box.DOUBLE_EDGE, expand=expand, padding=panel_padding, style=style)
    console.print(header_panel)
107
+
108
+
109
def reverse_color(style: Style) -> Style:
    """
    Build a new `Style` with the foreground and background colors of `style` swapped.
    Only the underline and bold attributes are carried over.
    """
    swapped_attributes = {
        'color': style.bgcolor,
        'bgcolor': style.color,
        'underline': style.underline,
        'bold': style.bold,
    }

    return Style(**swapped_attributes)
112
+
113
+
114
def show_color_theme(styles: dict) -> None:
    """
    Print the name of every style in `styles` to the screen as a grid, each rendered in its own
    style. The 'reset' and 'repr_url' entries are left out.
    """
    console.print(Panel('The Yaralyzer Color Theme', style='reverse'))
    skipped_names = ['reset', 'repr_url']
    swatches = []

    for name, style in styles.items():
        if name in skipped_names:
            continue

        # Names are cut off at MAX_THEME_COL_SIZE chars and followed by a single space.
        swatch = prefix_with_style(name[:MAX_THEME_COL_SIZE], style=str(style))
        swatches.append(swatch.append(' '))

    console.print(Columns(swatches, column_first=True, padding=(0, 5), equal=True))
125
+
126
+
127
def size_in_bytes_text(num_bytes: int) -> Text:
    """Convert a number of bytes into a `Text` like '54,213 bytes' (thousands separated by commas)."""
    size_txt = Text(f"{num_bytes:,d}", 'number')
    size_txt.append(' bytes', style='white')
    return size_txt
129
+
130
+
131
def size_text(num_bytes: int) -> Text:
    """Convert a number of bytes into a `Text` like '52.9 kb (54,213 bytes)'."""
    kilobytes = "{:,.1f}".format(num_bytes / 1024)
    kb_txt = prefix_with_style(kilobytes, style='bright_cyan', root_style='white')
    kb_txt.append(' kb ')

    # Exact byte count in parentheses after the kilobyte figure.
    parenthesized_bytes = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
    return kb_txt + parenthesized_bytes
137
+
138
+
139
def unprintable_byte_to_text(code: str, style: str = '') -> Text:
    """
    Build a bracketed, upper-cased label such as '[NBSP]' for an unprintable character
    (ASCII escape codes and the like). The brackets use `style`; the code inside is also
    italic and dim. `BYTES_BRIGHTEST` is swapped for `BYTES_HIGHLIGHT`.
    """
    if style == BYTES_BRIGHTEST:
        style = BYTES_HIGHLIGHT

    label = Text('[', style=style)
    label.append(code.upper(), style=f"{style} italic dim")
    label.append(Text(']', style=style))
    return label
146
+
147
+
148
def yaralyzer_show_color_theme() -> None:
    """
    Script entry point that prints every style in yaralyzer's color theme
    (`YARALYZER_THEME_DICT`). Invocable with 'yaralyzer_show_colors'.
    """
    theme_styles = YARALYZER_THEME_DICT
    show_color_theme(theme_styles)
@@ -0,0 +1,34 @@
1
+ """
2
+ Helper methods to work with strings.
3
+ """
4
+ from functools import partial
5
+ from typing import Any, Callable, List
6
+
7
# Number of spaces in one level of indentation, and the matching whitespace string.
INDENT_DEPTH = 4
INDENT_SPACES = ' ' * INDENT_DEPTH
9
+
10
+
11
+ def escape_yara_pattern(pattern: str) -> str:
12
+ return pattern.replace('/', '\\/')
13
+
14
+
15
+ def line_count(_string: str) -> int:
16
+ return len(_string.split("\n"))
17
+
18
+
19
+ def hex_to_string(_string: str) -> str:
20
+ r"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
21
+ return bytearray.fromhex(_string.replace(' ', '')).decode()
22
+
23
+
24
+ def str_join(_list: List[Any], separator: str, func: Callable = str) -> str:
25
+ """
26
+ Return a comma separated list of strings. If func is provided the output of calling
27
+ it on each element of the list will be used instead of str()
28
+ """
29
+ func = func or str
30
+ return separator.join([func(item) for item in _list])
31
+
32
+
33
+ comma_join = partial(str_join, separator=', ')
34
+ newline_join = partial(str_join, separator='\n')
@@ -0,0 +1,82 @@
1
+ """
2
+ Methods to build the `rich.table` used to display decoding attempts of a given bytes array.
3
+
4
+ Final output should be a `rich.table` of decoding attempts that are sorted like this:
5
+
6
+ 1. String representation of undecoded bytes is always the first row
7
+
8
+ 2. Encodings which `chardet.detect()` ranked as > 0% likelihood are sorted based on that confidence
9
+
10
+ 3. Then the unchardetectable:
11
+
12
+ 1. Decodings that were successful, unforced, and new
13
+
14
+ 2. Decodings that were "successful" but forced
15
+
16
+ 3. Decodings that were the same as other decodings
17
+
18
+ 4. Failed decodings
19
+ """
20
+ from collections import namedtuple
21
+
22
+ from rich import box
23
+ from rich.table import Table
24
+ from rich.text import Text
25
+
26
+ from yaralyzer.bytes_match import BytesMatch
27
+ from yaralyzer.helpers.bytes_helper import ascii_view_of_raw_bytes, hex_view_of_raw_bytes, rich_text_view_of_raw_bytes
28
+ from yaralyzer.helpers.rich_text_helper import CENTER, FOLD, MIDDLE, RIGHT, na_txt
29
+
30
# Labels for the first column of the two undecoded rows (hex view and raw bytes view).
HEX = Text('HEX', style='bytes.title')
RAW_BYTES = Text('Raw', style='bytes')
32
+
33
+
34
def new_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
    """
    Build a new rich `Table` for decoding attempts that starts out with two rows: the hex view
    followed by the raw bytes view of the `bytes_match` data.
    """
    table = Table(show_lines=True, border_style='bytes', header_style='decode.table_header')

    # (title, per column options); every column also folds on overflow and is vertically centered.
    column_specs = (
        ('Encoding', {'justify': RIGHT, 'width': 12}),
        ('Detect Odds', {'justify': CENTER, 'width': len('Detect')}),
        ('Used\nForce?', {'justify': CENTER, 'width': len('Force?')}),
        ('Decoded Output', {'justify': 'left'}),
    )

    for title, options in column_specs:
        table.add_column(title, overflow=FOLD, vertical=MIDDLE, **options)

    # The odds / force columns don't apply to the undecoded rows so they get an N/A placeholder.
    hex_na = na_txt(style=HEX.style)
    table.add_row(HEX, hex_na, hex_na, _hex_preview_subtable(bytes_match))

    raw_na = na_txt(style=RAW_BYTES.style)
    raw_view = rich_text_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match)
    table.add_row(RAW_BYTES, raw_na, raw_na, raw_view)
    return table
52
+
53
+
54
def _hex_preview_subtable(bytes_match: BytesMatch) -> Table:
    """
    Build the nested `Table` shown in the hex row: hex view in the first column, ascii view
    in the second.

    Args:
        bytes_match (BytesMatch): The `BytesMatch` object containing the bytes to display.

    Returns:
        Table: A `rich.table` with hex and ascii views of the bytes.
    """
    table_options = {
        'border_style': 'grey.darkest',
        'header_style': 'decode.table_header',
        'box': box.MINIMAL,
        'show_lines': True,
        'show_header': True,
        'show_edge': False,
        'padding': (0, 1, 0, 2),
        'pad_edge': False,
    }

    hex_table = Table('hex', 'ascii', **table_options)
    hex_view = hex_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match)
    ascii_view = ascii_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match)
    hex_table.add_row(hex_view, ascii_view)
    return hex_table
@@ -0,0 +1,60 @@
1
+ from dataclasses import dataclass, field
2
+
3
+ from rich.text import Text
4
+
5
+ from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
6
+ from yaralyzer.helpers.rich_text_helper import na_txt
7
+
8
+ DECODE_NOT_ATTEMPTED_MSG = Text('(decode not attempted)', style='no_attempt')
9
+
10
+
11
@dataclass
class DecodingTableRow:
    """One row of the decoding attempts table plus the metadata used to sort the rows."""

    # The four displayed cells, in column order.
    encoding_label: Text
    confidence_text: Text
    errors_while_decoded: Text  # This is really "is_forced"?
    decoded_string: Text
    # Properties below here are not displayed in the table but are used for sorting etc.
    confidence: float
    encoding: str
    sort_score: float
    # Plain string version of `encoding_label`, filled in by __post_init__().
    encoding_label_plain: str = field(init=False)

    def __post_init__(self):
        """Derive the unstyled encoding label from the `Text` label."""
        self.encoding_label_plain = self.encoding_label.plain

    def to_row_list(self) -> list[Text]:
        """Return the displayed cells in table column order."""
        displayed_cells = (
            self.encoding_label,
            self.confidence_text,
            self.errors_while_decoded,
            self.decoded_string,
        )

        return list(displayed_cells)

    @classmethod
    def from_decoded_assessment(cls, assessment: EncodingAssessment, is_forced: Text, txt: Text, score: float) -> 'DecodingTableRow':
        """
        Alternate constructor that builds a table row for a decoding attempt.

        Args:
            assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
            is_forced (Text): Text indicating if the decode was forced.
            txt (Text): The decoded string as a rich `Text` object (with highlighting).
            score (float): The score to use for sorting this row in the table.
        """
        row_fields = {
            'encoding_label': assessment.encoding_label,
            'confidence_text': assessment.confidence_text,
            'errors_while_decoded': is_forced,
            'decoded_string': txt,
            'confidence': assessment.confidence,
            'encoding': assessment.encoding,
            'sort_score': score,
        }

        return cls(**row_fields)

    @classmethod
    def from_undecoded_assessment(cls, assessment: EncodingAssessment, score: float) -> 'DecodingTableRow':
        """
        Alternate constructor for a row with just `chardet` assessment confidence data and no actual
        decoding attempt string: the forced column shows N/A and the output column shows the
        "decode not attempted" message.

        Args:
            assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
            score (float): The score to use for sorting this row within the table.
        """
        return cls.from_decoded_assessment(
            assessment,
            is_forced=na_txt(),
            txt=DECODE_NOT_ATTEMPTED_MSG,
            score=score,
        )