PyPI - yaralyzer - Versions diffs - 1.0.11__py3-none-any.whl - Mend

yaralyzer 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

.yaralyzer.example +65 -0
CHANGELOG.md +128 -0
LICENSE +674 -0
yaralyzer/__init__.py +76 -0
yaralyzer/bytes_match.py +276 -0
yaralyzer/config.py +126 -0
yaralyzer/decoding/bytes_decoder.py +207 -0
yaralyzer/decoding/decoding_attempt.py +222 -0
yaralyzer/encoding_detection/character_encodings.py +197 -0
yaralyzer/encoding_detection/encoding_assessment.py +83 -0
yaralyzer/encoding_detection/encoding_detector.py +145 -0
yaralyzer/helpers/bytes_helper.py +268 -0
yaralyzer/helpers/dict_helper.py +8 -0
yaralyzer/helpers/file_helper.py +49 -0
yaralyzer/helpers/list_helper.py +16 -0
yaralyzer/helpers/rich_text_helper.py +150 -0
yaralyzer/helpers/string_helper.py +34 -0
yaralyzer/output/decoding_attempts_table.py +82 -0
yaralyzer/output/decoding_table_row.py +60 -0
yaralyzer/output/file_export.py +111 -0
yaralyzer/output/file_hashes_table.py +82 -0
yaralyzer/output/regex_match_metrics.py +97 -0
yaralyzer/output/rich_console.py +114 -0
yaralyzer/util/argument_parser.py +297 -0
yaralyzer/util/logging.py +135 -0
yaralyzer/yara/error.py +90 -0
yaralyzer/yara/yara_match.py +160 -0
yaralyzer/yara/yara_rule_builder.py +164 -0
yaralyzer/yaralyzer.py +304 -0
yaralyzer-1.0.11.dist-info/LICENSE +674 -0
yaralyzer-1.0.11.dist-info/METADATA +151 -0
yaralyzer-1.0.11.dist-info/RECORD +34 -0
yaralyzer-1.0.11.dist-info/WHEEL +4 -0
yaralyzer-1.0.11.dist-info/entry_points.txt +4 -0

yaralyzer/helpers/bytes_helper.py ADDED Viewed

@@ -0,0 +1,268 @@
+"""
+Helper methods to work with bytes.
+"""
+import re
+from io import StringIO
+from sys import byteorder
+from rich.console import Console
+from rich.markup import escape
+from rich.padding import Padding
+from rich.text import Text
+from yaralyzer.bytes_match import BytesMatch
+from yaralyzer.config import YaralyzerConfig
+from yaralyzer.encoding_detection.character_encodings import NEWLINE_BYTE, encoding_width
+from yaralyzer.helpers.rich_text_helper import newline_join
+from yaralyzer.output.rich_console import (BYTES, BYTES_BRIGHTER, BYTES_BRIGHTEST,
+     BYTES_HIGHLIGHT, GREY, console, console_width)
+from yaralyzer.util.logging import log
+HEX_CHARS_PER_GROUP = 8
+SUBTABLE_MAX_WIDTH = console_width() - 35 - 5  # 35 for first 3 cols, 5 for in between hex and ascii
+HEX_UNIT_LENGTH = (HEX_CHARS_PER_GROUP * 3) + HEX_CHARS_PER_GROUP + 4  # 4 for padding between groups
+HEX_GROUPS_PER_LINE = divmod(SUBTABLE_MAX_WIDTH, HEX_UNIT_LENGTH)[0]
+HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE
+def get_bytes_before_and_after_match(_bytes: bytes, match: re.Match, num_before=None, num_after=None) -> bytes:
+    """
+    Get bytes before and after a regex match within a byte sequence.
+    Args:
+        _bytes (bytes): The full byte sequence.
+        match (re.Match): The regex `Match` object.
+        num_before (int, optional): Number of bytes before the match to include. Defaults to configured value.
+        num_after (int, optional): Number of bytes after the match to include. Defaults to either configured value
+            or the `num_before` arg value.
+    Returns:
+        bytes: The surrounding bytes including the match.
+    """
+    return get_bytes_surrounding_range(_bytes, match.start(), match.end(), num_before, num_after)
+def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num_before=None, num_after=None) -> bytes:
+    """
+    Get bytes surrounding a specified range in a byte sequence.
+    Args:
+        _bytes (bytes): The full byte sequence.
+        start_idx (int): Start index of the range.
+        end_idx (int): End index of the range.
+        num_before (int, optional): Number of bytes before the range. Defaults to configured value.
+        num_after (int, optional): Number of bytes after the range. Defaults to configured value.
+    Returns:
+        bytes: The surrounding bytes including the range.
+    """
+    num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
+    num_before = num_before or YaralyzerConfig.args.surrounding_bytes
+    start_idx = max(start_idx - num_before, 0)
+    end_idx = min(end_idx + num_after, len(_bytes))
+    return _bytes[start_idx:end_idx]
+def clean_byte_string(bytes_array: bytes) -> str:
+    r"""
+    Return a clean string representation of bytes, without Python's b'' or b"" wrappers.
+    e.g. '\x80\nx44' instead of "b'\x80\nx44'".
+    Args:
+        bytes_array (bytes): The bytes to convert.
+    Returns:
+        str: Clean string representation of the bytes.
+    """
+    byte_printer = Console(file=StringIO())
+    byte_printer.out(bytes_array, end='')
+    bytestr = byte_printer.file.getvalue()
+    if bytestr.startswith("b'"):
+        bytestr = bytestr.removeprefix("b'").removesuffix("'")
+    elif bytestr.startswith('b"'):
+        bytestr = bytestr.removeprefix('b"').removesuffix('"')
+    else:
+        raise RuntimeError(f"Unexpected byte string {bytestr}")
+    return bytestr
+def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
+    """
+    Return a rich `Text` object of raw bytes, highlighting the matched bytes.
+    Args:
+        _bytes (bytes): The full byte sequence.
+        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
+    Returns:
+        Text: Rich Text object with highlighted match.
+    """
+    surrounding_bytes_str = clean_byte_string(_bytes)
+    highlighted_bytes_str = clean_byte_string(bytes_match.bytes)
+    highlighted_bytes_str_length = len(highlighted_bytes_str)
+    highlight_idx = _find_str_rep_of_bytes(surrounding_bytes_str, highlighted_bytes_str, bytes_match)
+    txt = Text(surrounding_bytes_str[:highlight_idx], style=GREY)
+    matched_bytes_str = surrounding_bytes_str[highlight_idx:highlight_idx + highlighted_bytes_str_length]
+    txt.append(matched_bytes_str, style=bytes_match.highlight_style)
+    txt.append(surrounding_bytes_str[highlight_idx + highlighted_bytes_str_length:], style=GREY)
+    return txt
+def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
+    """
+    Return a hexadecimal view of raw bytes, highlighting the matched bytes.
+    Args:
+        _bytes (bytes): The full byte sequence.
+        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
+    Returns:
+        Text: Rich Text object with highlighted match in hex view.
+    """
+    hex_str = hex_text(_bytes)
+    highlight_start_idx = bytes_match.highlight_start_idx * 3
+    highlight_end_idx = bytes_match.highlight_end_idx * 3
+    hex_str.stylize(bytes_match.highlight_style, highlight_start_idx, highlight_end_idx)
+    lines = hex_str.wrap(console, HEX_CHARS_PER_LINE * 3)
+    return newline_join([Text('  ').join(line.wrap(console, HEX_CHARS_PER_GROUP * 3)) for line in lines])
+def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
+    """
+    Return an ASCII view of raw bytes, highlighting the matched bytes.
+    Args:
+        _bytes (bytes): The full byte sequence.
+        bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
+    Returns:
+        Text: Rich Text object with highlighted match in ASCII view.
+    """
+    txt = Text('', style=BYTES)
+    for i, b in enumerate(_bytes):
+        if i < bytes_match.highlight_start_idx or i > bytes_match.highlight_end_idx:
+            style1 = 'color(246)'
+            style2 = 'color(234)'
+        else:
+            style1 = None
+            style2 = None
+        _byte = b.to_bytes(1, byteorder)
+        if b < 32:
+            txt.append('*', style=style2 or BYTES_BRIGHTER)
+        elif b < 127:
+            txt.append(_byte.decode('UTF-8'), style1 or BYTES_BRIGHTEST)
+        elif b <= 160:
+            txt.append('*', style=style2 or BYTES_HIGHLIGHT)
+        else:
+            txt.append('*', style=style2 or BYTES)
+    segments = [txt[i:i + HEX_CHARS_PER_GROUP] for i in range(0, len(txt), HEX_CHARS_PER_GROUP)]
+    lines = [
+        Text('  ').join(segments[i:min(len(segments), i + HEX_GROUPS_PER_LINE)])
+        for i in range(0, len(segments), HEX_GROUPS_PER_LINE)
+    ]
+    return newline_join(lines)
+def hex_text(_bytes: bytes) -> Text:
+    """
+    Return a rich Text object of the hex string for the given bytes.
+    Args:
+        _bytes (bytes): The bytes to convert.
+    Returns:
+        Text: Rich Text object of the hex string.
+    """
+    return Text(hex_string(_bytes), style=GREY)
+def hex_string(_bytes: bytes) -> str:
+    """
+    Return a hex string representation of the given bytes.
+    Args:
+        _bytes (bytes): The bytes to convert.
+    Returns:
+        str: Hex string representation of the bytes.
+    """
+    return ' '.join([hex(b).removeprefix('0x').rjust(2, '0') for i, b in enumerate(_bytes)])
+def print_bytes(bytes_array: bytes, style: str | None = None, indent: int = 0) -> None:
+    """
+    Print a string representation of some bytes to the console.
+    Args:
+        bytes_array (bytes): The bytes to print.
+        style (str, optional): Style to use for printing. Defaults to 'bytes'.
+    """
+    for line in bytes_array.split(NEWLINE_BYTE):
+        padded_bytes = Padding(escape(clean_byte_string(line)), (0, 0, 0, indent))
+        console.print(padded_bytes, style=style or 'bytes')
+def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
+    """
+    Truncate bytes to a multiple of the character width for the given encoding.
+    For example, for utf-16 this means truncating to a multiple of 2, for utf-32 to a multiple of 4.
+    Args:
+        _bytes (bytes): The bytes to truncate.
+        encoding (str): The encoding to consider.
+    Returns:
+        bytes: Truncated bytes.
+    """
+    char_width = encoding_width(encoding)
+    num_bytes = len(_bytes)
+    num_extra_bytes = num_bytes % char_width
+    if char_width <= 1 or num_bytes <= char_width or num_extra_bytes == 0:
+        return _bytes
+    else:
+        return _bytes[:-num_extra_bytes]
+def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch) -> int:
+    r"""
+    Find the position of the highlighted bytes string within the surrounding bytes string.
+    Both arguments are string representations of binary data. This is needed because the string
+    representation of bytes can be longer than the actual bytes (e.g., '\\xcc' is 4 chars for 1 byte).
+    The search is done in two passes: first from `highlighted_bytes.highlight_start_idx` onwards, then,
+    if that fails, from the beginning of a copy of the surrounding string in which each backslash-escaped
+    single quote has been replaced by a bare single quote. An index found in the second pass refers to
+    that modified copy, not to the string the caller passed in.
+    Args:
+        surrounding_bytes_str (str): String representation of the full byte sequence.
+        highlighted_bytes_str (str): String representation of the matched bytes.
+        highlighted_bytes (BytesMatch): The BytesMatch object for context.
+    Returns:
+        int: The index in the surrounding string where the highlighted bytes start, or -1 if not found.
+    """
+    # Start a few chars in to avoid errors: sometimes we're searching for 1 or 2 bytes and there's a false positive
+    # in the extra bytes. This isn't perfect - it's starting us at the first index into the *bytes* that's safe to
+    # check but this is almost certainly too soon given the large % of bytes that take 4 chars to print ('\x02' etc)
+    highlight_idx = surrounding_bytes_str.find(highlighted_bytes_str, highlighted_bytes.highlight_start_idx)
+    # TODO: Somehow \' and ' don't always come out the same :(
+    if highlight_idx == -1:
+        log.info(f"Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
+                  "Highlighting may be off by a few chars,")
+        # Second pass: unescape \' to ' and search from the start of the string this time
+        surrounding_bytes_str = surrounding_bytes_str.replace("\\'", "'")
+        highlight_idx = surrounding_bytes_str.find(highlighted_bytes_str)
+        if highlight_idx == -1:
+            log.warning(f"Failed to find\n{highlighted_bytes_str}\nin surrounding bytes:\n{surrounding_bytes_str}")
+            log.warning("Highlighting will not work on this decoded string.")
+    return highlight_idx

yaralyzer/helpers/dict_helper.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+Help with dicts.
+"""
+def get_dict_key_by_value(_dict: dict, value):
+    """Inverse of the usual dict operation."""
+    return list(_dict.keys())[list(_dict.values()).index(value)]

yaralyzer/helpers/file_helper.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+Helper methods to work with files.
+"""
+from datetime import datetime
+from os import listdir, path
+from pathlib import Path
+from typing import List, Optional
+def files_in_dir(dir: Path | str, with_extname: Optional[str] = None) -> List[str]:
+    """
+    Returns paths for all non dot files in `dir` (optionally filtered to only those ending in 'with_extname').
+    Args:
+        dir (str): Directory to list files from.
+        with_extname (Optional[str], optional): If set, only return files with this extension. Defaults to None.
+    Returns:
+        List[str]: List of file paths.
+    """
+    files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
+    files = [file for file in files if not path.isdir(file)]
+    if with_extname:
+        return files_with_extname(files, with_extname)
+    else:
+        return files
+def files_with_extname(files: List[str], extname: str) -> List[str]:
+    """Return only files from the list that end with the given `extname`."""
+    return [f for f in files if f.endswith(f".{extname}")]
+def load_binary_data(file_path: Path | str) -> bytes:
+    """Load and return the raw `bytes` from a file."""
+    with open(file_path, 'rb') as f:
+        return f.read()
+def load_file(file_path: Path | str) -> str:
+    """Load and return the text contents of a file."""
+    with open(file_path, 'r') as f:
+        return f.read()
+def timestamp_for_filename() -> str:
+    """Returns a string showing current time in a file name friendly format."""
+    return datetime.now().strftime("%Y-%m-%dT%H.%M.%S")

yaralyzer/helpers/list_helper.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""
+Help with lists.
+"""
+def flatten(a):
+    """From https://www.geeksforgeeks.org/python/python-flatten-list-to-individual-elements/"""
+    return_value = []
+    for x in a:
+        if isinstance(x, list):
+            return_value.extend(flatten(x))  # Recursively flatten nested lists
+        else:
+            return_value.append(x)  # Append individual elements
+    return return_value

yaralyzer/helpers/rich_text_helper.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""
+Methods to handle turning various objects into Rich text/table/etc representations
+[Rich color names](https://rich.readthedocs.io/en/stable/appendix/colors.html)
+TODO: interesting colors # row_styles[0] = 'reverse bold on color(144)' <-
+"""
+from sys import exit
+from typing import List, Optional, Union
+from rich import box
+from rich.columns import Columns
+from rich.panel import Panel
+from rich.style import Style
+from rich.text import Text
+from yaralyzer.output.rich_console import BYTES_BRIGHTEST, BYTES_HIGHLIGHT, YARALYZER_THEME_DICT, console
+from yaralyzer.util.logging import log
+# String constants
+CENTER = 'center'
+FOLD = 'fold'
+LEFT = 'left'
+MIDDLE = 'middle'
+RIGHT = 'right'
+# Color meter realted constants. Make even sized buckets color coded from blue (cold) to green (go)
+METER_COLORS = list(reversed([82, 85, 71, 60, 67, 30, 24, 16]))
+METER_INTERVAL = (100 / float(len(METER_COLORS))) + 0.1
+# Color meter extra style thresholds (these are assuming a scale of 0-100)
+UNDERLINE_CONFIDENCE_THRESHOLD = 90
+BOLD_CONFIDENCE_THRESHOLD = 60
+DIM_COUNTRY_THRESHOLD = 25
+# For the table shown by running yaralyzer_show_color_theme
+MAX_THEME_COL_SIZE = 35
+# Text object defaults mostly for table entries
+NO_DECODING_ERRORS_MSG = Text('No', style='green4 dim')
+DECODING_ERRORS_MSG = Text('Yes', style='dark_red dim')
+def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
+    """Apply 'dim' style if 'is_dim'. 'style' overrides for Text and applies for strings."""
+    txt = txt.copy() if isinstance(txt, Text) else Text(txt, style=style or '')
+    if is_dim:
+        txt.stylize('dim')
+    return txt
+def meter_style(meter_pct: float | int) -> str:
+    """For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer."""
+    if meter_pct > 100 or meter_pct < 0:
+        log.warning(f"Invalid meter_pct: {meter_pct}")
+    color_number = METER_COLORS[int(meter_pct / METER_INTERVAL)]
+    style = f"color({color_number})"
+    if meter_pct > BOLD_CONFIDENCE_THRESHOLD:
+        style += ' bold'
+    if meter_pct > UNDERLINE_CONFIDENCE_THRESHOLD:
+        style += ' underline'
+    return style
+def na_txt(style: Union[str, Style] = 'white'):
+    """Standard N/A text for tables and such."""
+    return Text('N/A', style=style)
+def newline_join(texts: List[Text]) -> Text:
+    """Join a list of Text objects with newlines between them."""
+    return Text("\n").join(texts)
+def prefix_with_style(_str: str, style: str, root_style: Optional[Union[Style, str]] = None) -> Text:
+    """Sometimes you need a Text() object to start plain lest the underline or whatever last forever."""
+    return Text('', style=root_style or 'white') + Text(_str, style)
+def print_fatal_error_and_exit(error_message: str) -> None:
+    """
+    Print a fatal error message in a `Panel` and exit.
+    Args:
+        error_message (str): The error message to display.
+    """
+    console.line(1)
+    print_header_panel(error_message, expand=False, style='bold bright_red')
+    console.line(1)
+    exit()
+def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple | None = None) -> None:
+    """
+    Print a headline inside a styled Rich `Panel` to the console.
+    Args:
+        headline (str): The text to display as the panel's headline.
+        style (str): The style to apply to the panel (e.g., color, bold, reverse).
+        expand (bool, optional): Whether the panel should expand to the full console width. Defaults to `True`.
+        padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to `(0, 2)`.
+    """
+    console.print(Panel(headline, box=box.DOUBLE_EDGE, expand=expand, padding=padding or (0, 2), style=style))
+def reverse_color(style: Style) -> Style:
+    """Reverses the color for a given style."""
+    return Style(color=style.bgcolor, bgcolor=style.color, underline=style.underline, bold=style.bold)
+def show_color_theme(styles: dict) -> None:
+    """Print all colors in 'styles' to screen in a grid"""
+    console.print(Panel('The Yaralyzer Color Theme', style='reverse'))
+    colors = [
+        prefix_with_style(name[:MAX_THEME_COL_SIZE], style=str(style)).append(' ')
+        for name, style in styles.items()
+        if name not in ['reset', 'repr_url']
+    ]
+    console.print(Columns(colors, column_first=True, padding=(0, 5), equal=True))
+def size_in_bytes_text(num_bytes: int) -> Text:
+    return Text(f"{num_bytes:,d}", 'number').append(' bytes', style='white')
+def size_text(num_bytes: int) -> Text:
+    """Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)."""
+    kb_txt = prefix_with_style("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
+    kb_txt.append(' kb ')
+    bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
+    return kb_txt + bytes_txt
+def unprintable_byte_to_text(code: str, style: str = '') -> Text:
+    """Used with ASCII escape codes and the like, gives colored results like '[NBSP]'."""
+    style = BYTES_HIGHLIGHT if style == BYTES_BRIGHTEST else style
+    txt = Text('[', style=style)
+    txt.append(code.upper(), style=f"{style} italic dim")
+    txt.append(Text(']', style=style))
+    return txt
+def yaralyzer_show_color_theme() -> None:
+    """Script method to show yaralyzer's color theme. Invocable with 'yaralyzer_show_colors'."""
+    show_color_theme(YARALYZER_THEME_DICT)

yaralyzer/helpers/string_helper.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""
+Helper methods to work with strings.
+"""
+from functools import partial
+from typing import Any, Callable, List
+INDENT_DEPTH = 4
+INDENT_SPACES = INDENT_DEPTH * ' '
+def escape_yara_pattern(pattern: str) -> str:
+    return pattern.replace('/', '\\/')
+def line_count(_string: str) -> int:
+    return len(_string.split("\n"))
+def hex_to_string(_string: str) -> str:
+    r"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
+    return bytearray.fromhex(_string.replace(' ', '')).decode()
+def str_join(_list: List[Any], separator: str, func: Callable = str) -> str:
+    """
+    Return a comma separated list of strings. If func is provided the output of calling
+    it on each element of the list will be used instead of str()
+    """
+    func = func or str
+    return separator.join([func(item) for item in _list])
+# Pre-configured versions of str_join(): pass the list (and optionally func=...) only.
+# comma_join([1, 2]) gives '1, 2'
+comma_join = partial(str_join, separator=', ')
+# newline_join([1, 2]) gives '1\n2'
+newline_join = partial(str_join, separator='\n')

yaralyzer/output/decoding_attempts_table.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""
+Methods to build the `rich.table` used to display decoding attempts of a given bytes array.
+Final output should be a `rich.table` of decoding attempts that are sorted like this:
+    1. String representation of undecoded bytes is always the first row
+    2. Encodings which `chardet.detect()` ranked as > 0% likelihood are sorted based on that confidence
+    3. Then the unchardetectable:
+        1. Decodings that were successful, unforced, and new
+        2. Decodings that were "successful" but forced
+        3. Decodings that were the same as other decodings
+        4. Failed decodings
+"""
+from collections import namedtuple
+from rich import box
+from rich.table import Table
+from rich.text import Text
+from yaralyzer.bytes_match import BytesMatch
+from yaralyzer.helpers.bytes_helper import ascii_view_of_raw_bytes, hex_view_of_raw_bytes, rich_text_view_of_raw_bytes
+from yaralyzer.helpers.rich_text_helper import CENTER, FOLD, MIDDLE, RIGHT, na_txt
+HEX = Text('HEX', style='bytes.title')
+RAW_BYTES = Text('Raw', style=f"bytes")
+def new_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
+    """Build a new rich `Table` with two rows, the raw and hex views of the `bytes_match` data."""
+    table = Table(show_lines=True, border_style='bytes', header_style='decode.table_header')
+    def add_col(title, **kwargs):
+        kwargs['justify'] = kwargs.get('justify', CENTER)
+        table.add_column(title, overflow=FOLD, vertical=MIDDLE, **kwargs)
+    add_col('Encoding', justify=RIGHT, width=12)
+    add_col('Detect Odds', width=len('Detect'))
+    add_col('Used\nForce?', width=len('Force?'))
+    add_col('Decoded Output', justify='left')
+    na = na_txt(style=HEX.style)
+    table.add_row(HEX, na, na, _hex_preview_subtable(bytes_match))
+    na = na_txt(style=RAW_BYTES.style)
+    table.add_row(RAW_BYTES, na, na, rich_text_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match))
+    return table
+def _hex_preview_subtable(bytes_match: BytesMatch) -> Table:
+    """
+    Build a sub `Table` for hex view row (hex on one side, ascii on the other side).
+    Args:
+        bytes_match (BytesMatch): The `BytesMatch` object containing the bytes to display.
+    Returns:
+        Table: A `rich.table` with hex and ascii views of the bytes.
+    """
+    hex_table = Table(
+        'hex',
+        'ascii',
+        border_style='grey.darkest',
+        header_style='decode.table_header',
+        box=box.MINIMAL,
+        show_lines=True,
+        show_header=True,
+        show_edge=False,
+        padding=(0, 1, 0, 2),
+        pad_edge=False
+    )
+    hex_table.add_row(
+        hex_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match),
+        ascii_view_of_raw_bytes(bytes_match.surrounding_bytes, bytes_match)
+    )
+    return hex_table

yaralyzer/output/decoding_table_row.py ADDED Viewed

@@ -0,0 +1,60 @@
+from dataclasses import dataclass, field
+from rich.text import Text
+from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
+from yaralyzer.helpers.rich_text_helper import na_txt
+# Placeholder shown in the decoded output column of rows built by DecodingTableRow.from_undecoded_assessment()
+DECODE_NOT_ATTEMPTED_MSG = Text('(decode not attempted)', style='no_attempt')
+@dataclass
+class DecodingTableRow:
+    """
+    One row of the decoding attempts table: four displayed `Text` cells plus some plain values
+    that are kept alongside them for sorting etc.
+    """
+    # Displayed cells, in the order returned by to_row_list()
+    encoding_label: Text
+    confidence_text: Text
+    errors_while_decoded: Text  # This is really "is_forced"?
+    decoded_string: Text
+    # Properties below here are not displayed in the table but are used for sorting etc.
+    confidence: float
+    encoding: str
+    sort_score: float
+    # Plain string version of encoding_label; filled in by __post_init__(), not a constructor argument
+    encoding_label_plain: str = field(init=False)
+    def __post_init__(self):
+        """Derive `encoding_label_plain` from the `encoding_label` Text."""
+        self.encoding_label_plain = self.encoding_label.plain
+    def to_row_list(self) -> list[Text]:
+        """Return the four displayed cells as a list, in column order."""
+        return [self.encoding_label, self.confidence_text, self.errors_while_decoded, self.decoded_string]
+    @classmethod
+    def from_decoded_assessment(cls, assessment: EncodingAssessment, is_forced: Text, txt: Text, score: float) -> 'DecodingTableRow':
+        """
+        Alternate constructor that builds a table row for a decoding attempt.
+        The label, confidence text, confidence and encoding are all taken from `assessment`.
+        Args:
+            assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
+            is_forced (Text): Text indicating if the decode was forced.
+            txt (Text): The decoded string as a rich `Text` object (with highlighting).
+            score (float): The score to use for sorting this row in the table.
+        Returns:
+            DecodingTableRow: The new row.
+        """
+        return cls(
+            encoding_label=assessment.encoding_label,
+            confidence_text=assessment.confidence_text,
+            errors_while_decoded=is_forced,
+            decoded_string=txt,
+            confidence=assessment.confidence,
+            encoding=assessment.encoding,
+            sort_score=score,
+        )
+    @classmethod
+    def from_undecoded_assessment(cls, assessment: EncodingAssessment, score: float) -> 'DecodingTableRow':
+        """
+        Alternate constructor for a row with just `chardet` assessment confidence data and no actual
+        decoding attempt string. The forced cell is 'N/A' and the decoded output cell is
+        `DECODE_NOT_ATTEMPTED_MSG`.
+        Args:
+            assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
+            score (float): The score to use for sorting this row within the table.
+        Returns:
+            DecodingTableRow: The new row.
+        """
+        return cls.from_decoded_assessment(assessment, na_txt(), DECODE_NOT_ATTEMPTED_MSG, score)