yaralyzer 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

@@ -1,5 +1,5 @@
1
1
  """
2
- Class to smooth some of the rough edges around the dicts returned by chardet.detect_all()
2
+ Helps with `chardet` library.
3
3
  """
4
4
  from typing import Any, Optional
5
5
 
@@ -14,7 +14,23 @@ LANGUAGE = 'language'
14
14
 
15
15
 
16
16
  class EncodingAssessment:
17
+ """
18
+ Class to smooth some of the rough edges around the `dict`s returned by `chardet.detect_all()`.
19
+
20
+ Attributes:
21
+ assessment (dict): The dict returned by `chardet.detect_all()`.
22
+ encoding (str): The encoding detected, in lowercase.
23
+ confidence (float): Confidence score from 0.0 to 100.0.
24
+ confidence_text (Text): Rich `Text` object representing the confidence with styling.
25
+ language (Optional[str]): The detected language, if any.
26
+ encoding_label (Text): Rich `Text` object for displaying the encoding with optional language info.
27
+ """
28
+
17
29
  def __init__(self, assessment: dict) -> None:
30
+ """
31
+ Args:
32
+ assessment (dict): The `dict` returned by `chardet.detect_all()`.
33
+ """
18
34
  self.assessment = assessment
19
35
  self.encoding = assessment[ENCODING].lower()
20
36
 
@@ -27,14 +43,24 @@ class EncodingAssessment:
27
43
  self.set_encoding_label(self.language.title() if self.language else None)
28
44
 
29
45
  @classmethod
30
- def dummy_encoding_assessment(cls, encoding) -> 'EncodingAssessment':
31
- """Generate an empty EncodingAssessment to use as a dummy when chardet gives us nothing."""
46
+ def dummy_encoding_assessment(cls, encoding: str) -> 'EncodingAssessment':
47
+ """
48
+ Construct an empty `EncodingAssessment` to use as a dummy when `chardet` gives us nothing.
49
+
50
+ Args:
51
+ encoding (str): The encoding to use for the dummy assessment.
52
+ """
32
53
  assessment = cls({ENCODING: encoding, CONFIDENCE: 0.0})
33
54
  assessment.confidence_text = Text('none', 'no_attempt')
34
55
  return assessment
35
56
 
36
57
  def set_encoding_label(self, alt_text: Optional[str]) -> None:
37
- """Alt text is displayed below the encoding in slightly dimmer font."""
58
+ """
59
+ Alt text is displayed below the encoding in slightly dimmer font.
60
+
61
+ Args:
62
+ alt_text (Optional[str]): Text to display along with the encoding (often the inferred language)
63
+ """
38
64
  self.encoding_label = Text(self.encoding, 'encoding.header')
39
65
 
40
66
  if alt_text is not None:
@@ -48,7 +74,7 @@ class EncodingAssessment:
48
74
  return self.__rich__().plain
49
75
 
50
76
  def _get_dict_empty_value_as_None(self, key: str) -> Any:
51
- """Return None if the value at :key is an empty string, empty list, etc."""
77
+ """Return `None` if the value at `key` is an empty string, empty list, etc."""
52
78
  value = self.assessment.get(key)
53
79
 
54
80
  if isinstance(value, (dict, list, str)) and len(value) == 0:
@@ -1,6 +1,5 @@
1
1
  """
2
- Manager class to ease dealing with the chardet encoding detection library 'chardet'.
3
- Each instance of this class manages a chardet.detect_all() scan on a single set of bytes.
2
+ `EncodingDetector` class for managing chardet encoding detection.
4
3
  """
5
4
  from operator import attrgetter
6
5
  from typing import List
@@ -18,13 +17,37 @@ CONFIDENCE_SCORE_RANGE = range(0, 101)
18
17
 
19
18
 
20
19
  class EncodingDetector:
21
- # 10 as in 10%, 0.02, etc. Encodings w/confidences below this will not be displayed in the decoded table
20
+ """
21
+ Manager class to ease dealing with the encoding detection library `chardet`.
22
+
23
+ Each instance of this class manages a `chardet.detect_all()` scan on a single set of bytes.
24
+
25
+ Attributes:
26
+ bytes (bytes): The bytes to analyze.
27
+ bytes_len (int): The length of the bytes.
28
+ table (Table): A rich `Table` object summarizing the chardet results.
29
+ assessments (List[EncodingAssessment]): List of `EncodingAssessment` objects from `chardet` results.
30
+ unique_assessments (List[EncodingAssessment]): Unique assessments by encoding, highest confidence only.
31
+ raw_chardet_assessments (List[dict]): Raw list of dicts returned by `chardet.detect_all()`.
32
+ force_decode_assessments (List[EncodingAssessment]): Assessments above force decode threshold.
33
+ force_display_assessments (List[EncodingAssessment]): Assessments above force display threshold.
34
+ has_any_idea (Optional[bool]): `True` if `chardet` had any idea what the encoding might be,
35
+ `False` if not, `None` if `chardet` wasn't run yet.
36
+ force_display_threshold (float): `[class variable]` Default confidence threshold for forcing display
37
+ in decoded table.
38
+ force_decode_threshold (float): `[class variable]` Default confidence threshold for forcing a decode attempt.
39
+ """
40
+
41
+ # Default confidence threshold: encodings w/confidences below this will not be displayed in the decoded table.
22
42
  force_display_threshold = 20.0
23
-
24
- # At what chardet.detect() confidence % should we force a decode with an obscure encoding?
43
+ # Default chardet.detect() confidence % at which we force a decode with an obscure encoding.
25
44
  force_decode_threshold = 50.0
26
45
 
27
46
  def __init__(self, _bytes: bytes) -> None:
47
+ """
48
+ Args:
49
+ _bytes (bytes): The bytes to analyze with `chardet`.
50
+ """
28
51
  self.bytes = _bytes
29
52
  self.bytes_len = len(_bytes)
30
53
  self.table = _empty_chardet_results_table()
@@ -53,21 +76,31 @@ class EncodingDetector:
53
76
  self.force_display_assessments = self.assessments_above_confidence(type(self).force_display_threshold)
54
77
 
55
78
  def get_encoding_assessment(self, encoding: str) -> EncodingAssessment:
56
- """If chardet produced one, return it, otherwise return a dummy node with confidence of 0"""
79
+ """
80
+ Get the `chardet` assessment for a specific encoding.
81
+
82
+ Args:
83
+ encoding (str): The encoding to look for.
84
+
85
+ Returns:
86
+ EncodingAssessment: Assessment for the given encoding if it exists, otherwise a dummy with 0 confidence.
87
+ """
57
88
  assessment = next((r for r in self.unique_assessments if r.encoding == encoding), None)
58
89
  return assessment or EncodingAssessment.dummy_encoding_assessment(encoding)
59
90
 
60
91
  def has_enough_bytes(self) -> bool:
92
+ """Return `True` if we have enough bytes to run `chardet.detect()`."""
61
93
  return self.bytes_len >= YaralyzerConfig.args.min_chardet_bytes
62
94
 
63
95
  def assessments_above_confidence(self, cutoff: float) -> List[EncodingAssessment]:
96
+ """Return the assessments above the given confidence cutoff."""
64
97
  return [a for a in self.unique_assessments if a.confidence >= cutoff]
65
98
 
66
99
  def __rich__(self) -> Padding:
67
100
  return Padding(self.table, (0, 0, 0, 0))
68
101
 
69
102
  def _uniquify_results_and_build_table(self) -> None:
70
- """Keep the highest result per encoding, ignoring the language chardet has indicated"""
103
+ """Keep the highest result per encoding, ignoring the language `chardet` has indicated."""
71
104
  already_seen_encodings = {}
72
105
 
73
106
  for i, result in enumerate(self.assessments):
@@ -87,6 +120,7 @@ class EncodingDetector:
87
120
  self.unique_assessments.sort(key=attrgetter('confidence'), reverse=True)
88
121
 
89
122
  def _set_empty_results(self) -> None:
123
+ """Set empty results for when `chardet` can't help us."""
90
124
  self.assessments = []
91
125
  self.unique_assessments = []
92
126
  self.raw_chardet_assessments = []
@@ -94,8 +128,8 @@ class EncodingDetector:
94
128
  self.force_display_assessments = []
95
129
 
96
130
 
97
- def _empty_chardet_results_table():
98
- """Returns a fresh table"""
131
+ def _empty_chardet_results_table() -> Table:
132
+ """Returns an empty `Table` with appropriate columns for `chardet` results."""
99
133
  table = Table(
100
134
  'Rank', 'Encoding', 'Confidence',
101
135
  title='chardet.detect results',
@@ -26,14 +26,35 @@ HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE
26
26
 
27
27
  def get_bytes_before_and_after_match(_bytes: bytes, match: re.Match, num_before=None, num_after=None) -> bytes:
28
28
  """
29
- Get all bytes from num_before the start of the sequence up until num_after the end of the sequence
30
- num_before and num_after will both default to the env var/CLI options having to do with surrounding
31
- bytes. If only num_before is provided then num_after will use it as a default.
29
+ Get bytes before and after a regex match within a byte sequence.
30
+
31
+ Args:
32
+ _bytes (bytes): The full byte sequence.
33
+ match (re.Match): The regex `Match` object.
34
+ num_before (int, optional): Number of bytes before the match to include. Defaults to configured value.
35
+ num_after (int, optional): Number of bytes after the match to include. Defaults to either configured value
36
+ or the `num_before` arg value.
37
+
38
+ Returns:
39
+ bytes: The surrounding bytes including the match.
32
40
  """
33
41
  return get_bytes_surrounding_range(_bytes, match.start(), match.end(), num_before, num_after)
34
42
 
35
43
 
36
44
  def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num_before=None, num_after=None) -> bytes:
45
+ """
46
+ Get bytes surrounding a specified range in a byte sequence.
47
+
48
+ Args:
49
+ _bytes (bytes): The full byte sequence.
50
+ start_idx (int): Start index of the range.
51
+ end_idx (int): End index of the range.
52
+ num_before (int, optional): Number of bytes before the range. Defaults to configured value.
53
+ num_after (int, optional): Number of bytes after the range. Defaults to `num_before` if provided, otherwise the configured value.
54
+
55
+ Returns:
56
+ bytes: The surrounding bytes including the range.
57
+ """
37
58
  num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
38
59
  num_before = num_before or YaralyzerConfig.args.surrounding_bytes
39
60
  start_idx = max(start_idx - num_before, 0)
@@ -42,7 +63,16 @@ def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num
42
63
 
43
64
 
44
65
  def clean_byte_string(bytes_array: bytes) -> str:
45
- """Gives you a string representation of bytes w/no cruft e.g. '\x80\nx44' instead of "b'\x80\nx44'"."""
66
+ r"""
67
+ Return a clean string representation of bytes, without Python's b'' or b"" wrappers.
68
+ e.g. '\x80\nx44' instead of "b'\x80\nx44'".
69
+
70
+ Args:
71
+ bytes_array (bytes): The bytes to convert.
72
+
73
+ Returns:
74
+ str: Clean string representation of the bytes.
75
+ """
46
76
  byte_printer = Console(file=StringIO())
47
77
  byte_printer.out(bytes_array, end='')
48
78
  bytestr = byte_printer.file.getvalue()
@@ -58,7 +88,16 @@ def clean_byte_string(bytes_array: bytes) -> str:
58
88
 
59
89
 
60
90
  def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
61
- """Print raw bytes to a Text object, highlighing the bytes in the bytes_match BytesMatch"""
91
+ """
92
+ Return a rich `Text` object of raw bytes, highlighting the matched bytes.
93
+
94
+ Args:
95
+ _bytes (bytes): The full byte sequence.
96
+ bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
97
+
98
+ Returns:
99
+ Text: Rich Text object with highlighted match.
100
+ """
62
101
  surrounding_bytes_str = clean_byte_string(_bytes)
63
102
  highlighted_bytes_str = clean_byte_string(bytes_match.bytes)
64
103
  highlighted_bytes_str_length = len(highlighted_bytes_str)
@@ -72,6 +111,16 @@ def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
72
111
 
73
112
 
74
113
  def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
114
+ """
115
+ Return a hexadecimal view of raw bytes, highlighting the matched bytes.
116
+
117
+ Args:
118
+ _bytes (bytes): The full byte sequence.
119
+ bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
120
+
121
+ Returns:
122
+ Text: Rich Text object with highlighted match in hex view.
123
+ """
75
124
  hex_str = hex_text(_bytes)
76
125
  highlight_start_idx = bytes_match.highlight_start_idx * 3
77
126
  highlight_end_idx = bytes_match.highlight_end_idx * 3
@@ -81,6 +130,16 @@ def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
81
130
 
82
131
 
83
132
  def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
133
+ """
134
+ Return an ASCII view of raw bytes, highlighting the matched bytes.
135
+
136
+ Args:
137
+ _bytes (bytes): The full byte sequence.
138
+ bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
139
+
140
+ Returns:
141
+ Text: Rich Text object with highlighted match in ASCII view.
142
+ """
84
143
  txt = Text('', style=BYTES)
85
144
 
86
145
  for i, b in enumerate(_bytes):
@@ -113,23 +172,54 @@ def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
113
172
 
114
173
 
115
174
  def hex_text(_bytes: bytes) -> Text:
175
+ """
176
+ Return a rich Text object of the hex string for the given bytes.
177
+
178
+ Args:
179
+ _bytes (bytes): The bytes to convert.
180
+
181
+ Returns:
182
+ Text: Rich Text object of the hex string.
183
+ """
116
184
  return Text(hex_string(_bytes), style=GREY)
117
185
 
118
186
 
119
187
  def hex_string(_bytes: bytes) -> str:
188
+ """
189
+ Return a hex string representation of the given bytes.
190
+
191
+ Args:
192
+ _bytes (bytes): The bytes to convert.
193
+
194
+ Returns:
195
+ str: Hex string representation of the bytes.
196
+ """
120
197
  return ' '.join([hex(b).removeprefix('0x').rjust(2, '0') for i, b in enumerate(_bytes)])
121
198
 
122
199
 
123
200
  def print_bytes(bytes_array: bytes, style=None) -> None:
124
- """Convert bytes to a string representation and print to console"""
201
+ """
202
+ Print a string representation of some bytes to the console.
203
+
204
+ Args:
205
+ bytes_array (bytes): The bytes to print.
206
+ style (str, optional): Style to use for printing. Defaults to 'bytes'.
207
+ """
125
208
  for line in bytes_array.split(NEWLINE_BYTE):
126
209
  console.print(escape(clean_byte_string(line)), style=style or 'bytes')
127
210
 
128
211
 
129
212
  def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
130
213
  """
131
- Truncate bytes to the a modulus of the char width of the given encoding.
132
- For utf-16 this means truncate to a multiple of 2, for utf-32 to a multiple of 4.
214
+ Truncate bytes to a multiple of the character width for the given encoding.
215
+ For example, for utf-16 this means truncating to a multiple of 2, for utf-32 to a multiple of 4.
216
+
217
+ Args:
218
+ _bytes (bytes): The bytes to truncate.
219
+ encoding (str): The encoding to consider.
220
+
221
+ Returns:
222
+ bytes: Truncated bytes.
133
223
  """
134
224
  char_width = encoding_width(encoding)
135
225
  num_bytes = len(_bytes)
@@ -142,15 +232,23 @@ def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
142
232
 
143
233
 
144
234
  def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
145
- """
146
- Find the position of bytes_str in surrounding_byte_str. Both args are raw text dumps of binary data.
147
- Because strings are longer than bytes (stuff like '\xcc' are 4 chars when printed are one byte and
148
- the ANSI unprintables include stuff like 'NegativeAcknowledgement' which is over 20 chars) they represent
149
- so we have to re-find the location to highlight the bytes correctly.
235
+ r"""
236
+ Find the position of the highlighted bytes string within the surrounding bytes string.
237
+
238
+ Both arguments are string representations of binary data. This is needed because the string
239
+ representation of bytes can be longer than the actual bytes (e.g., '\xcc' is 4 chars for 1 byte).
240
+
241
+ Args:
242
+ surrounding_bytes_str (str): String representation of the full byte sequence.
243
+ highlighted_bytes_str (str): String representation of the matched bytes.
244
+ highlighted_bytes (BytesMatch): The BytesMatch object for context.
245
+
246
+ Returns:
247
+ int: The index in the surrounding string where the highlighted bytes start, or -1 if not found.
150
248
  """
151
249
  # Start a few chars in to avoid errors: sometimes we're searching for 1 or 2 bytes and there's a false positive
152
- # in the extra bytes. Tthis isn't perfect - it's starting us at the first index into the *bytes* that's safe to
153
- # check but this is almost certainly far too soon given the large % of bytes that take 4 chars to print ('\x02' etc)
250
+ # in the extra bytes. This isn't perfect - it's starting us at the first index into the *bytes* that's safe to
251
+ # check but this is almost certainly too soon given the large % of bytes that take 4 chars to print ('\x02' etc)
154
252
  highlight_idx = surrounding_bytes_str.find(highlighted_bytes_str, highlighted_bytes.highlight_start_idx)
155
253
 
156
254
  # TODO: Somehow \' and ' don't always come out the same :(
@@ -4,5 +4,5 @@ Help with dicts.
4
4
 
5
5
 
6
6
  def get_dict_key_by_value(_dict: dict, value):
7
- """Inverse of the usual dict operation"""
7
+ """Inverse of the usual dict operation."""
8
8
  return list(_dict.keys())[list(_dict.values()).index(value)]
@@ -3,16 +3,21 @@ Helper methods to work with files.
3
3
  """
4
4
  from datetime import datetime
5
5
  from os import listdir, path
6
+ from pathlib import Path
6
7
  from typing import List, Optional
7
8
 
8
9
 
9
- def timestamp_for_filename() -> str:
10
- """Returns a string showing current time in a file name friendly format"""
11
- return datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
10
+ def files_in_dir(dir: Path | str, with_extname: Optional[str] = None) -> List[str]:
11
+ """
12
+ Returns paths for all non dot files in `dir` (optionally filtered to only those ending in 'with_extname').
12
13
 
14
+ Args:
15
+ dir (str): Directory to list files from.
16
+ with_extname (Optional[str], optional): If set, only return files with this extension. Defaults to None.
13
17
 
14
- def files_in_dir(dir: str, with_extname: Optional[str] = None) -> List[str]:
15
- """paths for non dot files, optionally ending in 'with_extname'"""
18
+ Returns:
19
+ List[str]: List of file paths.
20
+ """
16
21
  files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
17
22
  files = [file for file in files if not path.isdir(file)]
18
23
 
@@ -23,20 +28,22 @@ def files_in_dir(dir: str, with_extname: Optional[str] = None) -> List[str]:
23
28
 
24
29
 
25
30
  def files_with_extname(files: List[str], extname: str) -> List[str]:
31
+ """Return only files from the list that end with the given `extname`."""
26
32
  return [f for f in files if f.endswith(f".{extname}")]
27
33
 
28
34
 
29
- def load_word_list(file_path):
30
- """For very simple files (1 col CSVs, if you wll)"""
31
- with open(file_path, 'r') as f:
32
- return [line.rstrip().lstrip() for line in f.readlines()]
33
-
34
-
35
- def load_binary_data(file_path) -> bytes:
35
+ def load_binary_data(file_path: Path | str) -> bytes:
36
+ """Load and return the raw `bytes` from a file."""
36
37
  with open(file_path, 'rb') as f:
37
38
  return f.read()
38
39
 
39
40
 
40
- def load_file(file_path) -> str:
41
+ def load_file(file_path: Path | str) -> str:
42
+ """Load and return the text contents of a file."""
41
43
  with open(file_path, 'r') as f:
42
44
  return f.read()
45
+
46
+
47
+ def timestamp_for_filename() -> str:
48
+ """Returns a string showing current time in a file name friendly format."""
49
+ return datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
@@ -1,9 +1,10 @@
1
1
  """
2
2
  Methods to handle turning various objects into Rich text/table/etc representations
3
- Rich colors: https://rich.readthedocs.io/en/stable/appendix/colors.html
3
+
4
+ [Rich color names](https://rich.readthedocs.io/en/stable/appendix/colors.html)
4
5
  TODO: interesting colors # row_styles[0] = 'reverse bold on color(144)' <-
5
6
  """
6
- from typing import List, Union
7
+ from typing import List, Optional, Union
7
8
 
8
9
  from rich.columns import Columns
9
10
  from rich.panel import Panel
@@ -37,16 +38,17 @@ DECODING_ERRORS_MSG = Text('Yes', style='dark_red dim')
37
38
 
38
39
 
39
40
  def na_txt(style: Union[str, Style] = 'white'):
41
+ """Standard N/A text for tables and such."""
40
42
  return Text('N/A', style=style)
41
43
 
42
44
 
43
- def prefix_with_style(_str: str, style: str, root_style=None) -> Text:
44
- """Sometimes you need a Text() object to start plain lest the underline or whatever last forever"""
45
+ def prefix_with_style(_str: str, style: str, root_style: Optional[Union[Style, str]] = None) -> Text:
46
+ """Sometimes you need a Text() object to start plain lest the underline or whatever last forever."""
45
47
  return Text('', style=root_style or 'white') + Text(_str, style)
46
48
 
47
49
 
48
- def meter_style(meter_pct):
49
- """For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer"""
50
+ def meter_style(meter_pct: float | int) -> str:
51
+ """For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer."""
50
52
  if meter_pct > 100 or meter_pct < 0:
51
53
  log.warning(f"Invalid meter_pct: {meter_pct}")
52
54
 
@@ -81,15 +83,10 @@ def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
81
83
 
82
84
 
83
85
  def reverse_color(style: Style) -> Style:
84
- """Reverses the color for a given style"""
86
+ """Reverses the color for a given style."""
85
87
  return Style(color=style.bgcolor, bgcolor=style.color, underline=style.underline, bold=style.bold)
86
88
 
87
89
 
88
- def yaralyzer_show_color_theme() -> None:
89
- """Script method to show yaralyzer's color theme. Invocable with 'yaralyzer_show_colors'."""
90
- show_color_theme(YARALYZER_THEME_DICT)
91
-
92
-
93
90
  def show_color_theme(styles: dict) -> None:
94
91
  """Print all colors in 'styles' to screen in a grid"""
95
92
  console.print(Panel('The Yaralyzer Color Theme', style='reverse'))
@@ -104,7 +101,7 @@ def show_color_theme(styles: dict) -> None:
104
101
 
105
102
 
106
103
  def size_text(num_bytes: int) -> Text:
107
- """Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)"""
104
+ """Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)."""
108
105
  kb_txt = prefix_with_style("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
109
106
  kb_txt.append(' kb ')
110
107
  bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
@@ -116,4 +113,10 @@ def size_in_bytes_text(num_bytes: int) -> Text:
116
113
 
117
114
 
118
115
  def newline_join(texts: List[Text]) -> Text:
116
+ """Join a list of Text objects with newlines between them."""
119
117
  return Text("\n").join(texts)
118
+
119
+
120
+ def yaralyzer_show_color_theme() -> None:
121
+ """Script method to show yaralyzer's color theme. Invocable with 'yaralyzer_show_colors'."""
122
+ show_color_theme(YARALYZER_THEME_DICT)
@@ -17,7 +17,7 @@ def line_count(_string: str) -> int:
17
17
 
18
18
 
19
19
  def hex_to_string(_string: str) -> str:
20
- """String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
20
+ r"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
21
21
  return bytearray.fromhex(_string.replace(' ', '')).decode()
22
22
 
23
23
 
@@ -1,14 +1,20 @@
1
1
  """
2
- Methods to build the rich.table used to display decoding attempts of a given bytes array.
2
+ Methods to build the `rich.table` used to display decoding attempts of a given bytes array.
3
3
 
4
- Final output should be rich.table of decoding attempts that are sorted like this:
4
+ Final output should be a `rich.table` of decoding attempts that are sorted like this:
5
5
 
6
6
  1. String representation of undecoded bytes is always the first row
7
- 2. Encodings which chardet.detect() ranked as > 0% likelihood are sorted based on that confidence
7
+
8
+ 2. Encodings which `chardet.detect()` ranked as > 0% likelihood are sorted based on that confidence
9
+
8
10
  3. Then the unchardetectable:
11
+
9
12
  1. Decodings that were successful, unforced, and new
10
- 2. Decodings that 'successful' but forced
13
+
14
+ 2. Decodings that were "successful" but forced
15
+
11
16
  3. Decodings that were the same as other decodings
17
+
12
18
  4. Failed decodings
13
19
  """
14
20
  from collections import namedtuple
@@ -45,7 +51,7 @@ RAW_BYTES = Text('Raw', style=f"bytes")
45
51
 
46
52
 
47
53
  def new_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
48
- """Build a new rich Table with two rows, the raw and hex views of the bytes_match data."""
54
+ """Build a new rich `Table` with two rows, the raw and hex views of the `bytes_match` data."""
49
55
  table = Table(show_lines=True, border_style='bytes', header_style='decode.table_header')
50
56
 
51
57
  def add_col(title, **kwargs):
@@ -65,7 +71,18 @@ def new_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
65
71
 
66
72
 
67
73
  def decoding_table_row(assessment: EncodingAssessment, is_forced: Text, txt: Text, score: float) -> DecodingTableRow:
68
- """Build a table row for a decoding attempt."""
74
+ """
75
+ Build a table row for a decoding attempt.
76
+
77
+ Args:
78
+ assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
79
+ is_forced (Text): Text indicating if the decode was forced.
80
+ txt (Text): The decoded string as a rich `Text` object (with highlighting).
81
+ score (float): The score to use for sorting this row in the table.
82
+
83
+ Returns:
84
+ DecodingTableRow: The constructed table row named tuple.
85
+ """
69
86
  return DecodingTableRow(
70
87
  assessment.encoding_label,
71
88
  assessment.confidence_text,
@@ -78,13 +95,30 @@ def decoding_table_row(assessment: EncodingAssessment, is_forced: Text, txt: Tex
78
95
  )
79
96
 
80
97
 
81
- def assessment_only_row(assessment: EncodingAssessment, score) -> DecodingTableRow:
82
- """Build a row with just chardet assessment confidence data and no actual decoding attempt string."""
98
+ def assessment_only_row(assessment: EncodingAssessment, score: float) -> DecodingTableRow:
99
+ """
100
+ Build a `DecodingTableRow` with just `chardet` assessment confidence data and no actual decoding attempt string.
101
+
102
+ Args:
103
+ assessment (EncodingAssessment): The `chardet` assessment for the encoding used.
104
+ score (float): The score to use for sorting this row within the table.
105
+
106
+ Returns:
107
+ DecodingTableRow: The constructed table row named tuple with no decoding attempt string.
108
+ """
83
109
  return decoding_table_row(assessment, na_txt(), DECODE_NOT_ATTEMPTED_MSG, score)
84
110
 
85
111
 
86
112
  def _hex_preview_subtable(bytes_match: BytesMatch) -> Table:
87
- """Build a sub table for hex view (hex on one side, ascii on the other side)."""
113
+ """
114
+ Build a sub `Table` for hex view row (hex on one side, ascii on the other side).
115
+
116
+ Args:
117
+ bytes_match (BytesMatch): The `BytesMatch` object containing the bytes to display.
118
+
119
+ Returns:
120
+ Table: A `rich.table` with hex and ascii views of the bytes.
121
+ """
88
122
  hex_table = Table(
89
123
  'hex',
90
124
  'ascii',
@@ -1,7 +1,10 @@
1
+ """
2
+ Functions to export Yaralyzer results to various file formats.
3
+ """
1
4
  import json
2
5
  import time
3
6
  from os import path
4
- from typing import Optional
7
+ from typing import Callable, Optional
5
8
 
6
9
  from rich.terminal_theme import TerminalTheme
7
10
 
@@ -51,7 +54,16 @@ _EXPORT_KWARGS = {
51
54
 
52
55
 
53
56
  def export_json(yaralyzer: Yaralyzer, output_basepath: Optional[str]) -> str:
54
- """Export YARA scan results to JSON. Returns the path to the output file that was written."""
57
+ """
58
+ Export YARA scan results to JSON.
59
+
60
+ Args:
61
+ yaralyzer (Yaralyzer): The `Yaralyzer` object containing the results to export.
62
+ output_basepath (Optional[str]): Base path to write output to. Should have no file extension.
63
+
64
+ Returns:
65
+ str: Path data was exported to.
66
+ """
55
67
  output_path = f"{output_basepath or 'yara_matches'}.json"
56
68
 
57
69
  matches_data = [
@@ -66,11 +78,16 @@ def export_json(yaralyzer: Yaralyzer, output_basepath: Optional[str]) -> str:
66
78
  return output_path
67
79
 
68
80
 
69
- def invoke_rich_export(export_method, output_file_basepath) -> str:
81
+ def invoke_rich_export(export_method: Callable, output_file_basepath: str) -> str:
70
82
  """
71
- Announce the export, perform the export, announce completion.
72
- export_method is a Rich.console.save_blah() method, output_file_path is file path w/no extname.
73
- Returns the path to path data was exported to.
83
+ Announce the export, perform the export, and announce completion.
84
+
85
+ Args:
86
+ export_method (Callable): Usually a `Rich.console.save_whatever()` method
87
+ output_file_basepath (str): Path to write output to. Should have no file extension.
88
+
89
+ Returns:
90
+ str: Path data was exported to.
74
91
  """
75
92
  method_name = export_method.__name__
76
93
  extname = 'txt' if method_name == 'save_text' else method_name.split('_')[-1]