yaralyzer 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.0.8
4
+ * Bump `python-dotenv` to v1.1.1
5
+ * Use `mkdocs` and `lazydocs` to build automatic API documentation at https://michelcrypt4d4mus.github.io/yaralyzer/
6
+ * Drop python 3.9 support (required by `mkdocs-awesome-nav` package)
7
+
8
+ ### 1.0.7
9
+ * Add `Changelog` to PyPi URLs, add some more PyPi classifiers
10
+ * Add `.flake8` config file and fix style errors
11
+ * Rename `prefix_with_plain_text_obj()` to `prefix_with_style()`
12
+
3
13
  ### 1.0.6
4
14
  * Add `Environment :: Console` and `Programming Language :: Python` to PyPi classifiers
5
15
  * Add `LICENSE` to PyPi package
yaralyzer/__init__.py CHANGED
@@ -11,11 +11,9 @@ if not environ.get('INVOKED_BY_PYTEST', False):
11
11
  load_dotenv(dotenv_path=dotenv_file)
12
12
  break
13
13
 
14
- from yaralyzer.config import YaralyzerConfig
15
14
  from yaralyzer.output.file_export import export_json, invoke_rich_export
16
15
  from yaralyzer.output.rich_console import console
17
16
  from yaralyzer.util.argument_parser import get_export_basepath, parse_arguments
18
- from yaralyzer.util.logging import log
19
17
  from yaralyzer.yara.yara_rule_builder import HEX, REGEX
20
18
  from yaralyzer.yaralyzer import Yaralyzer
21
19
 
@@ -26,6 +24,11 @@ PDFALYZER_MSG_TXT.append('https://github.com/michelcrypt4d4mus/pdfalyzer\n', sty
26
24
 
27
25
 
28
26
  def yaralyze():
27
+ """
28
+ Entry point for yaralyzer when invoked as a script.
29
+
30
+ Args are parsed from the command line and environment variables. See yaralyzer --help for details.
31
+ """
29
32
  args = parse_arguments()
30
33
  output_basepath = None
31
34
 
yaralyzer/bytes_match.py CHANGED
@@ -1,10 +1,4 @@
1
- """
2
- Simple class to keep track of regex matches against binary data. Basically an re.match object with
3
- some (not many) extra bells and whistles, most notably the surrounding_bytes property.
4
-
5
- pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
6
- e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
7
- """
1
+ """BytesMatch class for tracking regex and YARA matches against binary data."""
8
2
  import re
9
3
  from typing import Iterator, Optional
10
4
 
@@ -13,25 +7,43 @@ from rich.text import Text
13
7
  from yara import StringMatch, StringMatchInstance
14
8
 
15
9
  from yaralyzer.config import YaralyzerConfig
16
- from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
10
+ from yaralyzer.helpers.rich_text_helper import prefix_with_style
17
11
  from yaralyzer.output.file_hashes_table import bytes_hashes_table
18
12
  from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
19
13
 
20
14
 
21
15
  class BytesMatch:
16
+ """
17
+ Simple class to keep track of regex matches against binary data.
18
+
19
+ Basically an re.match object with some (not many) extra bells and whistles, most notably
20
+ the surrounding_bytes property.
21
+
22
+ pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
23
+ e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
24
+ """
25
+
22
26
  def __init__(
23
- self,
24
- matched_against: bytes,
25
- start_idx: int,
26
- length: int,
27
- label: str,
28
- ordinal: int,
29
- match: Optional[re.Match] = None, # It's rough to get the regex from yara :(
30
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
31
- ) -> None:
32
- """
33
- Ordinal means it's the Nth match with this regex (not super important but useful)
34
- YARA makes it a little rouch to get the actual regex that matched. Can be done with plyara eventually.
27
+ self,
28
+ matched_against: bytes,
29
+ start_idx: int,
30
+ length: int,
31
+ label: str,
32
+ ordinal: int,
33
+ match: Optional[re.Match] = None, # It's rough to get the regex from yara :(
34
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
35
+ ) -> None:
36
+ """
37
+ Initialize a BytesMatch object representing a match against binary data.
38
+
39
+ Args:
40
+ matched_against (bytes): The full byte sequence that was searched.
41
+ start_idx (int): Start index of the match in the byte sequence.
42
+ length (int): Length of the match in bytes.
43
+ label (str): Label for the match (e.g., regex or YARA rule name).
44
+ ordinal (int): The Nth match for this pattern.
45
+ match (Optional[re.Match]): Regex match object, if available.
46
+ highlight_style (str): Style to use for highlighting the match.
35
47
  """
36
48
  self.matched_against: bytes = matched_against
37
49
  self.start_idx: int = start_idx
@@ -52,25 +64,50 @@ class BytesMatch:
52
64
 
53
65
  @classmethod
54
66
  def from_regex_match(
55
- cls,
56
- matched_against: bytes,
57
- match: re.Match,
58
- ordinal: int,
59
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
60
- ) -> 'BytesMatch':
67
+ cls,
68
+ matched_against: bytes,
69
+ match: re.Match,
70
+ ordinal: int,
71
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
72
+ ) -> 'BytesMatch':
73
+ """
74
+ Create a BytesMatch from a regex match object.
75
+
76
+ Args:
77
+ matched_against (bytes): The bytes searched.
78
+ match (re.Match): The regex match object.
79
+ ordinal (int): The Nth match for this pattern.
80
+ highlight_style (str): Style for highlighting.
81
+
82
+ Returns:
83
+ BytesMatch: The constructed BytesMatch instance.
84
+ """
61
85
  return cls(matched_against, match.start(), len(match[0]), match.re.pattern, ordinal, match, highlight_style)
62
86
 
63
87
  @classmethod
64
88
  def from_yara_str(
65
- cls,
66
- matched_against: bytes,
67
- rule_name: str,
68
- yara_str_match: StringMatch,
69
- yara_str_match_instance: StringMatchInstance,
70
- ordinal: int,
71
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
72
- ) -> 'BytesMatch':
73
- """Build a BytesMatch from a yara string match. 'matched_against' is the set of bytes yara was run against."""
89
+ cls,
90
+ matched_against: bytes,
91
+ rule_name: str,
92
+ yara_str_match: StringMatch,
93
+ yara_str_match_instance: StringMatchInstance,
94
+ ordinal: int,
95
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
96
+ ) -> 'BytesMatch':
97
+ """
98
+ Build a BytesMatch from a YARA string match instance.
99
+
100
+ Args:
101
+ matched_against (bytes): The bytes searched.
102
+ rule_name (str): Name of the YARA rule.
103
+ yara_str_match (StringMatch): YARA string match object.
104
+ yara_str_match_instance (StringMatchInstance): Instance of the string match.
105
+ ordinal (int): The Nth match for this pattern.
106
+ highlight_style (str): Style for highlighting.
107
+
108
+ Returns:
109
+ BytesMatch: The constructed BytesMatch instance.
110
+ """
74
111
  pattern_label = yara_str_match.identifier
75
112
 
76
113
  # Don't duplicate the labeling if rule_name and yara_str are the same
@@ -89,12 +126,22 @@ class BytesMatch:
89
126
 
90
127
  @classmethod
91
128
  def from_yara_match(
92
- cls,
93
- matched_against: bytes,
94
- yara_match: dict,
95
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
96
- ) -> Iterator['BytesMatch']:
97
- """Iterator w/a BytesMatch for each string returned as part of a YARA match result dict."""
129
+ cls,
130
+ matched_against: bytes,
131
+ yara_match: dict,
132
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
133
+ ) -> Iterator['BytesMatch']:
134
+ """
135
+ Yield a BytesMatch for each string returned as part of a YARA match result dict.
136
+
137
+ Args:
138
+ matched_against (bytes): The bytes searched.
139
+ yara_match (dict): YARA match result dictionary.
140
+ highlight_style (str): Style for highlighting.
141
+
142
+ Yields:
143
+ BytesMatch: For each string match in the YARA result.
144
+ """
98
145
  i = 0 # For numbered labeling
99
146
 
100
147
  # yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
@@ -102,28 +149,43 @@ class BytesMatch:
102
149
  for yara_str_match_instance in yara_str_match.instances:
103
150
  i += 1
104
151
 
105
- yield(cls.from_yara_str(
152
+ yield cls.from_yara_str(
106
153
  matched_against,
107
154
  yara_match['rule'],
108
155
  yara_str_match,
109
156
  yara_str_match_instance,
110
157
  i,
111
- highlight_style))
158
+ highlight_style
159
+ )
112
160
 
113
161
  def style_at_position(self, idx) -> str:
114
- """Get the style for the byte at position idx within the matched bytes"""
162
+ """
163
+ Get the style for the byte at position idx within the matched bytes.
164
+
165
+ Args:
166
+ idx (int): Index within the surrounding bytes.
167
+
168
+ Returns:
169
+ str: The style to use for this byte (highlight or greyed out).
170
+ """
115
171
  if idx < self.highlight_start_idx or idx >= self.highlight_end_idx:
116
172
  return GREY_ADDRESS
117
173
  else:
118
174
  return self.highlight_style
119
175
 
120
176
  def location(self) -> Text:
121
- """Returns a Text obj like '(start idx: 348190, end idx: 348228)'"""
122
- location_txt = prefix_with_plain_text_obj(
177
+ """
178
+ Get a styled Text object describing the start and end index of the match.
179
+
180
+ Returns:
181
+ Text: Rich Text object like '(start idx: 348190, end idx: 348228)'.
182
+ """
183
+ location_txt = prefix_with_style(
123
184
  f"(start idx: ",
124
185
  style='off_white',
125
186
  root_style='decode.subheading'
126
187
  )
188
+
127
189
  location_txt.append(str(self.start_idx), style='number')
128
190
  location_txt.append(', end idx: ', style='off_white')
129
191
  location_txt.append(str(self.end_idx), style='number')
@@ -131,13 +193,26 @@ class BytesMatch:
131
193
  return location_txt
132
194
 
133
195
  def is_decodable(self) -> bool:
134
- """True if SUPPRESS_DECODES_TABLE is false and length of self.bytes is between MIN/MAX_DECODE_LENGTH"""
196
+ """
197
+ Determine if the matched bytes should be decoded.
198
+
199
+ Whether the bytes are decodable depends on whether SUPPRESS_DECODES_TABLE is set
200
+ and whether the match length is between MIN/MAX_DECODE_LENGTH.
201
+
202
+ Returns:
203
+ bool: True if decodable, False otherwise.
204
+ """
135
205
  return self.match_length >= YaralyzerConfig.args.min_decode_length \
136
206
  and self.match_length <= YaralyzerConfig.args.max_decode_length \
137
207
  and not YaralyzerConfig.args.suppress_decodes_table
138
208
 
139
209
  def bytes_hashes_table(self) -> Table:
140
- """Helper function to build the MD5/SHA table for self.bytes"""
210
+ """
211
+ Build a table of MD5/SHA hashes for the matched bytes.
212
+
213
+ Returns:
214
+ Table: Rich Table object with hashes.
215
+ """
141
216
  return bytes_hashes_table(
142
217
  self.bytes,
143
218
  self.location().plain,
@@ -145,7 +220,12 @@ class BytesMatch:
145
220
  )
146
221
 
147
222
  def suppression_notice(self) -> Text:
148
- """Generate a message for when there are too few/too many bytes"""
223
+ """
224
+ Generate a message for when the match is too short or too long to decode.
225
+
226
+ Returns:
227
+ Text: Rich Text object with the suppression notice.
228
+ """
149
229
  txt = self.__rich__()
150
230
 
151
231
  if self.match_length < YaralyzerConfig.args.min_decode_length:
@@ -157,7 +237,12 @@ class BytesMatch:
157
237
  return txt
158
238
 
159
239
  def to_json(self) -> dict:
160
- """Convert this BytesMatch to a JSON-serializable dict."""
240
+ """
241
+ Convert this BytesMatch to a JSON-serializable dictionary.
242
+
243
+ Returns:
244
+ dict: Dictionary representation of the match, suitable for JSON serialization.
245
+ """
161
246
  json_dict = {
162
247
  'label': self.label,
163
248
  'match_length': self.match_length,
@@ -176,7 +261,13 @@ class BytesMatch:
176
261
  return json_dict
177
262
 
178
263
  def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
179
- """Find the surrounding bytes, making sure not to step off the beginning or end"""
264
+ """
265
+ Find and set the bytes surrounding the match, ensuring indices stay within bounds.
266
+
267
+ Args:
268
+ num_before (Optional[int]): Number of bytes before the match to include.
269
+ num_after (Optional[int]): Number of bytes after the match to include.
270
+ """
180
271
  num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
181
272
  num_before = num_before or YaralyzerConfig.args.surrounding_bytes
182
273
  self.surrounding_start_idx: int = max(self.start_idx - num_before, 0)
@@ -184,11 +275,13 @@ class BytesMatch:
184
275
  self.surrounding_bytes: bytes = self.matched_against[self.surrounding_start_idx:self.surrounding_end_idx]
185
276
 
186
277
  def __rich__(self) -> Text:
187
- headline = prefix_with_plain_text_obj(str(self.match_length), style='number', root_style='decode.subheading')
278
+ """Get a rich Text representation of the match for display."""
279
+ headline = prefix_with_style(str(self.match_length), style='number', root_style='decode.subheading')
188
280
  headline.append(f" bytes matching ")
189
281
  headline.append(f"{self.label} ", style=ALERT_STYLE if self.highlight_style == ALERT_STYLE else 'regex')
190
282
  headline.append('at ')
191
283
  return headline + self.location()
192
284
 
193
285
  def __str__(self):
286
+ """Plain text (no rich colors) representation of the match for display."""
194
287
  return self.__rich__().plain
yaralyzer/config.py CHANGED
@@ -1,3 +1,6 @@
1
+ """
2
+ Configuration management for Yaralyzer.
3
+ """
1
4
  import logging
2
5
  from argparse import ArgumentParser, Namespace
3
6
  from os import environ
@@ -15,16 +18,19 @@ MEGABYTE = 1024 * KILOBYTE
15
18
 
16
19
  def config_var_name(env_var: str) -> str:
17
20
  """
18
- Get the name of env_var and strip off 'YARALYZER_', e.g.:
19
- SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
20
- config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
21
+ Get the name of env_var and strip off 'YARALYZER_' prefix.
22
+
23
+ Example:
24
+ $ SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
25
+ $ config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
26
+
21
27
  """
22
28
  env_var = env_var.removeprefix("YARALYZER_")
23
29
  return f'{env_var=}'.partition('=')[0]
24
30
 
25
31
 
26
32
  def is_env_var_set_and_not_false(var_name):
27
- """Returns True if var_name is not empty and set to anything other than 'false' (capitalization agnostic)"""
33
+ """Return True if var_name is not empty and set to anything other than 'false' (capitalization agnostic)."""
28
34
  if var_name in environ:
29
35
  var_value = environ[var_name]
30
36
  return var_value is not None and len(var_value) > 0 and var_value.lower() != 'false'
@@ -33,11 +39,13 @@ def is_env_var_set_and_not_false(var_name):
33
39
 
34
40
 
35
41
  def is_invoked_by_pytest():
36
- """Return true if pytest is running"""
42
+ """Return true if pytest is running."""
37
43
  return is_env_var_set_and_not_false(PYTEST_FLAG)
38
44
 
39
45
 
40
46
  class YaralyzerConfig:
47
+ """Handles parsing of command line args and environment variables for Yaralyzer."""
48
+
41
49
  # Passed through to yara.set_config()
42
50
  DEFAULT_MAX_MATCH_LENGTH = 100 * KILOBYTE
43
51
  DEFAULT_YARA_STACK_SIZE = 2 * 65536
@@ -76,11 +84,13 @@ class YaralyzerConfig:
76
84
 
77
85
  @classmethod
78
86
  def set_argument_parser(cls, parser: ArgumentParser) -> None:
87
+ """Set the _argument_parser class attribute that will be used to parse command line args."""
79
88
  cls._argument_parser: ArgumentParser = parser
80
89
  cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
81
90
 
82
91
  @classmethod
83
92
  def set_args(cls, args: Namespace) -> None:
93
+ """Set the args class attribute and update args with any environment variable overrides."""
84
94
  cls.args = args
85
95
 
86
96
  for option in cls._argparse_keys:
@@ -91,7 +101,7 @@ class YaralyzerConfig:
91
101
  env_var = f"{YARALYZER}_{option.upper()}"
92
102
  env_value = environ.get(env_var)
93
103
  default_value = cls.get_default_arg(option)
94
- #print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}")
104
+ # print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}") # noqa: E501
95
105
 
96
106
  # TODO: as is you can't override env vars with CLI args
97
107
  if isinstance(arg_value, bool):
@@ -105,9 +115,11 @@ class YaralyzerConfig:
105
115
 
106
116
  @classmethod
107
117
  def set_default_args(cls):
118
+ """Set args to their defaults as if parsed from the command line."""
108
119
  cls.set_args(cls._argument_parser.parse_args(['dummy']))
109
120
 
110
121
  @classmethod
111
122
  def get_default_arg(cls, arg: str) -> Any:
123
+ """Return the default value for arg as defined by a DEFAULT_ style class variable."""
112
124
  default_var = f"DEFAULT_{arg.upper()}"
113
125
  return vars(cls).get(default_var)
@@ -1,9 +1,4 @@
1
- """
2
- Class to handle attempting to decode a chunk of bytes into strings with various possible encodings.
3
- Leverages the chardet library to both guide what encodings are attempted as well as to rank decodings
4
- in the results.
5
- """
6
-
1
+ """BytesDecoder class for attempting to decode bytes with various encodings."""
7
2
  from collections import defaultdict
8
3
  from copy import deepcopy
9
4
  from operator import attrgetter
@@ -15,14 +10,13 @@ from rich.panel import Panel
15
10
  from rich.table import Table
16
11
  from rich.text import Text
17
12
 
18
- #from yaralyzer.bytes_match import BytesMatch
13
+ from yaralyzer.bytes_match import BytesMatch # Used to cause circular import issues
19
14
  from yaralyzer.config import YaralyzerConfig
20
15
  from yaralyzer.decoding.decoding_attempt import DecodingAttempt
21
- from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT, encoding_offsets
16
+ from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT
22
17
  from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
23
18
  from yaralyzer.encoding_detection.encoding_detector import EncodingDetector
24
19
  from yaralyzer.helpers.dict_helper import get_dict_key_by_value
25
- from yaralyzer.helpers.list_helper import flatten
26
20
  from yaralyzer.helpers.rich_text_helper import CENTER, DECODING_ERRORS_MSG, NO_DECODING_ERRORS_MSG
27
21
  from yaralyzer.output.decoding_attempts_table import (DecodingTableRow, assessment_only_row,
28
22
  decoding_table_row, new_decoding_attempts_table)
@@ -36,7 +30,33 @@ SCORE_SCALER = 100.0
36
30
 
37
31
 
38
32
  class BytesDecoder:
33
+ """
34
+ Class to handle attempting to decode a chunk of bytes into strings with various possible encodings.
35
+
36
+ Leverages the chardet library to both guide what encodings are attempted as well as to rank decodings
37
+ in the results.
38
+ """
39
+
39
40
  def __init__(self, bytes_match: 'BytesMatch', label: Optional[str] = None) -> None:
41
+ """
42
+ Initialize a BytesDecoder for attempting to decode a chunk of bytes using various encodings.
43
+
44
+ Args:
45
+ bytes_match (BytesMatch): The BytesMatch object containing the bytes to decode and match metadata.
46
+ label (Optional[str], optional): Optional label for this decoding attempt. Defaults to the match label.
47
+
48
+ Attributes:
49
+ bytes_match (BytesMatch): The BytesMatch instance being decoded.
50
+ bytes (bytes): The bytes (including surrounding context) to decode.
51
+ label (str): Label for this decoding attempt.
52
+ was_match_decodable (dict): Tracks successful decodes per encoding.
53
+ was_match_force_decoded (dict): Tracks forced decodes per encoding.
54
+ was_match_undecodable (dict): Tracks failed decodes per encoding.
55
+ decoded_strings (dict): Maps encoding to decoded string.
56
+ undecoded_rows (list): Stores undecoded table rows.
57
+ decodings (list): List of DecodingAttempt objects for each encoding tried.
58
+ encoding_detector (EncodingDetector): Used to detect and assess possible encodings.
59
+ """
40
60
  self.bytes_match = bytes_match
41
61
  self.bytes = bytes_match.surrounding_bytes
42
62
  self.label = label or bytes_match.label
@@ -53,7 +73,7 @@ class BytesDecoder:
53
73
  self.encoding_detector = EncodingDetector(self.bytes)
54
74
 
55
75
  def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
56
- """Rich object generator (see Rich console docs)"""
76
+ """Rich object generator (see Rich console docs)."""
57
77
  yield NewLine(2)
58
78
  yield Align(self._decode_attempt_subheading(), CENTER)
59
79
 
@@ -66,14 +86,13 @@ class BytesDecoder:
66
86
  if self.bytes_match.is_decodable():
67
87
  yield self._build_decodings_table()
68
88
  elif YaralyzerConfig.args.standalone_mode:
69
- # TODO: yield self.bytes_match.suppression_notice() (i guess to show some notice that things are suppressed?)
70
89
  yield self._build_decodings_table(True)
71
90
 
72
91
  yield NewLine()
73
92
  yield Align(self.bytes_match.bytes_hashes_table(), CENTER, style='dim')
74
93
 
75
94
  def _build_decodings_table(self, suppress_decodes: bool = False) -> Table:
76
- """First rows are the raw / hex views of the bytes, next rows are the attempted decodings"""
95
+ """First rows are the raw / hex views of the bytes, next rows are the attempted decodings."""
77
96
  self.table = new_decoding_attempts_table(self.bytes_match)
78
97
 
79
98
  # Add the encoding rows to the table if not suppressed
@@ -118,7 +137,7 @@ class BytesDecoder:
118
137
  return Panel(headline, style='decode.subheading', expand=False)
119
138
 
120
139
  def _track_decode_stats(self) -> None:
121
- """Track stats about successful vs. forced vs. failed decode attempts"""
140
+ """Track stats about successful vs. forced vs. failed decode attempts."""
122
141
  for decoding in self.decodings:
123
142
  if decoding.failed_to_decode:
124
143
  self.was_match_undecodable[decoding.encoding] += 1
@@ -136,7 +155,7 @@ class BytesDecoder:
136
155
  # If the decoding can have a start offset add an appropriate extension to the encoding label
137
156
  if decoding.start_offset_label:
138
157
  if assessment.language:
139
- log.warning(f"{decoding.encoding} has offset {decoding.start_offset} and language '{assessment.language}'")
158
+ log.warning(f"{decoding.encoding} offset {decoding.start_offset} AND language '{assessment.language}'")
140
159
  else:
141
160
  assessment = deepcopy(assessment)
142
161
  assessment.set_encoding_label(decoding.start_offset_label)
@@ -165,7 +184,7 @@ class BytesDecoder:
165
184
 
166
185
 
167
186
  def _build_encodings_metric_dict():
168
- """One key for each key in ENCODINGS_TO_ATTEMPT, values are all 0"""
187
+ """One key for each key in ENCODINGS_TO_ATTEMPT, values are all 0."""
169
188
  metrics_dict = defaultdict(lambda: 0)
170
189
 
171
190
  for encoding in ENCODINGS_TO_ATTEMPT.keys():
@@ -1,21 +1,22 @@
1
- """
2
- Class to manage attempting to decode a chunk of bytes into strings with a given encoding.
3
- """
1
+ """Class to manage attempting to decode a chunk of bytes into strings with a given encoding."""
4
2
  from sys import byteorder
5
3
  from typing import Optional
6
4
 
7
5
  from rich.markup import escape
8
6
  from rich.text import Text
9
7
 
8
+ from yaralyzer.bytes_match import BytesMatch # Formerly caused circular import issues
10
9
  from yaralyzer.encoding_detection.character_encodings import (ENCODINGS_TO_ATTEMPT, SINGLE_BYTE_ENCODINGS,
11
10
  UTF_8, encoding_width, is_wide_utf)
12
11
  from yaralyzer.helpers.bytes_helper import clean_byte_string, truncate_for_encoding
13
- from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj, unprintable_byte_to_text
12
+ from yaralyzer.helpers.rich_text_helper import prefix_with_style, unprintable_byte_to_text
14
13
  from yaralyzer.output.rich_console import ALERT_STYLE, BYTES_BRIGHTER, BYTES_BRIGHTEST, BYTES_NO_DIM, GREY_ADDRESS
15
14
  from yaralyzer.util.logging import log
16
15
 
17
16
 
18
17
  class DecodingAttempt:
18
+ """Class to manage attempting to decode a chunk of bytes into strings with a given encoding."""
19
+
19
20
  def __init__(self, bytes_match: 'BytesMatch', encoding: str) -> None:
20
21
  # Args
21
22
  self.bytes = bytes_match.surrounding_bytes
@@ -30,7 +31,7 @@ class DecodingAttempt:
30
31
  self.decoded_string = self._decode_bytes()
31
32
 
32
33
  def is_wide_utf_encoding(self) -> bool:
33
- """Returns True if the encoding is UTF-16 or UTF-32"""
34
+ """Returns True if the encoding is UTF-16 or UTF-32."""
34
35
  return is_wide_utf(self.encoding)
35
36
 
36
37
  def _decode_bytes(self) -> Text:
@@ -57,7 +58,7 @@ class DecodingAttempt:
57
58
  return self._custom_decode()
58
59
 
59
60
  def _custom_decode(self) -> Text:
60
- """Returns a Text obj representing an attempt to force a UTF-8 encoding upon an array of bytes"""
61
+ """Returns a Text obj representing an attempt to force a UTF-8 encoding upon an array of bytes."""
61
62
  log.info(f"Custom decoding {self.bytes_match} with {self.encoding}...")
62
63
  unprintable_char_map = ENCODINGS_TO_ATTEMPT.get(self.encoding)
63
64
  output = Text('', style='bytes.decoded')
@@ -144,8 +145,8 @@ class DecodingAttempt:
144
145
  else:
145
146
  return self._failed_to_decode_msg_txt(last_exception)
146
147
 
147
- def _to_rich_text(self, _string: str, bytes_offset: int=0) -> Text:
148
- """Convert a decoded string to highlighted Text representation"""
148
+ def _to_rich_text(self, _string: str, bytes_offset: int = 0) -> Text:
149
+ """Convert a decoded string to highlighted Text representation."""
149
150
  # Adjust where we start the highlighting given the multibyte nature of the encodings
150
151
  log.debug(f"Stepping through {self.encoding} encoded string...")
151
152
  txt = Text('', style=self.bytes_match.style_at_position(0))
@@ -181,4 +182,4 @@ class DecodingAttempt:
181
182
  def _failed_to_decode_msg_txt(self, exception: Optional[Exception]) -> Text:
182
183
  """Set failed_to_decode flag and return a Text object with the error message."""
183
184
  self.failed_to_decode = True
184
- return prefix_with_plain_text_obj(f"(decode failed: {exception})", style='red dim italic')
185
+ return prefix_with_style(f"(decode failed: {exception})", style='red dim italic')