yaralyzer 1.0.6__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

Files changed (30) hide show
  1. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/CHANGELOG.md +10 -0
  2. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/PKG-INFO +12 -7
  3. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/README.md +3 -0
  4. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/pyproject.toml +65 -24
  5. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/__init__.py +5 -2
  6. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/bytes_match.py +145 -52
  7. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/config.py +18 -6
  8. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/decoding/bytes_decoder.py +34 -15
  9. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/decoding/decoding_attempt.py +10 -9
  10. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/character_encodings.py +40 -40
  11. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/encoding_assessment.py +10 -4
  12. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/encoding_detector.py +17 -13
  13. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/bytes_helper.py +113 -16
  14. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/dict_helper.py +1 -2
  15. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/file_helper.py +3 -3
  16. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/list_helper.py +1 -0
  17. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/rich_text_helper.py +13 -11
  18. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/string_helper.py +1 -1
  19. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/file_export.py +2 -1
  20. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/file_hashes_table.py +34 -6
  21. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/regex_match_metrics.py +13 -10
  22. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/rich_console.py +18 -3
  23. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/util/argument_parser.py +11 -10
  24. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/util/logging.py +6 -6
  25. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yara/yara_match.py +1 -1
  26. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yara/yara_rule_builder.py +16 -17
  27. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yaralyzer.py +66 -51
  28. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/.yaralyzer.example +0 -0
  29. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/LICENSE +0 -0
  30. {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/decoding_attempts_table.py +0 -0
@@ -1,5 +1,15 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.0.8
4
+ * Bump `python-dotenv` to v1.1.1
5
+ * Use `mkdocs` and `lazydocs` to build automatic API documentation at https://michelcrypt4d4mus.github.io/yaralyzer/
6
+ * Drop python 3.9 support (required by `mkdocs-awesome-nav` package)
7
+
8
+ ### 1.0.7
9
+ * Add `Changelog` to PyPi URLs, add some more PyPi classifiers
10
+ * Add `.flake8` config file and fix style errors
11
+ * Rename `prefix_with_plain_text_obj()` to `prefix_with_style()`
12
+
3
13
  ### 1.0.6
4
14
  * Add `Environment :: Console` and `Programming Language :: Python` to PyPi classifiers
5
15
  * Add `LICENSE` to PyPi package
@@ -1,31 +1,33 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: yaralyzer
3
- Version: 1.0.6
4
- Summary: Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors.
3
+ Version: 1.0.8
4
+ Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
6
6
  License: GPL-3.0-or-later
7
- Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,visualization,yara
7
+ Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
8
8
  Author: Michel de Cryptadamus
9
9
  Author-email: michel@cryptadamus.com
10
- Requires-Python: >=3.9,<4.0
10
+ Requires-Python: >=3.10,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Environment :: Console
13
13
  Classifier: Intended Audience :: Information Technology
14
14
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
15
15
  Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
- Classifier: Programming Language :: Python :: 3.9
18
17
  Classifier: Programming Language :: Python :: 3.10
19
18
  Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
20
21
  Classifier: Topic :: Artistic Software
21
22
  Classifier: Topic :: Scientific/Engineering :: Visualization
22
23
  Classifier: Topic :: Security
23
24
  Requires-Dist: chardet (>=5.0.0,<6.0.0)
24
- Requires-Dist: python-dotenv (>=0.21.0,<0.22.0)
25
+ Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
25
26
  Requires-Dist: rich (>=14.1.0,<15.0.0)
26
27
  Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
27
28
  Requires-Dist: yara-python (>=4.5.4,<5.0.0)
28
- Project-URL: Documentation, https://github.com/michelcrypt4d4mus/yaralyzer
29
+ Project-URL: Changelog, https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md
30
+ Project-URL: Documentation, https://michelcrypt4d4mus.github.io/yaralyzer/
29
31
  Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
30
32
  Description-Content-Type: text/markdown
31
33
 
@@ -117,6 +119,9 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
117
119
  do_stuff()
118
120
  ```
119
121
 
122
+ #### API Documentation
123
+ Auto-generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
124
+
120
125
  # Example Output
121
126
  The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
122
127
 
@@ -86,6 +86,9 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
86
86
  do_stuff()
87
87
  ```
88
88
 
89
+ #### API Documentation
90
+ Auto-generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
91
+
89
92
  # Example Output
90
93
  The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
91
94
 
@@ -1,13 +1,35 @@
1
1
  [tool.poetry]
2
2
  name = "yaralyzer"
3
- version = "1.0.6"
4
- description = "Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors."
3
+ version = "1.0.8"
4
+ description = "Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors."
5
5
  authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
6
6
  readme = "README.md"
7
7
  license = "GPL-3.0-or-later"
8
+
8
9
  homepage = "https://github.com/michelcrypt4d4mus/yaralyzer"
9
10
  repository = "https://github.com/michelcrypt4d4mus/yaralyzer"
10
- documentation = "https://github.com/michelcrypt4d4mus/yaralyzer"
11
+ documentation = "https://michelcrypt4d4mus.github.io/yaralyzer/"
12
+
13
+ classifiers = [
14
+ "Development Status :: 5 - Production/Stable",
15
+ "Environment :: Console",
16
+ "Intended Audience :: Information Technology",
17
+ "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
18
+ "Programming Language :: Python",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Artistic Software",
24
+ "Topic :: Security",
25
+ "Topic :: Scientific/Engineering :: Visualization",
26
+ ]
27
+
28
+ include = [
29
+ "CHANGELOG.md",
30
+ "LICENSE",
31
+ ".yaralyzer.example"
32
+ ]
11
33
 
12
34
  keywords = [
13
35
  "ascii art",
@@ -33,46 +55,65 @@ keywords = [
33
55
  "threat hunting",
34
56
  "threat intelligence",
35
57
  "threat research",
58
+ "threatintel",
36
59
  "visualization",
37
60
  "yara",
38
61
  ]
39
62
 
40
- classifiers = [
41
- "Development Status :: 5 - Production/Stable",
42
- "Environment :: Console",
43
- "Intended Audience :: Information Technology",
44
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
45
- "Programming Language :: Python",
46
- "Topic :: Artistic Software",
47
- "Topic :: Security",
48
- "Topic :: Scientific/Engineering :: Visualization",
49
- ]
50
-
51
- include = [
52
- "CHANGELOG.md",
53
- "LICENSE",
54
- ".yaralyzer.example"
55
- ]
56
-
57
63
 
64
+ ####################
65
+ # Dependencies #
66
+ ####################
58
67
  [tool.poetry.dependencies]
59
- python = "^3.9"
68
+ python = "^3.10"
60
69
  chardet = ">=5.0.0,<6.0.0"
61
- #plyara = "^2.1.1"
62
- python-dotenv = "^0.21.0"
70
+ python-dotenv = "^1.1.1"
63
71
  rich = "^14.1.0"
64
72
  rich-argparse-plus = "^0.3.1"
65
73
  yara-python = "^4.5.4"
74
+ #plyara = "^2.1.1" # TODO: use plyara for YARA rule parsing and validation
66
75
 
67
76
  [tool.poetry.group.dev.dependencies]
77
+ flake8 = "^7.3.0"
78
+ lazydocs = "^0.4.8"
79
+ mkdocs = "^1.6.1"
80
+ mkdocs-awesome-nav = "^3.1.2"
81
+ mkdocs-include-markdown-plugin = "^7.1.7"
82
+ mkdocs-material = "^9.6.19"
83
+ pydocstyle = "^6.3.0"
68
84
  pytest = "^7.1.3"
69
85
 
70
86
 
87
+ #############
88
+ # Scripts #
89
+ #############
71
90
  [tool.poetry.scripts]
72
91
  yaralyze = 'yaralyzer:yaralyze'
73
92
  yaralyzer_show_color_theme = 'yaralyzer.helpers.rich_text_helper:yaralyzer_show_color_theme'
74
93
 
75
94
 
95
+ ###############
96
+ # PyPi URLs #
97
+ ###############
98
+ [tool.poetry.urls]
99
+ Changelog = "https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md"
100
+
101
+
102
+ #################
103
+ # Build Stuff #
104
+ #################
76
105
  [build-system]
77
- requires = ["poetry-core"]
78
106
  build-backend = "poetry.core.masonry.api"
107
+ requires = ["poetry-core"]
108
+
109
+ [tool.pydocstyle]
110
+ match-dir = "yaralyzer"
111
+ ignore = [
112
+ "D200", # One-line docstring should fit on one line with quotes (found 3)
113
+ "D203", # 1 blank line required before class docstring
114
+ "D212", # Multi-line docstring summary should start at the first line
115
+ "D401", # First line should be in imperative mood
116
+ "D406", # Section name should end with a newline
117
+ "D407", # Missing dashed underline after section
118
+ "D413", # Missing blank line after last section
119
+ ]
@@ -11,11 +11,9 @@ if not environ.get('INVOKED_BY_PYTEST', False):
11
11
  load_dotenv(dotenv_path=dotenv_file)
12
12
  break
13
13
 
14
- from yaralyzer.config import YaralyzerConfig
15
14
  from yaralyzer.output.file_export import export_json, invoke_rich_export
16
15
  from yaralyzer.output.rich_console import console
17
16
  from yaralyzer.util.argument_parser import get_export_basepath, parse_arguments
18
- from yaralyzer.util.logging import log
19
17
  from yaralyzer.yara.yara_rule_builder import HEX, REGEX
20
18
  from yaralyzer.yaralyzer import Yaralyzer
21
19
 
@@ -26,6 +24,11 @@ PDFALYZER_MSG_TXT.append('https://github.com/michelcrypt4d4mus/pdfalyzer\n', sty
26
24
 
27
25
 
28
26
  def yaralyze():
27
+ """
28
+ Entry point for yaralyzer when invoked as a script.
29
+
30
+ Args are parsed from the command line and environment variables. See yaralyzer --help for details.
31
+ """
29
32
  args = parse_arguments()
30
33
  output_basepath = None
31
34
 
@@ -1,10 +1,4 @@
1
- """
2
- Simple class to keep track of regex matches against binary data. Basically an re.match object with
3
- some (not many) extra bells and whistles, most notably the surrounding_bytes property.
4
-
5
- pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
6
- e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
7
- """
1
+ """BytesMatch class for tracking regex and YARA matches against binary data."""
8
2
  import re
9
3
  from typing import Iterator, Optional
10
4
 
@@ -13,25 +7,43 @@ from rich.text import Text
13
7
  from yara import StringMatch, StringMatchInstance
14
8
 
15
9
  from yaralyzer.config import YaralyzerConfig
16
- from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
10
+ from yaralyzer.helpers.rich_text_helper import prefix_with_style
17
11
  from yaralyzer.output.file_hashes_table import bytes_hashes_table
18
12
  from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
19
13
 
20
14
 
21
15
  class BytesMatch:
16
+ """
17
+ Simple class to keep track of regex matches against binary data.
18
+
19
+ Basically an re.match object with some (not many) extra bells and whistles, most notably
20
+ the surrounding_bytes property.
21
+
22
+ pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
23
+ e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
24
+ """
25
+
22
26
  def __init__(
23
- self,
24
- matched_against: bytes,
25
- start_idx: int,
26
- length: int,
27
- label: str,
28
- ordinal: int,
29
- match: Optional[re.Match] = None, # It's rough to get the regex from yara :(
30
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
31
- ) -> None:
32
- """
33
- Ordinal means it's the Nth match with this regex (not super important but useful)
34
- YARA makes it a little rouch to get the actual regex that matched. Can be done with plyara eventually.
27
+ self,
28
+ matched_against: bytes,
29
+ start_idx: int,
30
+ length: int,
31
+ label: str,
32
+ ordinal: int,
33
+ match: Optional[re.Match] = None, # It's rough to get the regex from yara :(
34
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
35
+ ) -> None:
36
+ """
37
+ Initialize a BytesMatch object representing a match against binary data.
38
+
39
+ Args:
40
+ matched_against (bytes): The full byte sequence that was searched.
41
+ start_idx (int): Start index of the match in the byte sequence.
42
+ length (int): Length of the match in bytes.
43
+ label (str): Label for the match (e.g., regex or YARA rule name).
44
+ ordinal (int): The Nth match for this pattern.
45
+ match (Optional[re.Match]): Regex match object, if available.
46
+ highlight_style (str): Style to use for highlighting the match.
35
47
  """
36
48
  self.matched_against: bytes = matched_against
37
49
  self.start_idx: int = start_idx
@@ -52,25 +64,50 @@ class BytesMatch:
52
64
 
53
65
  @classmethod
54
66
  def from_regex_match(
55
- cls,
56
- matched_against: bytes,
57
- match: re.Match,
58
- ordinal: int,
59
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
60
- ) -> 'BytesMatch':
67
+ cls,
68
+ matched_against: bytes,
69
+ match: re.Match,
70
+ ordinal: int,
71
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
72
+ ) -> 'BytesMatch':
73
+ """
74
+ Create a BytesMatch from a regex match object.
75
+
76
+ Args:
77
+ matched_against (bytes): The bytes searched.
78
+ match (re.Match): The regex match object.
79
+ ordinal (int): The Nth match for this pattern.
80
+ highlight_style (str): Style for highlighting.
81
+
82
+ Returns:
83
+ BytesMatch: The constructed BytesMatch instance.
84
+ """
61
85
  return cls(matched_against, match.start(), len(match[0]), match.re.pattern, ordinal, match, highlight_style)
62
86
 
63
87
  @classmethod
64
88
  def from_yara_str(
65
- cls,
66
- matched_against: bytes,
67
- rule_name: str,
68
- yara_str_match: StringMatch,
69
- yara_str_match_instance: StringMatchInstance,
70
- ordinal: int,
71
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
72
- ) -> 'BytesMatch':
73
- """Build a BytesMatch from a yara string match. 'matched_against' is the set of bytes yara was run against."""
89
+ cls,
90
+ matched_against: bytes,
91
+ rule_name: str,
92
+ yara_str_match: StringMatch,
93
+ yara_str_match_instance: StringMatchInstance,
94
+ ordinal: int,
95
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
96
+ ) -> 'BytesMatch':
97
+ """
98
+ Build a BytesMatch from a YARA string match instance.
99
+
100
+ Args:
101
+ matched_against (bytes): The bytes searched.
102
+ rule_name (str): Name of the YARA rule.
103
+ yara_str_match (StringMatch): YARA string match object.
104
+ yara_str_match_instance (StringMatchInstance): Instance of the string match.
105
+ ordinal (int): The Nth match for this pattern.
106
+ highlight_style (str): Style for highlighting.
107
+
108
+ Returns:
109
+ BytesMatch: The constructed BytesMatch instance.
110
+ """
74
111
  pattern_label = yara_str_match.identifier
75
112
 
76
113
  # Don't duplicate the labeling if rule_name and yara_str are the same
@@ -89,12 +126,22 @@ class BytesMatch:
89
126
 
90
127
  @classmethod
91
128
  def from_yara_match(
92
- cls,
93
- matched_against: bytes,
94
- yara_match: dict,
95
- highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
96
- ) -> Iterator['BytesMatch']:
97
- """Iterator w/a BytesMatch for each string returned as part of a YARA match result dict."""
129
+ cls,
130
+ matched_against: bytes,
131
+ yara_match: dict,
132
+ highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
133
+ ) -> Iterator['BytesMatch']:
134
+ """
135
+ Yield a BytesMatch for each string returned as part of a YARA match result dict.
136
+
137
+ Args:
138
+ matched_against (bytes): The bytes searched.
139
+ yara_match (dict): YARA match result dictionary.
140
+ highlight_style (str): Style for highlighting.
141
+
142
+ Yields:
143
+ BytesMatch: For each string match in the YARA result.
144
+ """
98
145
  i = 0 # For numbered labeling
99
146
 
100
147
  # yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
@@ -102,28 +149,43 @@ class BytesMatch:
102
149
  for yara_str_match_instance in yara_str_match.instances:
103
150
  i += 1
104
151
 
105
- yield(cls.from_yara_str(
152
+ yield cls.from_yara_str(
106
153
  matched_against,
107
154
  yara_match['rule'],
108
155
  yara_str_match,
109
156
  yara_str_match_instance,
110
157
  i,
111
- highlight_style))
158
+ highlight_style
159
+ )
112
160
 
113
161
  def style_at_position(self, idx) -> str:
114
- """Get the style for the byte at position idx within the matched bytes"""
162
+ """
163
+ Get the style for the byte at position idx within the matched bytes.
164
+
165
+ Args:
166
+ idx (int): Index within the surrounding bytes.
167
+
168
+ Returns:
169
+ str: The style to use for this byte (highlight or greyed out).
170
+ """
115
171
  if idx < self.highlight_start_idx or idx >= self.highlight_end_idx:
116
172
  return GREY_ADDRESS
117
173
  else:
118
174
  return self.highlight_style
119
175
 
120
176
  def location(self) -> Text:
121
- """Returns a Text obj like '(start idx: 348190, end idx: 348228)'"""
122
- location_txt = prefix_with_plain_text_obj(
177
+ """
178
+ Get a styled Text object describing the start and end index of the match.
179
+
180
+ Returns:
181
+ Text: Rich Text object like '(start idx: 348190, end idx: 348228)'.
182
+ """
183
+ location_txt = prefix_with_style(
123
184
  f"(start idx: ",
124
185
  style='off_white',
125
186
  root_style='decode.subheading'
126
187
  )
188
+
127
189
  location_txt.append(str(self.start_idx), style='number')
128
190
  location_txt.append(', end idx: ', style='off_white')
129
191
  location_txt.append(str(self.end_idx), style='number')
@@ -131,13 +193,26 @@ class BytesMatch:
131
193
  return location_txt
132
194
 
133
195
  def is_decodable(self) -> bool:
134
- """True if SUPPRESS_DECODES_TABLE is false and length of self.bytes is between MIN/MAX_DECODE_LENGTH"""
196
+ """
197
+ Determine if the matched bytes should be decoded.
198
+
199
+ Whether the bytes are decodable depends on whether SUPPRESS_DECODES_TABLE is set
200
+ and whether the match length is between MIN/MAX_DECODE_LENGTH.
201
+
202
+ Returns:
203
+ bool: True if decodable, False otherwise.
204
+ """
135
205
  return self.match_length >= YaralyzerConfig.args.min_decode_length \
136
206
  and self.match_length <= YaralyzerConfig.args.max_decode_length \
137
207
  and not YaralyzerConfig.args.suppress_decodes_table
138
208
 
139
209
  def bytes_hashes_table(self) -> Table:
140
- """Helper function to build the MD5/SHA table for self.bytes"""
210
+ """
211
+ Build a table of MD5/SHA hashes for the matched bytes.
212
+
213
+ Returns:
214
+ Table: Rich Table object with hashes.
215
+ """
141
216
  return bytes_hashes_table(
142
217
  self.bytes,
143
218
  self.location().plain,
@@ -145,7 +220,12 @@ class BytesMatch:
145
220
  )
146
221
 
147
222
  def suppression_notice(self) -> Text:
148
- """Generate a message for when there are too few/too many bytes"""
223
+ """
224
+ Generate a message for when the match is too short or too long to decode.
225
+
226
+ Returns:
227
+ Text: Rich Text object with the suppression notice.
228
+ """
149
229
  txt = self.__rich__()
150
230
 
151
231
  if self.match_length < YaralyzerConfig.args.min_decode_length:
@@ -157,7 +237,12 @@ class BytesMatch:
157
237
  return txt
158
238
 
159
239
  def to_json(self) -> dict:
160
- """Convert this BytesMatch to a JSON-serializable dict."""
240
+ """
241
+ Convert this BytesMatch to a JSON-serializable dictionary.
242
+
243
+ Returns:
244
+ dict: Dictionary representation of the match, suitable for JSON serialization.
245
+ """
161
246
  json_dict = {
162
247
  'label': self.label,
163
248
  'match_length': self.match_length,
@@ -176,7 +261,13 @@ class BytesMatch:
176
261
  return json_dict
177
262
 
178
263
  def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
179
- """Find the surrounding bytes, making sure not to step off the beginning or end"""
264
+ """
265
+ Find and set the bytes surrounding the match, ensuring indices stay within bounds.
266
+
267
+ Args:
268
+ num_before (Optional[int]): Number of bytes before the match to include.
269
+ num_after (Optional[int]): Number of bytes after the match to include.
270
+ """
180
271
  num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
181
272
  num_before = num_before or YaralyzerConfig.args.surrounding_bytes
182
273
  self.surrounding_start_idx: int = max(self.start_idx - num_before, 0)
@@ -184,11 +275,13 @@ class BytesMatch:
184
275
  self.surrounding_bytes: bytes = self.matched_against[self.surrounding_start_idx:self.surrounding_end_idx]
185
276
 
186
277
  def __rich__(self) -> Text:
187
- headline = prefix_with_plain_text_obj(str(self.match_length), style='number', root_style='decode.subheading')
278
+ """Get a rich Text representation of the match for display."""
279
+ headline = prefix_with_style(str(self.match_length), style='number', root_style='decode.subheading')
188
280
  headline.append(f" bytes matching ")
189
281
  headline.append(f"{self.label} ", style=ALERT_STYLE if self.highlight_style == ALERT_STYLE else 'regex')
190
282
  headline.append('at ')
191
283
  return headline + self.location()
192
284
 
193
285
  def __str__(self):
286
+ """Plain text (no rich colors) representation of the match for display."""
194
287
  return self.__rich__().plain
@@ -1,3 +1,6 @@
1
+ """
2
+ Configuration management for Yaralyzer.
3
+ """
1
4
  import logging
2
5
  from argparse import ArgumentParser, Namespace
3
6
  from os import environ
@@ -15,16 +18,19 @@ MEGABYTE = 1024 * KILOBYTE
15
18
 
16
19
  def config_var_name(env_var: str) -> str:
17
20
  """
18
- Get the name of env_var and strip off 'YARALYZER_', e.g.:
19
- SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
20
- config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
21
+ Get the name of env_var and strip off 'YARALYZER_' prefix.
22
+
23
+ Example:
24
+ $ SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
25
+ $ config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
26
+
21
27
  """
22
28
  env_var = env_var.removeprefix("YARALYZER_")
23
29
  return f'{env_var=}'.partition('=')[0]
24
30
 
25
31
 
26
32
  def is_env_var_set_and_not_false(var_name):
27
- """Returns True if var_name is not empty and set to anything other than 'false' (capitalization agnostic)"""
33
+ """Return True if var_name is not empty and set to anything other than 'false' (capitalization agnostic)."""
28
34
  if var_name in environ:
29
35
  var_value = environ[var_name]
30
36
  return var_value is not None and len(var_value) > 0 and var_value.lower() != 'false'
@@ -33,11 +39,13 @@ def is_env_var_set_and_not_false(var_name):
33
39
 
34
40
 
35
41
  def is_invoked_by_pytest():
36
- """Return true if pytest is running"""
42
+ """Return true if pytest is running."""
37
43
  return is_env_var_set_and_not_false(PYTEST_FLAG)
38
44
 
39
45
 
40
46
  class YaralyzerConfig:
47
+ """Handles parsing of command line args and environment variables for Yaralyzer."""
48
+
41
49
  # Passed through to yara.set_config()
42
50
  DEFAULT_MAX_MATCH_LENGTH = 100 * KILOBYTE
43
51
  DEFAULT_YARA_STACK_SIZE = 2 * 65536
@@ -76,11 +84,13 @@ class YaralyzerConfig:
76
84
 
77
85
  @classmethod
78
86
  def set_argument_parser(cls, parser: ArgumentParser) -> None:
87
+ """Set the _argument_parser class variable that will be used to parse command line args."""
79
88
  cls._argument_parser: ArgumentParser = parser
80
89
  cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
81
90
 
82
91
  @classmethod
83
92
  def set_args(cls, args: Namespace) -> None:
93
+ """Set the args class variable and update args with any environment variable overrides."""
84
94
  cls.args = args
85
95
 
86
96
  for option in cls._argparse_keys:
@@ -91,7 +101,7 @@ class YaralyzerConfig:
91
101
  env_var = f"{YARALYZER}_{option.upper()}"
92
102
  env_value = environ.get(env_var)
93
103
  default_value = cls.get_default_arg(option)
94
- #print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}")
104
+ # print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}") # noqa: E501
95
105
 
96
106
  # TODO: as is you can't override env vars with CLI args
97
107
  if isinstance(arg_value, bool):
@@ -105,9 +115,11 @@ class YaralyzerConfig:
105
115
 
106
116
  @classmethod
107
117
  def set_default_args(cls):
118
+ """Set args to their defaults as if parsed from the command line."""
108
119
  cls.set_args(cls._argument_parser.parse_args(['dummy']))
109
120
 
110
121
  @classmethod
111
122
  def get_default_arg(cls, arg: str) -> Any:
123
+ """Return the default value for arg as defined by a DEFAULT_ style class variable."""
112
124
  default_var = f"DEFAULT_{arg.upper()}"
113
125
  return vars(cls).get(default_var)