yaralyzer 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

@@ -1,5 +1,5 @@
1
1
  """
2
- Methods for building Rich layout elements for display of results.
2
+ Methods for computing and displaying various file hashes.
3
3
  """
4
4
  import hashlib
5
5
  from collections import namedtuple
@@ -18,7 +18,18 @@ def bytes_hashes_table(
18
18
  title: Optional[str] = None,
19
19
  title_justify: str = LEFT
20
20
  ) -> Table:
21
- """Build a table to show the MD5, SHA1, SHA256, etc."""
21
+ """
22
+ Build a Rich `Table` displaying the size, MD5, SHA1, and SHA256 hashes of a byte sequence.
23
+
24
+ Args:
25
+ bytes_or_bytes_info (Union[bytes, BytesInfo]): The `bytes` to hash, or a `BytesInfo`
26
+ namedtuple with precomputed values.
27
+ title (Optional[str], optional): Optional title for the table. Defaults to `None`.
28
+ title_justify (str, optional): Justification for the table title. Defaults to `"LEFT"`.
29
+
30
+ Returns:
31
+ Table: A Rich `Table` object with the size and hash values.
32
+ """
22
33
  if isinstance(bytes_or_bytes_info, bytes):
23
34
  bytes_info = compute_file_hashes(bytes_or_bytes_info)
24
35
  else:
@@ -40,6 +51,15 @@ def bytes_hashes_table(
40
51
 
41
52
 
42
53
  def compute_file_hashes(_bytes: bytes) -> BytesInfo:
54
+ """
55
+ Compute the size, MD5, SHA1, and SHA256 hashes for a given byte sequence.
56
+
57
+ Args:
58
+ _bytes (bytes): The `bytes` to hash.
59
+
60
+ Returns:
61
+ BytesInfo: `BytesInfo` namedtuple containing size, md5, sha1, and sha256 values.
62
+ """
43
63
  return BytesInfo(
44
64
  size=len(_bytes),
45
65
  md5=hashlib.md5(_bytes).hexdigest().upper(),
@@ -49,5 +69,14 @@ def compute_file_hashes(_bytes: bytes) -> BytesInfo:
49
69
 
50
70
 
51
71
  def compute_file_hashes_for_file(file_path) -> BytesInfo:
72
+ """
73
+ Compute the size, MD5, SHA1, and SHA256 hashes for the contents of a file.
74
+
75
+ Args:
76
+ file_path (str): Path to the file to hash.
77
+
78
+ Returns:
79
+ BytesInfo: `BytesInfo` namedtuple containing size, md5, sha1, and sha256 values for the file contents.
80
+ """
52
81
  with open(file_path, 'rb') as file:
53
82
  return compute_file_hashes(file.read())
@@ -1,12 +1,5 @@
1
1
  """
2
- Class to measure what we enounter as we iterate over every single match of a relatively simple byte level regex
3
- (e.g. "bytes between quotes") against a relatively large pool of close to random encrypted binary data
4
-
5
- Things like how much many of our matched bytes were we able to decode easily vs. by force vs. not at all,
6
- were some encodings have a higher pct of success than others (indicating part of our mystery data might be encoded
7
- that way?
8
-
9
- TODO: use @dataclass decorator https://realpython.com/python-data-classes/
2
+ `RegexMatchMetrics` class.
10
3
  """
11
4
  from collections import defaultdict
12
5
 
@@ -15,6 +8,30 @@ from yaralyzer.util.logging import log
15
8
 
16
9
 
17
10
  class RegexMatchMetrics:
11
+ """
12
+ Class to measure what we encounter as we iterate over all matches of a relatively simple byte level regex.
13
+
14
+ Things like how many of our matched bytes we were able to decode easily vs. by force vs. not at all,
15
+ and whether some encodings have a higher pct of success than others (indicating part of our mystery data might be
16
+ encoded that way).
17
+
18
+ Example:
19
+ "Find bytes between quotes" against a relatively large pool of close to random encrypted binary data.
20
+
21
+ Attributes:
22
+ match_count (int): Total number of matches found.
23
+ bytes_matched (int): Total number of bytes matched across all matches.
24
+ matches_decoded (int): Number of matches where we were able to decode at least some of the matched bytes.
25
+ easy_decode_count (int): Number of matches where we were able to decode the matched bytes without forcing.
26
+ forced_decode_count (int): Number of matches where we were only able to decode the matched bytes by forcing.
27
+ undecodable_count (int): Number of matches where we were unable to decode any of the matched bytes.
28
+ skipped_matches_lengths (defaultdict): Dictionary mapping lengths of skipped matches to their counts.
29
+ bytes_match_objs (list): List of `BytesMatch` objects for all matches encountered.
30
+ per_encoding_stats (defaultdict): Dictionary mapping encoding names to their respective `RegexMatchMetrics`.
31
+
32
+ TODO: use @dataclass decorator https://realpython.com/python-data-classes/
33
+ """
34
+
18
35
  def __init__(self) -> None:
19
36
  self.match_count = 0
20
37
  self.bytes_matched = 0
@@ -27,12 +44,20 @@ class RegexMatchMetrics:
27
44
  self.per_encoding_stats = defaultdict(lambda: RegexMatchMetrics())
28
45
 
29
46
  def num_matches_skipped_for_being_empty(self) -> int:
47
+ """Number of matches skipped for being empty (0 length)."""
30
48
  return self.skipped_matches_lengths[0]
31
49
 
32
50
  def num_matches_skipped_for_being_too_big(self) -> int:
51
+ """Number of matches skipped for being too big to decode."""
33
52
  return sum({k: v for k, v in self.skipped_matches_lengths.items() if k > 0}.values())
34
53
 
35
54
  def tally_match(self, decoder: BytesDecoder) -> None:
55
+ """
56
+ Tally statistics from a `BytesDecoder` after it has processed a match.
57
+
58
+ Args:
59
+ decoder (BytesDecoder): The `BytesDecoder` that processed a match.
60
+ """
36
61
  log.debug(f"Tallying {decoder.bytes_match} ({len(decoder.decodings)} decodings)")
37
62
  self.match_count += 1
38
63
  self.bytes_matched += decoder.bytes_match.match_length
@@ -81,15 +81,11 @@ YARALYZER_THEME = Theme(YARALYZER_THEME_DICT)
81
81
 
82
82
 
83
83
  def console_width_possibilities():
84
+ """Returns a list of possible console widths, the first being the current terminal width."""
84
85
  # Subtract 2 from terminal cols just as a precaution in case things get weird
85
86
  return [get_terminal_size().columns - 2, DEFAULT_CONSOLE_WIDTH]
86
87
 
87
88
 
88
- def console_width() -> int:
89
- """Current width set in console obj"""
90
- return console._width or 40
91
-
92
-
93
89
  # Maximize output width if YARALYZER_MAXIMIZE_WIDTH is set (can also be changed with --maximize-width option)
94
90
  if is_invoked_by_pytest():
95
91
  CONSOLE_WIDTH = DEFAULT_CONSOLE_WIDTH
@@ -103,8 +99,8 @@ CONSOLE_PRINT_BYTE_WIDTH = int(CONSOLE_WIDTH / 4.0)
103
99
  console = Console(theme=YARALYZER_THEME, color_system='256', highlight=False, width=CONSOLE_WIDTH)
104
100
 
105
101
 
106
- def console_print_with_fallback(_string, style=None) -> None:
107
- """Fallback to regular print() if there's a Markup issue"""
102
+ def console_print_with_fallback(_string: Text | str, style=None) -> None:
103
+ """`rich.console.print()` with fallback to regular `print()` if there's a Rich Markup issue."""
108
104
  try:
109
105
  console.print(_string, style=style)
110
106
  except MarkupError:
@@ -112,11 +108,18 @@ def console_print_with_fallback(_string, style=None) -> None:
112
108
  print(_string.plain if isinstance(_string, Text) else _string)
113
109
 
114
110
 
115
- def theme_colors_with_prefix(prefix: str) -> List[Text]:
116
- return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
111
+ def console_width() -> int:
112
+ """Current width set in `console` object."""
113
+ return console._width or 40
117
114
 
118
115
 
119
116
  def print_fatal_error_and_exit(error_message: str) -> None:
117
+ """
118
+ Print a fatal error message in a `Panel` and exit.
119
+
120
+ Args:
121
+ error_message (str): The error message to display.
122
+ """
120
123
  console.line(1)
121
124
  print_header_panel(error_message, style='bold red reverse')
122
125
  console.line(1)
@@ -124,4 +127,18 @@ def print_fatal_error_and_exit(error_message: str) -> None:
124
127
 
125
128
 
126
129
  def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0, 2)) -> None:
130
+ """
131
+ Print a headline inside a styled Rich `Panel` to the console.
132
+
133
+ Args:
134
+ headline (str): The text to display as the panel's headline.
135
+ style (str): The style to apply to the panel (e.g., color, bold, reverse).
136
+ expand (bool, optional): Whether the panel should expand to the full console width. Defaults to `True`.
137
+ padding (tuple, optional): Padding around the panel content (top/bottom, left/right). Defaults to `(0, 2)`.
138
+ """
127
139
  console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
140
+
141
+
142
+ def theme_colors_with_prefix(prefix: str) -> List[Text]:
143
+ """Return a list of (name, style) `Text` objects for all styles in the theme that start with `prefix`."""
144
+ return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
@@ -1,3 +1,4 @@
1
+ """Argument parsing for yaralyzer CLI tool."""
1
2
  import logging
2
3
  import re
3
4
  import sys
@@ -217,9 +218,16 @@ YaralyzerConfig.set_argument_parser(parser)
217
218
 
218
219
  def parse_arguments(args: Optional[Namespace] = None):
219
220
  """
220
- Parse command line args. Most settings can be communicated to the app by setting env vars.
221
- If args are passed neither rules nor a regex need be provided as it is assumed
222
- the constructor will instantiate a Yaralyzer object directly.
221
+ Parse command line args. Most arguments can also be communicated to the app by setting env vars.
222
+ If `args` are passed neither rules nor a regex need be provided as it is assumed
223
+ the constructor will instantiate a `Yaralyzer` object directly.
224
+
225
+ Args:
226
+ args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
227
+ Defaults to `None`.
228
+
229
+ Raises:
230
+ ArgumentError: If args are invalid.
223
231
  """
224
232
  if '--version' in sys.argv:
225
233
  print(f"yaralyzer {version('yaralyzer')}")
@@ -281,6 +289,7 @@ def parse_arguments(args: Optional[Namespace] = None):
281
289
 
282
290
 
283
291
  def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
292
+ """Get the basepath (directory + filename without extension) for exported files."""
284
293
  file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
285
294
  args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}" # noqa: E221
286
295
  args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
yaralyzer/util/logging.py CHANGED
@@ -1,28 +1,34 @@
1
1
  """
2
- There's two possible log sinks other than STDOUT:
2
+ Handle logging for `yaralyzer`.
3
3
 
4
- 1. 'log' - the application log (standard log, what goes to STDOUT with -D option)
4
+ There are two possible log sinks other than `STDOUT`:
5
+
6
+ 1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
5
7
  2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
6
8
 
7
- The regular log file at APPLICATION_LOG_PATH is where the quite verbose application logs
9
+ The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
8
10
  will be written if things ever need to get that formal. For now those logs are only accessible
9
- on STDOUT with the -D flag but the infrastructure for persistent logging exists if someone
11
+ on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
10
12
  needs/wants that sort of thing.
11
13
 
12
- Logs are not normally ephemeral/not written to files but can be configured to do so by setting
13
- the YARALYZER_LOG_DIR env var. See .yaralyzer.example for documentation about the side effects of setting
14
- YARALYZER_LOG_DIR to a value.
14
+ Logs are normally ephemeral (not written to files) but can be configured to be written to files by setting
15
+ the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
16
+ of setting `YARALYZER_LOG_DIR` to a value.
17
+
18
+ * [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
15
19
 
16
- https://docs.python.org/3/library/logging.html#logging.basicConfig
17
- https://realpython.com/python-logging/
20
+ * [realpython.com/python-logging/](https://realpython.com/python-logging/)
18
21
 
19
22
  Python log levels for reference:
23
+
24
+ ```
20
25
  CRITICAL 50
21
26
  ERROR 40
22
27
  WARNING 30
23
28
  INFO 20
24
29
  DEBUG 10
25
30
  NOTSET 0
31
+ ```
26
32
  """
27
33
  import logging
28
34
  import sys
@@ -37,13 +43,22 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
37
43
 
38
44
 
39
45
  def configure_logger(log_label: str) -> logging.Logger:
40
- """Set up a file or stream logger depending on the configuration"""
46
+ """
47
+ Set up a file or stream `logger` depending on the configuration.
48
+
49
+ Args:
50
+ log_label (str): The label for the `logger`, e.g. "run" or "invocation".
51
+ Actual name will be `"yaralyzer.{log_label}"`.
52
+
53
+ Returns:
54
+ logging.Logger: The configured `logger`.
55
+ """
41
56
  log_name = f"yaralyzer.{log_label}"
42
57
  logger = logging.getLogger(log_name)
43
58
 
44
59
  if YaralyzerConfig.LOG_DIR:
45
60
  if not path.isdir(YaralyzerConfig.LOG_DIR) or not path.isabs(YaralyzerConfig.LOG_DIR):
46
- raise RuntimeError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
61
+ raise FileNotFoundError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
47
62
 
48
63
  log_file_path = path.join(YaralyzerConfig.LOG_DIR, f"{log_name}.log")
49
64
  log_formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
@@ -70,14 +85,14 @@ if YaralyzerConfig.LOG_DIR:
70
85
  invocation_log.setLevel('INFO')
71
86
 
72
87
 
73
- def log_and_print(msg: str, log_level='INFO'):
74
- """Both print and log (at INFO level) a string"""
88
+ def log_and_print(msg: str, log_level: str = 'INFO'):
89
+ """Both print (to console) and log (to file) a string."""
75
90
  log.log(logging.getLevelName(log_level), msg)
76
91
  print(msg)
77
92
 
78
93
 
79
94
  def log_current_config():
80
- """Write current state of YaralyzerConfig object to the logs"""
95
+ """Write current state of `YaralyzerConfig` object to the logs."""
81
96
  msg = f"{YaralyzerConfig.__name__} current attributes:\n"
82
97
  config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
83
98
 
@@ -88,14 +103,14 @@ def log_current_config():
88
103
 
89
104
 
90
105
  def log_invocation() -> None:
91
- """Log the command used to launch the yaralyzer to the invocation log"""
106
+ """Log the command used to launch the `yaralyzer` to the invocation log."""
92
107
  msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
93
108
  log.info(msg)
94
109
  invocation_log.info(msg)
95
110
 
96
111
 
97
112
  def log_argparse_result(args, label: str):
98
- """Logs the result of argparse"""
113
+ """Logs the result of `argparse`."""
99
114
  args_dict = vars(args)
100
115
  log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
101
116
 
@@ -1,17 +1,22 @@
1
1
  """
2
- Rich text decorator for YARA match dicts, which look like this:
3
-
4
- {
5
- 'tags': ['foo', 'bar'],
6
- 'matches': True,
7
- 'namespace': 'default',
8
- 'rule': 'my_rule',
9
- 'meta': {},
10
- 'strings': [
11
- StringMatch1,
12
- StringMatch2
13
- ]
14
- }
2
+ Rich text decorator for YARA match dicts.
3
+
4
+ A YARA match is returned as a `dict` with this structure:
5
+
6
+ Example:
7
+ ```
8
+ {
9
+ 'tags': ['foo', 'bar'],
10
+ 'matches': True,
11
+ 'namespace': 'default',
12
+ 'rule': 'my_rule',
13
+ 'meta': {},
14
+ 'strings': [
15
+ StringMatch1,
16
+ StringMatch2
17
+ ]
18
+ }
19
+ ```
15
20
  """
16
21
  import re
17
22
  from numbers import Number
@@ -30,11 +35,12 @@ from yaralyzer.output.rich_console import console_width, theme_colors_with_prefi
30
35
  from yaralyzer.util.logging import log
31
36
 
32
37
  MATCH_PADDING = (0, 0, 0, 1)
33
- URL_REGEX = re.compile('^https?:')
38
+
39
+ DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
34
40
  DIGITS_REGEX = re.compile("^\\d+$")
35
41
  HEX_REGEX = re.compile('^[0-9A-Fa-f]+$')
36
- DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
37
42
  MATCHER_VAR_REGEX = re.compile('\\$[a-z_]+')
43
+ URL_REGEX = re.compile('^https?:')
38
44
 
39
45
  YARA_STRING_STYLES: Dict[re.Pattern, str] = {
40
46
  URL_REGEX: 'yara.url',
@@ -50,14 +56,21 @@ RAW_YARA_THEME_TXT.justify = CENTER
50
56
 
51
57
 
52
58
  class YaraMatch:
59
+ """Rich text decorator for YARA match dicts."""
60
+
53
61
  def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
62
+ """
63
+ Args:
64
+ match (dict): The YARA match dict.
65
+ matched_against_bytes_label (Text): Label indicating what bytes were matched against.
66
+ """
54
67
  self.match = match
55
68
  self.rule_name = match['rule']
56
69
  self.label = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
57
70
  self.label.append(self.rule_name, style='on bright_red bold').append("'!", style='siren')
58
71
 
59
72
  def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
60
- """Renders a panel showing the color highlighted raw YARA match info."""
73
+ """Renders a rich `Panel` showing the color highlighted raw YARA match info."""
61
74
  yield Text("\n")
62
75
  yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
63
76
  yield RAW_YARA_THEME_TXT
@@ -65,7 +78,16 @@ class YaraMatch:
65
78
 
66
79
 
67
80
  def _rich_yara_match(element: Any, depth: int = 0) -> Text:
68
- """Painful/hacky way of recursively coloring a yara result hash."""
81
+ """
82
+ Painful/hacky way of recursively coloring a YARA match dict.
83
+
84
+ Args:
85
+ element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
86
+ depth (int): Current recursion depth (used for indentation).
87
+
88
+ Returns:
89
+ Text: The rich `Text` representation of the element.
90
+ """
69
91
  indent = Text((depth + 1) * INDENT_SPACES)
70
92
  end_indent = Text(depth * INDENT_SPACES)
71
93
 
@@ -130,6 +152,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
130
152
 
131
153
 
132
154
  def _yara_string(_string: str) -> Text:
155
+ """Apply special styles to certain types of yara strings (e.g. URLs, numbers, hex, dates, matcher vars)."""
133
156
  for regex in YARA_STRING_STYLES.keys():
134
157
  if regex.match(_string):
135
158
  return Text(_string, YARA_STRING_STYLES[regex])
@@ -1,6 +1,9 @@
1
1
  """
2
- Builds bare bones YARA rules to match strings and regex patterns. Example rule string:
2
+ Builds bare bones YARA rules to match strings and regex patterns.
3
3
 
4
+ Example rule string:
5
+
6
+ ```
4
7
  rule Just_A_Piano_Man {
5
8
  meta:
6
9
  author = "Tim"
@@ -9,19 +12,23 @@ rule Just_A_Piano_Man {
9
12
  condition:
10
13
  $hilton_producer
11
14
  }
15
+ ```
12
16
  """
13
17
  import re
14
- from typing import Optional
18
+ from typing import Literal, Optional
15
19
 
16
20
  import yara
17
21
 
18
22
  from yaralyzer.config import YARALYZE
19
23
  from yaralyzer.util.logging import log
20
24
 
25
+ PatternType = Literal['hex', 'regex']
26
+ YaraModifierType = Literal['ascii', 'fullword', 'nocase', 'wide']
27
+
21
28
  HEX = 'hex'
29
+ PATTERN = 'pattern'
22
30
  REGEX = 'regex'
23
31
  RULE = 'rule'
24
- PATTERN = 'pattern'
25
32
  UNDERSCORE = '_'
26
33
  YARA_REGEX_MODIFIERS = ['nocase', 'ascii', 'wide', 'fullword']
27
34
 
@@ -60,12 +67,25 @@ rule {rule_name} {{
60
67
 
61
68
  def yara_rule_string(
62
69
  pattern: str,
63
- pattern_type: str = REGEX,
70
+ pattern_type: PatternType = REGEX,
64
71
  rule_name: str = YARALYZE,
65
72
  pattern_label: Optional[str] = PATTERN,
66
- modifier: Optional[str] = None
73
+ modifier: Optional[YaraModifierType] = None
67
74
  ) -> str:
68
- """Build a YARA rule string for a given pattern"""
75
+ """
76
+ Build a YARA rule string for a given `pattern`.
77
+
78
+ Args:
79
+ pattern (str): The string or regex pattern to match.
80
+ pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
81
+ rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
82
+ pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
83
+ modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
84
+ Only valid if `pattern_type` is `"regex"`.
85
+
86
+ Returns:
87
+ str: The constructed YARA rule as a string.
88
+ """
69
89
  if not (modifier is None or modifier in YARA_REGEX_MODIFIERS):
70
90
  raise TypeError(f"Modifier '{modifier}' is not one of {YARA_REGEX_MODIFIERS}")
71
91
 
@@ -73,6 +93,8 @@ def yara_rule_string(
73
93
  pattern = f"/{pattern}/"
74
94
  elif pattern_type == HEX:
75
95
  pattern = f"{{{pattern}}}"
96
+ else:
97
+ raise ValueError(f"pattern_type must be either '{REGEX}' or '{HEX}'")
76
98
 
77
99
  if modifier:
78
100
  pattern += f" {modifier}"
@@ -81,7 +103,8 @@ def yara_rule_string(
81
103
  rule_name=rule_name,
82
104
  pattern_label=pattern_label,
83
105
  pattern=pattern,
84
- modifier='' if modifier is None else f" {modifier}")
106
+ modifier='' if modifier is None else f" {modifier}"
107
+ )
85
108
 
86
109
  log.debug(f"Built YARA rule: \n{rule}")
87
110
  return rule
@@ -89,18 +112,39 @@ def yara_rule_string(
89
112
 
90
113
  def build_yara_rule(
91
114
  pattern: str,
92
- pattern_type: str = REGEX,
115
+ pattern_type: PatternType = REGEX,
93
116
  rule_name: str = YARALYZE,
94
117
  pattern_label: Optional[str] = PATTERN,
95
- modifier: Optional[str] = None
118
+ modifier: Optional[YaraModifierType] = None
96
119
  ) -> yara.Rule:
97
- """Build a compiled YARA rule"""
120
+ """
121
+ Build a compiled `yara.Rule` object.
122
+
123
+ Args:
124
+ pattern (str): The string or regex pattern to match.
125
+ pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
126
+ rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
127
+ pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
128
+ modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
129
+ Only valid if `pattern_type` is `"regex"`.
130
+
131
+ Returns:
132
+ yara.Rule: Compiled YARA rule object.
133
+ """
98
134
  rule_string = yara_rule_string(pattern, pattern_type, rule_name, pattern_label, modifier)
99
135
  return yara.compile(source=rule_string)
100
136
 
101
137
 
102
138
  def safe_label(_label: str) -> str:
103
- """YARA rule and pattern names can only contain alphanumeric chars"""
139
+ """
140
+ YARA rule and pattern names can only contain alphanumeric chars.
141
+
142
+ Args:
143
+ _label (str): The label to sanitize.
144
+
145
+ Returns:
146
+ str: A sanitized label safe for use in YARA rules.
147
+ """
104
148
  label = _label
105
149
 
106
150
  for char, replacement in SAFE_LABEL_REPLACEMENTS.items():