yaralyzer 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

@@ -218,9 +218,16 @@ YaralyzerConfig.set_argument_parser(parser)
218
218
 
219
219
  def parse_arguments(args: Optional[Namespace] = None):
220
220
  """
221
- Parse command line args. Most settings can be communicated to the app by setting env vars.
222
- If args are passed neither rules nor a regex need be provided as it is assumed
223
- the constructor will instantiate a Yaralyzer object directly.
221
+ Parse command line args. Most arguments can also be communicated to the app by setting env vars.
222
+ If `args` are passed neither rules nor a regex need be provided as it is assumed
223
+ the constructor will instantiate a `Yaralyzer` object directly.
224
+
225
+ Args:
226
+ args (Optional[Namespace], optional): If provided, use these args instead of parsing from command line.
227
+ Defaults to `None`.
228
+
229
+ Raises:
230
+ ArgumentError: If args are invalid.
224
231
  """
225
232
  if '--version' in sys.argv:
226
233
  print(f"yaralyzer {version('yaralyzer')}")
@@ -282,6 +289,7 @@ def parse_arguments(args: Optional[Namespace] = None):
282
289
 
283
290
 
284
291
  def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
292
+ """Get the basepath (directory + filename without extension) for exported files."""
285
293
  file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
286
294
  args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}" # noqa: E221
287
295
  args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
yaralyzer/util/logging.py CHANGED
@@ -1,28 +1,34 @@
1
1
  """
2
- There's two possible log sinks other than STDOUT:
2
+ Handle logging for `yaralyzer`.
3
3
 
4
- 1. 'log' - the application log (standard log, what goes to STDOUT with -D option)
4
+ There are two possible log sinks other than `STDOUT`:
5
+
6
+ 1. 'log' - the application log (standard log, what goes to `STDOUT` with `-D` option)
5
7
  2. 'invocation_log' - tracks the exact command yaralyzer was invoked with, similar to a history file
6
8
 
7
- The regular log file at APPLICATION_LOG_PATH is where the quite verbose application logs
9
+ The regular log file at `APPLICATION_LOG_PATH` is where the quite verbose application logs
8
10
  will be written if things ever need to get that formal. For now those logs are only accessible
9
- on STDOUT with the -D flag but the infrastructure for persistent logging exists if someone
11
+ on `STDOUT` with the `-D` flag but the infrastructure for persistent logging exists if someone
10
12
  needs/wants that sort of thing.
11
13
 
12
- Logs are not normally ephemeral/not written to files but can be configured to do so by setting
13
- the YARALYZER_LOG_DIR env var. See .yaralyzer.example for documentation about the side effects of setting
14
- YARALYZER_LOG_DIR to a value.
14
+ Logs are normally ephemeral (not written to files) but can be configured to be persisted by setting
15
+ the `YARALYZER_LOG_DIR` env var. See `.yaralyzer.example` for documentation about the side effects
16
+ of setting `YARALYZER_LOG_DIR` to a value.
17
+
18
+ * [logging.basicConfig](https://docs.python.org/3/library/logging.html#logging.basicConfig)
15
19
 
16
- https://docs.python.org/3/library/logging.html#logging.basicConfig
17
- https://realpython.com/python-logging/
20
+ * [realpython.com/python-logging/](https://realpython.com/python-logging/)
18
21
 
19
22
  Python log levels for reference:
23
+
24
+ ```
20
25
  CRITICAL 50
21
26
  ERROR 40
22
27
  WARNING 30
23
28
  INFO 20
24
29
  DEBUG 10
25
30
  NOTSET 0
31
+ ```
26
32
  """
27
33
  import logging
28
34
  import sys
@@ -37,13 +43,22 @@ ARGPARSE_LOG_FORMAT = '{0: >30} {1: <17} {2: <}\n'
37
43
 
38
44
 
39
45
  def configure_logger(log_label: str) -> logging.Logger:
40
- """Set up a file or stream logger depending on the configuration."""
46
+ """
47
+ Set up a file or stream `logger` depending on the configuration.
48
+
49
+ Args:
50
+ log_label (str): The label for the `logger`, e.g. "run" or "invocation".
51
+ Actual name will be `"yaralyzer.{log_label}"`.
52
+
53
+ Returns:
54
+ logging.Logger: The configured `logger`.
55
+ """
41
56
  log_name = f"yaralyzer.{log_label}"
42
57
  logger = logging.getLogger(log_name)
43
58
 
44
59
  if YaralyzerConfig.LOG_DIR:
45
60
  if not path.isdir(YaralyzerConfig.LOG_DIR) or not path.isabs(YaralyzerConfig.LOG_DIR):
46
- raise RuntimeError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
61
+ raise FileNotFoundError(f"Log dir '{YaralyzerConfig.LOG_DIR}' doesn't exist or is not absolute")
47
62
 
48
63
  log_file_path = path.join(YaralyzerConfig.LOG_DIR, f"{log_name}.log")
49
64
  log_formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
@@ -70,14 +85,14 @@ if YaralyzerConfig.LOG_DIR:
70
85
  invocation_log.setLevel('INFO')
71
86
 
72
87
 
73
- def log_and_print(msg: str, log_level='INFO'):
74
- """Both print and log (at INFO level) a string."""
88
+ def log_and_print(msg: str, log_level: str = 'INFO'):
89
+ """Both print (to console) and log (to file) a string."""
75
90
  log.log(logging.getLevelName(log_level), msg)
76
91
  print(msg)
77
92
 
78
93
 
79
94
  def log_current_config():
80
- """Write current state of YaralyzerConfig object to the logs."""
95
+ """Write current state of `YaralyzerConfig` object to the logs."""
81
96
  msg = f"{YaralyzerConfig.__name__} current attributes:\n"
82
97
  config_dict = {k: v for k, v in vars(YaralyzerConfig).items() if not k.startswith('__')}
83
98
 
@@ -88,14 +103,14 @@ def log_current_config():
88
103
 
89
104
 
90
105
  def log_invocation() -> None:
91
- """Log the command used to launch the yaralyzer to the invocation log."""
106
+ """Log the command used to launch the `yaralyzer` to the invocation log."""
92
107
  msg = f"THE INVOCATION: '{' '.join(sys.argv)}'"
93
108
  log.info(msg)
94
109
  invocation_log.info(msg)
95
110
 
96
111
 
97
112
  def log_argparse_result(args, label: str):
98
- """Logs the result of argparse."""
113
+ """Logs the result of `argparse`."""
99
114
  args_dict = vars(args)
100
115
  log_msg = f'{label} argparse results:\n' + ARGPARSE_LOG_FORMAT.format('OPTION', 'TYPE', 'VALUE')
101
116
 
@@ -1,17 +1,22 @@
1
1
  """
2
- Rich text decorator for YARA match dicts, which look like this:
3
-
4
- {
5
- 'tags': ['foo', 'bar'],
6
- 'matches': True,
7
- 'namespace': 'default',
8
- 'rule': 'my_rule',
9
- 'meta': {},
10
- 'strings': [
11
- StringMatch1,
12
- StringMatch2
13
- ]
14
- }
2
+ Rich text decorator for YARA match dicts.
3
+
4
+ A YARA match is returned as a `dict` with this structure:
5
+
6
+ Example:
7
+ ```
8
+ {
9
+ 'tags': ['foo', 'bar'],
10
+ 'matches': True,
11
+ 'namespace': 'default',
12
+ 'rule': 'my_rule',
13
+ 'meta': {},
14
+ 'strings': [
15
+ StringMatch1,
16
+ StringMatch2
17
+ ]
18
+ }
19
+ ```
15
20
  """
16
21
  import re
17
22
  from numbers import Number
@@ -30,11 +35,12 @@ from yaralyzer.output.rich_console import console_width, theme_colors_with_prefi
30
35
  from yaralyzer.util.logging import log
31
36
 
32
37
  MATCH_PADDING = (0, 0, 0, 1)
33
- URL_REGEX = re.compile('^https?:')
38
+
39
+ DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
34
40
  DIGITS_REGEX = re.compile("^\\d+$")
35
41
  HEX_REGEX = re.compile('^[0-9A-Fa-f]+$')
36
- DATE_REGEX = re.compile('\\d{4}-\\d{2}-\\d{2}')
37
42
  MATCHER_VAR_REGEX = re.compile('\\$[a-z_]+')
43
+ URL_REGEX = re.compile('^https?:')
38
44
 
39
45
  YARA_STRING_STYLES: Dict[re.Pattern, str] = {
40
46
  URL_REGEX: 'yara.url',
@@ -50,14 +56,21 @@ RAW_YARA_THEME_TXT.justify = CENTER
50
56
 
51
57
 
52
58
  class YaraMatch:
59
+ """Rich text decorator for YARA match dicts."""
60
+
53
61
  def __init__(self, match: dict, matched_against_bytes_label: Text) -> None:
62
+ """
63
+ Args:
64
+ match (dict): The YARA match dict.
65
+ matched_against_bytes_label (Text): Label indicating what bytes were matched against.
66
+ """
54
67
  self.match = match
55
68
  self.rule_name = match['rule']
56
69
  self.label = matched_against_bytes_label.copy().append(f" matched rule: '", style='matched_rule')
57
70
  self.label.append(self.rule_name, style='on bright_red bold').append("'!", style='siren')
58
71
 
59
72
  def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
60
- """Renders a panel showing the color highlighted raw YARA match info."""
73
+ """Renders a rich `Panel` showing the color highlighted raw YARA match info."""
61
74
  yield Text("\n")
62
75
  yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
63
76
  yield RAW_YARA_THEME_TXT
@@ -65,7 +78,16 @@ class YaraMatch:
65
78
 
66
79
 
67
80
  def _rich_yara_match(element: Any, depth: int = 0) -> Text:
68
- """Painful/hacky way of recursively coloring a yara result hash."""
81
+ """
82
+ Painful/hacky way of recursively coloring a YARA match dict.
83
+
84
+ Args:
85
+ element (Any): The element to render (can be `dict`, `list`, `str`, `bytes`, `int`, `bool`).
86
+ depth (int): Current recursion depth (used for indentation).
87
+
88
+ Returns:
89
+ Text: The rich `Text` representation of the element.
90
+ """
69
91
  indent = Text((depth + 1) * INDENT_SPACES)
70
92
  end_indent = Text(depth * INDENT_SPACES)
71
93
 
@@ -130,6 +152,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
130
152
 
131
153
 
132
154
  def _yara_string(_string: str) -> Text:
155
+ """Apply special styles to certain types of yara strings (e.g. URLs, numbers, hex, dates, matcher vars)."""
133
156
  for regex in YARA_STRING_STYLES.keys():
134
157
  if regex.match(_string):
135
158
  return Text(_string, YARA_STRING_STYLES[regex])
@@ -1,6 +1,9 @@
1
1
  """
2
- Builds bare bones YARA rules to match strings and regex patterns. Example rule string:
2
+ Builds bare bones YARA rules to match strings and regex patterns.
3
3
 
4
+ Example rule string:
5
+
6
+ ```
4
7
  rule Just_A_Piano_Man {
5
8
  meta:
6
9
  author = "Tim"
@@ -9,19 +12,23 @@ rule Just_A_Piano_Man {
9
12
  condition:
10
13
  $hilton_producer
11
14
  }
15
+ ```
12
16
  """
13
17
  import re
14
- from typing import Optional
18
+ from typing import Literal, Optional
15
19
 
16
20
  import yara
17
21
 
18
22
  from yaralyzer.config import YARALYZE
19
23
  from yaralyzer.util.logging import log
20
24
 
25
+ PatternType = Literal['hex', 'regex']
26
+ YaraModifierType = Literal['ascii', 'fullword', 'nocase', 'wide']
27
+
21
28
  HEX = 'hex'
29
+ PATTERN = 'pattern'
22
30
  REGEX = 'regex'
23
31
  RULE = 'rule'
24
- PATTERN = 'pattern'
25
32
  UNDERSCORE = '_'
26
33
  YARA_REGEX_MODIFIERS = ['nocase', 'ascii', 'wide', 'fullword']
27
34
 
@@ -60,12 +67,25 @@ rule {rule_name} {{
60
67
 
61
68
  def yara_rule_string(
62
69
  pattern: str,
63
- pattern_type: str = REGEX,
70
+ pattern_type: PatternType = REGEX,
64
71
  rule_name: str = YARALYZE,
65
72
  pattern_label: Optional[str] = PATTERN,
66
- modifier: Optional[str] = None
73
+ modifier: Optional[YaraModifierType] = None
67
74
  ) -> str:
68
- """Build a YARA rule string for a given pattern"""
75
+ """
76
+ Build a YARA rule string for a given `pattern`.
77
+
78
+ Args:
79
+ pattern (str): The string or regex pattern to match.
80
+ pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
81
+ rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
82
+ pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
83
+ modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
84
+ Only valid if `pattern_type` is `"regex"`.
85
+
86
+ Returns:
87
+ str: The constructed YARA rule as a string.
88
+ """
69
89
  if not (modifier is None or modifier in YARA_REGEX_MODIFIERS):
70
90
  raise TypeError(f"Modifier '{modifier}' is not one of {YARA_REGEX_MODIFIERS}")
71
91
 
@@ -73,6 +93,8 @@ def yara_rule_string(
73
93
  pattern = f"/{pattern}/"
74
94
  elif pattern_type == HEX:
75
95
  pattern = f"{{{pattern}}}"
96
+ else:
97
+ raise ValueError(f"pattern_type must be either '{REGEX}' or '{HEX}'")
76
98
 
77
99
  if modifier:
78
100
  pattern += f" {modifier}"
@@ -81,7 +103,8 @@ def yara_rule_string(
81
103
  rule_name=rule_name,
82
104
  pattern_label=pattern_label,
83
105
  pattern=pattern,
84
- modifier='' if modifier is None else f" {modifier}")
106
+ modifier='' if modifier is None else f" {modifier}"
107
+ )
85
108
 
86
109
  log.debug(f"Built YARA rule: \n{rule}")
87
110
  return rule
@@ -89,18 +112,39 @@ def yara_rule_string(
89
112
 
90
113
  def build_yara_rule(
91
114
  pattern: str,
92
- pattern_type: str = REGEX,
115
+ pattern_type: PatternType = REGEX,
93
116
  rule_name: str = YARALYZE,
94
117
  pattern_label: Optional[str] = PATTERN,
95
- modifier: Optional[str] = None
118
+ modifier: Optional[YaraModifierType] = None
96
119
  ) -> yara.Rule:
97
- """Build a compiled YARA rule"""
120
+ """
121
+ Build a compiled `yara.Rule` object.
122
+
123
+ Args:
124
+ pattern (str): The string or regex pattern to match.
125
+ pattern_type (str): Either `"regex"` or `"hex"`. Default is `"regex"`.
126
+ rule_name (str): The name of the YARA rule. Default is `"YARALYZE"`.
127
+ pattern_label (Optional[str]): The label for the pattern in the YARA rule. Default is `"pattern"`.
128
+ modifier (Optional[str]): Optional regex modifier (e.g. 'nocase', 'ascii', 'wide', 'fullword').
129
+ Only valid if `pattern_type` is `"regex"`.
130
+
131
+ Returns:
132
+ yara.Rule: Compiled YARA rule object.
133
+ """
98
134
  rule_string = yara_rule_string(pattern, pattern_type, rule_name, pattern_label, modifier)
99
135
  return yara.compile(source=rule_string)
100
136
 
101
137
 
102
138
  def safe_label(_label: str) -> str:
103
- """YARA rule and pattern names can only contain alphanumeric chars"""
139
+ """
140
+ YARA rule and pattern names can only contain alphanumeric chars.
141
+
142
+ Args:
143
+ _label (str): The label to sanitize.
144
+
145
+ Returns:
146
+ str: A sanitized label safe for use in YARA rules.
147
+ """
104
148
  label = _label
105
149
 
106
150
  for char, replacement in SAFE_LABEL_REPLACEMENTS.items():
yaralyzer/yaralyzer.py CHANGED
@@ -1,10 +1,11 @@
1
1
  """Main Yaralyzer class and alternate constructors."""
2
2
  from os import path
3
- from typing import Iterator, List, Optional, Tuple, Union
3
+ from typing import Callable, Iterator, List, Optional, Tuple, Union
4
4
 
5
5
  import yara
6
6
  from rich.console import Console, ConsoleOptions, RenderResult
7
7
  from rich.padding import Padding
8
+ from rich.style import Style
8
9
  from rich.text import Text
9
10
 
10
11
  from yaralyzer.bytes_match import BytesMatch
@@ -26,7 +27,7 @@ YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't
26
27
  # TODO: might be worth introducing a Scannable namedtuple or similar
27
28
  class Yaralyzer:
28
29
  """
29
- Central class that handles setting up / compiling rules and reading binary data from files as needed.
30
+ Central class that handles setting up / compiling YARA rules and reading binary data from files as needed.
30
31
 
31
32
  Alternate constructors are provided depending on whether:
32
33
 
@@ -38,7 +39,18 @@ class Yaralyzer:
38
39
 
39
40
  * YARA rules should be read from a directory of .yara files
40
41
 
41
- The real action happens in the __rich__console__() dunder method.
42
+ The real action happens in the `__rich_console__()` dunder method.
43
+
44
+ Attributes:
45
+ bytes (bytes): The binary data to scan.
46
+ bytes_length (int): The length of the binary data.
47
+ scannable_label (str): A label for the binary data, typically the filename or a user-provided label.
48
+ rules (yara.Rules): The compiled YARA rules to use for scanning.
49
+ rules_label (str): A label for the ruleset, typically derived from filenames or user input.
50
+ highlight_style (str): The style to use for highlighting matches in the output.
51
+ non_matches (List[dict]): A list of YARA rules that did not match the binary data.
52
+ matches (List[YaraMatch]): A list of YaraMatch objects representing the matches found.
53
+ extraction_stats (RegexMatchMetrics): Metrics related to decoding attempts on matched data
42
54
  """
43
55
 
44
56
  def __init__(
@@ -50,17 +62,22 @@ class Yaralyzer:
50
62
  highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
51
63
  ) -> None:
52
64
  """
53
- Initialize a Yaralyzer instance for scanning binary data with YARA rules.
65
+ Initialize a `Yaralyzer` instance for scanning binary data with YARA rules.
54
66
 
55
67
  Args:
56
- rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string (YARA rule source) or a pre-compiled yara.Rules object. If a string is provided, it will be compiled.
68
+ rules (Union[str, yara.Rules]): YARA rules to use for scanning. Can be a string or a pre-compiled
69
+ `yara.Rules` object (strings will be compiled to an instance of `yara.Rules`).
57
70
  rules_label (str): Label to identify the ruleset in output and logs.
58
- scannable (Union[bytes, str]): The data to scan. If bytes, raw data is scanned; if str, it is treated as a file path to load bytes from.
59
- scannable_label (Optional[str], optional): Label for the scannable data. Required if scannable is bytes. If scannable is a file path, defaults to the file's basename.
60
- highlight_style (str, optional): Style to use for highlighting matches in output. Defaults to YaralyzerConfig.HIGHLIGHT_STYLE.
71
+ scannable (Union[bytes, str]): The data to scan. If it's `bytes` type then that data is scanned;
72
+ if it's a string it is treated as a file path to load bytes from.
73
+ scannable_label (Optional[str], optional): Label for the `scannable` arg data.
74
+ Required if `scannable` is `bytes`.
75
+ If `scannable` is a file path `scannable_label` will default to the file's basename.
76
+ highlight_style (str, optional): Style to use for highlighting matches in output.
77
+ Defaults to `YaralyzerConfig.HIGHLIGHT_STYLE`.
61
78
 
62
79
  Raises:
63
- TypeError: If scannable is bytes and scannable_label is not provided.
80
+ TypeError: If `scannable` is `bytes` and `scannable_label` is not provided.
64
81
  """
65
82
  if 'args' not in vars(YaralyzerConfig):
66
83
  YaralyzerConfig.set_default_args()
@@ -72,7 +89,7 @@ class Yaralyzer:
72
89
 
73
90
  if isinstance(scannable, bytes):
74
91
  if scannable_label is None:
75
- raise TypeError("Must provide scannable_label arg when yaralyzing raw bytes")
92
+ raise TypeError("Must provide 'scannable_label' arg when yaralyzing raw bytes")
76
93
 
77
94
  self.bytes: bytes = scannable
78
95
  self.scannable_label: str = scannable_label
@@ -101,13 +118,26 @@ class Yaralyzer:
101
118
  scannable: Union[bytes, str],
102
119
  scannable_label: Optional[str] = None
103
120
  ) -> 'Yaralyzer':
104
- """Alternate constructor to load yara rules from files and label rules with the filenames."""
121
+ """
122
+ Alternate constructor to load YARA rules from files and label rules with the filenames.
123
+
124
+ Args:
125
+ yara_rules_files (List[str]): List of file paths to YARA rules files.
126
+ scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
127
+ if `str`, it is treated as a file path to load bytes from.
128
+ scannable_label (Optional[str], optional): Label for the `scannable` data.
129
+ Required if `scannable` is `bytes`. If scannable is a file path, defaults to the file's basename.
130
+
131
+ Raises:
132
+ TypeError: If `yara_rules_files` is not a list.
133
+ FileNotFoundError: If any file in `yara_rules_files` does not exist.
134
+ """
105
135
  if not isinstance(yara_rules_files, list):
106
136
  raise TypeError(f"{yara_rules_files} is not a list")
107
137
 
108
138
  for file in yara_rules_files:
109
139
  if not path.exists(file):
110
- raise ValueError(f"'{file}' {YARA_FILE_DOES_NOT_EXIST_ERROR_MSG}")
140
+ raise FileNotFoundError(f"'{file}' {YARA_FILE_DOES_NOT_EXIST_ERROR_MSG}")
111
141
 
112
142
  filepaths_arg = {path.basename(file): file for file in yara_rules_files}
113
143
 
@@ -126,9 +156,21 @@ class Yaralyzer:
126
156
  scannable: Union[bytes, str],
127
157
  scannable_label: Optional[str] = None
128
158
  ) -> 'Yaralyzer':
129
- """Alternate constructor that will load all .yara files in yara_rules_dir."""
159
+ """
160
+ Alternate constructor that will load all `.yara` files in the directories listed in `dirs`.
161
+
162
+ Args:
163
+ dirs (List[str]): List of directories to search for `.yara` files.
164
+ scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
165
+ if `str`, it is treated as a file path to load bytes from.
166
+ scannable_label (Optional[str], optional): Label for the `scannable` data.
167
+ Required if `scannable` is `bytes`. If scannable is a file path, defaults to the file's basename.
168
+
169
+ Raises:
170
+ FileNotFoundError: If `dirs` is not a list of valid directories.
171
+ """
130
172
  if not (isinstance(dirs, list) and all(path.isdir(dir) for dir in dirs)):
131
- raise TypeError(f"'{dirs}' is not a list of valid directories")
173
+ raise FileNotFoundError(f"'{dirs}' is not a list of valid directories")
132
174
 
133
175
  rules_files = [path.join(dir, f) for dir in dirs for f in files_in_dir(dir)]
134
176
  return cls.for_rules_files(rules_files, scannable, scannable_label)
@@ -144,7 +186,22 @@ class Yaralyzer:
144
186
  pattern_label: Optional[str] = None,
145
187
  regex_modifier: Optional[str] = None,
146
188
  ) -> 'Yaralyzer':
147
- """Constructor taking regex pattern strings. Rules label defaults to patterns joined by comma."""
189
+ """
190
+ Alternate constructor taking regex pattern strings. Rules label defaults to the patterns joined by comma.
191
+
192
+ Args:
193
+ patterns (List[str]): List of regex or hex patterns to build rules from.
194
+ patterns_type (str): Either `"regex"` or `"hex"` to indicate the type of patterns provided.
195
+ scannable (Union[bytes, str]): The data to scan. If `bytes`, raw data is scanned;
196
+ if `str`, it is treated as a file path to load bytes from.
197
+ scannable_label (Optional[str], optional): Label for the `scannable` data.
198
+ Required if `scannable` is `bytes`.
199
+ If scannable is a file path, defaults to the file's basename.
200
+ rules_label (Optional[str], optional): Label for the ruleset. Defaults to the patterns joined by comma.
201
+ pattern_label (Optional[str], optional): Label for each pattern in the YARA rules. Defaults to "pattern".
202
+ regex_modifier (Optional[str], optional): Optional regex modifier (e.g. "nocase", "ascii", "wide", etc).
203
+ Only valid if `patterns_type` is `"regex"`.
204
+ """
148
205
  rule_strings = []
149
206
 
150
207
  for i, pattern in enumerate(patterns):
@@ -167,7 +224,12 @@ class Yaralyzer:
167
224
  console.print(self)
168
225
 
169
226
  def match_iterator(self) -> Iterator[Tuple[BytesMatch, BytesDecoder]]:
170
- """Iterator version of yaralyze. Yields match and decode data tuple back to caller."""
227
+ """
228
+ Iterator version of `yaralyze()`.
229
+
230
+ Yields:
231
+ Tuple[BytesMatch, BytesDecoder]: Match and decode data tuple.
232
+ """
171
233
  self.rules.match(data=self.bytes, callback=self._yara_callback)
172
234
 
173
235
  for yara_match in self.matches:
@@ -181,8 +243,16 @@ class Yaralyzer:
181
243
 
182
244
  self._print_non_matches()
183
245
 
184
- def _yara_callback(self, data: dict):
185
- """YARA callback to handle matches and non-matches as they are discovered."""
246
+ def _yara_callback(self, data: dict) -> Callable:
247
+ """
248
+ Callback invoked by `yara-python` to handle matches and non-matches as they are discovered.
249
+
250
+ Args:
251
+ data (dict): Data provided when `yara-python` invokes the callback.
252
+
253
+ Returns:
254
+ Callable: Always returns `yara.CALLBACK_CONTINUE` to signal `yara-python` should continue processing.
255
+ """
186
256
  if data['matches']:
187
257
  self.matches.append(YaraMatch(data, self._panel_text()))
188
258
  else:
@@ -212,11 +282,11 @@ class Yaralyzer:
212
282
  styles = [reverse_color(YARALYZER_THEME.styles[f"yara.{s}"]) for s in ('scanned', 'rules')]
213
283
  return self.__text__(*styles)
214
284
 
215
- def _filename_string(self):
285
+ def _filename_string(self) -> str:
216
286
  """The string to use when exporting this yaralyzer to SVG/HTML/etc."""
217
287
  return str(self).replace('>', '').replace('<', '').replace(' ', '_')
218
288
 
219
- def __text__(self, byte_style: str = 'yara.scanned', rule_style: str = 'yara.rules') -> Text:
289
+ def __text__(self, byte_style: Style | str = 'yara.scanned', rule_style: Style | str = 'yara.rules') -> Text:
220
290
  """Text representation of this YARA scan (__text__() was taken)."""
221
291
  txt = Text('').append(self.scannable_label, style=byte_style or 'yara.scanned')
222
292
  return txt.append(' scanned with <').append(self.rules_label, style=rule_style or 'yara.rules').append('>')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: yaralyzer
3
- Version: 1.0.8
3
+ Version: 1.0.9
4
4
  Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
6
6
  License: GPL-3.0-or-later
@@ -78,7 +78,7 @@ YARA just tells you the byte position and the matched string but it can't tell y
78
78
 
79
79
  Enter **The Yaralyzer**, which lets you quickly scan the regions around matches while also showing you what those regions would look like if they were forced into various character encodings.
80
80
 
81
- It's important to note that **The Yaralyzer** isn't a full on malware reversing tool. It can't do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn't try to. It's more intended to give you a quick visual overview of suspect regions in the binary so you can hone in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).
81
+ **The Yaralyzer** isn't a malware reversing tool. It can't do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn't try to. It's more intended to give you a quick visual overview of suspect regions in the binary so you can home in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).
82
82
 
83
83
  # Installation
84
84
  Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a marginally better solution as it guarantees any packages installed with it will be isolated from the rest of your local python environment. Of course if you don't really have a local python environment this is a moot point and you can feel free to install with `pip`/`pip3`.
@@ -86,6 +86,7 @@ Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a ma
86
86
  pipx install yaralyzer
87
87
  ```
88
88
 
89
+
89
90
  # Usage
90
91
  Run `yaralyze -h` to see the command line options (screenshot below).
91
92
 
@@ -99,7 +100,7 @@ If you place a file called `.yaralyzer` in your home directory or the current wo
99
100
  Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
100
101
 
101
102
  ### As A Library
102
- [`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. It has a variety of constructors supporting:
103
+ [`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. Auto generated documentation for `Yaralyzer`'s various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/). It has a variety of [alternate constructors](https://michelcrypt4d4mus.github.io/yaralyzer/api/yaralyzer/) supporting:
103
104
 
104
105
  1. Precompiled YARA rules
105
106
  1. Creating a YARA rule from a string
@@ -108,7 +109,7 @@ Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyze
108
109
  1. Scanning `bytes`
109
110
  1. Scanning a file
110
111
 
111
- Should you want to iterate over the `BytesMatch` (like a `re.Match` object for a YARA match) and `BytesDecoder` (tracks decoding attempt stats) objects returned by The Yaralyzer, you can do so like this:
112
+ Should you want to iterate over the [`BytesMatch`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_match/) (like a `re.Match` object for a YARA match) and [`BytesDecoder`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_decoder/) (tracks decoding attempt stats) objects used by The Yaralyzer, you can do so like this:
112
113
 
113
114
  ```python
114
115
  from yaralyzer.yaralyzer import Yaralyzer
@@ -119,8 +120,6 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
119
120
  do_stuff()
120
121
  ```
121
122
 
122
- #### API Documentation
123
- Auto generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
124
123
 
125
124
  # Example Output
126
125
  The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
@@ -0,0 +1,32 @@
1
+ .yaralyzer.example,sha256=z3_mk41xxm0Pr_8MGM7AKQG0xEFRtGcyJLboMuelRp4,3504
2
+ CHANGELOG.md,sha256=lepFLLmnoHWaac4ae49WqSbpqXXxge2S2mDvE2qbixE,3408
3
+ LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
4
+ yaralyzer/__init__.py,sha256=FHfzll5jfldsqx3pXVBPu9xwDqFKjEVfTL7dha9BYX8,2793
5
+ yaralyzer/bytes_match.py,sha256=ROMv9gK0R1bDP5IpheNyxQ44_oEJPkHn_XYwkoIYKdQ,10901
6
+ yaralyzer/config.py,sha256=uVT8Jjw6kViH_PvBQ0etaH3JXPWOIXgiaoAv3ompnJA,4558
7
+ yaralyzer/decoding/bytes_decoder.py,sha256=8uKmqXEchjhTFULrcIKk699bfbBJrwvr9A8GlzCq0Z0,10200
8
+ yaralyzer/decoding/decoding_attempt.py,sha256=gUroTUSgWrgD-EZH8t5vsdDk0DSPqHMt0ow947sSFok,10290
9
+ yaralyzer/encoding_detection/character_encodings.py,sha256=_b3Vk5abAcKVDZ7QQyrAMQODAgMjG54AjqxdSGSdaj0,5637
10
+ yaralyzer/encoding_detection/encoding_assessment.py,sha256=q7wa2rls5nXEioX9UqzaNk4TxdW5WKzXjQik9e9AHs4,3262
11
+ yaralyzer/encoding_detection/encoding_detector.py,sha256=9zV1ZA6D3z9t6-Bz2IhcmqufJ_7zGJ0Rzh2gn0fmaO8,6487
12
+ yaralyzer/helpers/bytes_helper.py,sha256=7l0EycirLsPl--BakAEH-P7ruAgGgu75zYEfiw0OwO4,10212
13
+ yaralyzer/helpers/dict_helper.py,sha256=rhyu-xlpl4yevXdLZUIgVwap0b57O9I3DNAEv8MfTlI,186
14
+ yaralyzer/helpers/file_helper.py,sha256=tjiwCr8EMFHHmX4R13J4Sba5xv0IWXhEGyWUvGvCSa8,1588
15
+ yaralyzer/helpers/list_helper.py,sha256=zX6VzJDbnyxuwQpth5Mc7k7yeJytqWPzpo1v5nXCMtE,394
16
+ yaralyzer/helpers/rich_text_helper.py,sha256=7h3MOORdfZ8vrfUJ5sei4GOMxyfTonxmzii_VhrJZ6U,4383
17
+ yaralyzer/helpers/string_helper.py,sha256=8XsvYlKn-fGhKihfJBOG6mqi5nV_8LM-IWgHzvkRgCc,933
18
+ yaralyzer/output/decoding_attempts_table.py,sha256=wQ3cyN9czZkC3cbwjgflSu0t4wDKGDIs5NPOE6UwBLk,5004
19
+ yaralyzer/output/file_export.py,sha256=iTlCYErquuy6tqBZ1_BQHxBk-6jZ2ihTnGe83HEI_5o,3300
20
+ yaralyzer/output/file_hashes_table.py,sha256=pKbIc1bHJIIorqk9R2gz3IhTxKJpYU1TioGgceyoxiI,2615
21
+ yaralyzer/output/regex_match_metrics.py,sha256=ZQjzePPXpq_g99KNQjHrRQ1N6u_OUxD32uf9xnqcOw8,4341
22
+ yaralyzer/output/rich_console.py,sha256=mQEK0hq2qyCzqebzNDmNTqG2O8pqwBKs_UFIC0DEvxM,5124
23
+ yaralyzer/util/argument_parser.py,sha256=ZOsBf5xkAWHFSWPbZt7_OdkYHIY3RIjtK1QIXOj2g6U,13281
24
+ yaralyzer/util/logging.py,sha256=aBvpNukwZTGOgzm_zpwWzTWFrptThk-g2cqi8D4Fkmo,4616
25
+ yaralyzer/yara/yara_match.py,sha256=BwWbVgYYCybT9TMhWgkT5vA54C9XJ7fAmGf6JKncjhA,5845
26
+ yaralyzer/yara/yara_rule_builder.py,sha256=PeuhPtO4FvXJoTegQr0NXwGpX7wxPfGzAO1tMozaZd8,4535
27
+ yaralyzer/yaralyzer.py,sha256=CLczlTW2ppyoChkPIGvQWwAo-5F0LG_rMEJpCy4cucg,13813
28
+ yaralyzer-1.0.9.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
29
+ yaralyzer-1.0.9.dist-info/METADATA,sha256=sN9ZZxRsjj79m5miQ535kers7OVuSYLcvB6Uuu8COqY,11255
30
+ yaralyzer-1.0.9.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
31
+ yaralyzer-1.0.9.dist-info/entry_points.txt,sha256=7LnLJrNTfql0vuctjRWwp_ZD-BYvtv9ENVipdjuT7XI,136
32
+ yaralyzer-1.0.9.dist-info/RECORD,,