yaralyzer 0.9.3__tar.gz → 0.9.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yaralyzer might be problematic. Click here for more details.

Files changed (29) hide show
  1. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/.yaralyzer.example +5 -7
  2. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/CHANGELOG.md +7 -0
  3. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/PKG-INFO +7 -6
  4. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/README.md +5 -4
  5. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/pyproject.toml +2 -2
  6. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/bytes_match.py +24 -10
  7. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/config.py +5 -6
  8. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/encoding_detection/encoding_detector.py +2 -1
  9. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/helpers/bytes_helper.py +3 -2
  10. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/helpers/file_helper.py +4 -1
  11. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/helpers/string_helper.py +6 -0
  12. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/output/file_hashes_table.py +1 -1
  13. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/output/rich_console.py +17 -0
  14. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/util/argument_parser.py +2 -2
  15. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/yara/yara_match.py +21 -7
  16. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/yaralyzer.py +11 -5
  17. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/LICENSE +0 -0
  18. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/__init__.py +0 -0
  19. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/decoding/bytes_decoder.py +0 -0
  20. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/decoding/decoding_attempt.py +0 -0
  21. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/encoding_detection/character_encodings.py +0 -0
  22. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/encoding_detection/encoding_assessment.py +0 -0
  23. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/helpers/dict_helper.py +0 -0
  24. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/helpers/rich_text_helper.py +0 -0
  25. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/output/decoding_attempts_table.py +0 -0
  26. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/output/file_export.py +1 -1
  27. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/output/regex_match_metrics.py +0 -0
  28. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/util/logging.py +0 -0
  29. {yaralyzer-0.9.3 → yaralyzer-0.9.5}/yaralyzer/yara/yara_rule_builder.py +0 -0
@@ -9,10 +9,11 @@
9
9
  # YARALYZER_MAXIMIZE_WIDTH=True
10
10
 
11
11
 
12
+
12
13
  # Expand the width of the output to fit the display window (same as the --maximize-width option)
13
14
  # YARALYZER_MAXIMIZE_WIDTH=True
14
15
 
15
- # Passed through to yara.set_config as the stack_size and
16
+ # yara-python internal options passed through to yara.set_config() as the stack_size and max_match_data arguments
16
17
  # YARALYZER_STACK_SIZE=10485760
17
18
  # YARALYZER_MAX_MATCH_LENGTH=10737418240
18
19
 
@@ -27,7 +28,7 @@
27
28
  # YARALYZER_MIN_CHARDET_CONFIDENCE=2.0
28
29
 
29
30
  # Configure how many bytes before and after any binary data should be included in scans and visualizations
30
- # PDFAlYZER_SURROUNDING_BYTES=64
31
+ # YARALYZER_SURROUNDING_BYTES=64
31
32
 
32
33
  # Size thresholds (in bytes) under/over which yaralyzer will NOT make attempts to decode a match.
33
34
  # Longer byte sequences are for obvious reasons slower to decode by force.
@@ -35,8 +36,8 @@
35
36
  # (in my experience) less likely to be meaningful. Consider it - two frontslash characters 20,000 lines apart
36
37
  # are more likely to be random than those same frontslashes when placed nearer to each other and
37
38
  # in the vicinity of a lot of computerized sigils of internet power like `.`, `+bacd*?`, and other regexes.*
38
- # Keeping the max value number low will do more to affect the speed of the app than ay anything else you
39
- # can easily configure..
39
+ # Keeping the max value number low will do more to affect the speed of the app than anything else you
40
+ # can easily configure.
40
41
  #
41
42
  # YARALYZER_MIN_DECODE_LENGTH=1
42
43
  # YARALYZER_MAX_DECODE_LENGTH=256
@@ -58,6 +59,3 @@
58
59
 
59
60
  # Log level
60
61
  # YARALYZER_LOG_LEVEL='WARN'
61
-
62
- # Path to directory containing Didier Stevens's pdf-parser.py. Only required for extracting binary streams to files.
63
- # YARALYZER_PDF_PARSER_PY_PATH=/path/to/pdfparserdotpy/
@@ -1,5 +1,12 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 0.9.5
4
+ * Use all files in a directory specified by `--rule-dir` instead of just those with the extension `.yara`
5
+ * Fix bug where `--rule-dir` is prefixed by `./`
6
+
7
+ ### 0.9.4
8
+ * Bump `yara-python` to 4.3.0+ and deal with backwards incompatibility
9
+
3
10
  ### 0.9.3
4
11
  * Lock `yara-python` at 4.2.3 bc 4.3.x causes problems
5
12
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: yaralyzer
3
- Version: 0.9.3
3
+ Version: 0.9.5
4
4
  Summary: Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
6
6
  License: GPL-3.0-or-later
@@ -19,14 +19,14 @@ Requires-Dist: chardet (>=5.0.0,<6.0.0)
19
19
  Requires-Dist: python-dotenv (>=0.21.0,<0.22.0)
20
20
  Requires-Dist: rich (>=12.5.1,<13.0.0)
21
21
  Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
22
- Requires-Dist: yara-python (>=4.2.3,<4.3.0)
22
+ Requires-Dist: yara-python (>=4.3.0,<5.0.0)
23
23
  Project-URL: Documentation, https://github.com/michelcrypt4d4mus/yaralyzer
24
24
  Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
25
25
  Description-Content-Type: text/markdown
26
26
 
27
27
  <!-- ![Tests](https://img.shields.io/github/workflow/status/michelcrypt4d4mus/yaralyzer/tests?label=tests) -->
28
28
  ![Python Version](https://img.shields.io/pypi/pyversions/yaralyzer)
29
- ![Release](https://img.shields.io/github/v/release/michelcrypt4d4mus/yaralyzer?sort=semver)
29
+ ![Release](https://img.shields.io/pypi/v/yaralyzer)
30
30
  ![Downloads](https://img.shields.io/pypi/dm/yaralyzer)
31
31
 
32
32
  # THE YARALYZER
@@ -52,8 +52,8 @@ yaralyze --hex-pattern 'd0 93 d0 a3 d0 [-] 9b d0 90 d0 93' one_day_in_the_life_o
52
52
  #### What It Do
53
53
  1. **See the actual bytes your YARA rules are matching.** No more digging around copy/pasting the start positions reported by YARA into your favorite hex editor. Displays both the bytes matched by YARA as well as a configurable number of bytes before and after each match in hexadecimal and "raw" python string representation.
54
54
  1. **Do the same for byte patterns and regular expressions without writing a YARA file.** If you're too lazy to write a YARA file but are trying to determine, say, whether there's a regular expression hidden somewhere in the file you could scan for the pattern `'/.+/'` and immediately get a window into all the bytes in the file that live between front slashes. Same story for quotes, BOMs, etc. Any regex YARA can handle is supported so the sky is the limit.
55
- 1. **Detect the possible encodings of each set of matched bytes.** [The `chardet` library](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
56
- 1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable), an attempt at decoding the bytes using that encoding will be displayed.
55
+ 1. **Detect the possible encodings of each set of matched bytes.** [`chardet`](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
56
+ 1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable) an attempt at decoding the bytes using that encoding will be displayed.
57
57
  1. **Export the matched regions/decodings to SVG, HTML, and colored text files.** Show off your ASCII art.
58
58
 
59
59
  #### Why It Do
@@ -87,7 +87,7 @@ Run `yaralyze -h` to see the command line options (screenshot below).
87
87
  For info on exporting SVG images, HTML, etc., see [Example Output](#example-output).
88
88
 
89
89
  ### Configuration
90
- If you place a filed called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
90
+ If you place a file called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
91
91
 
92
92
  Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
93
93
 
@@ -131,6 +131,7 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect
131
131
 
132
132
 
133
133
  # TODO
134
+ * For some reason, when displaying matches, the output to a file iterates over all matches in a different way than when just running in the console. Presumably this is related to the `rich` rendering engine in some way. For now the console output is the "more correct" one so it's generally OK. See [`issue_with_output_to_console_correct.png`](doc/rendered_images/issue_with_output_to_console_correct.png) vs. [`issue_with_output_to_txt_file_incorrect.png`](doc/rendered_images/issue_with_output_to_txt_file_incorrect.png).
134
135
  * highlight decodes done at `chardet`'s behest
135
136
  * deal with repetitive matches
136
137
 
@@ -1,6 +1,6 @@
1
1
  <!-- ![Tests](https://img.shields.io/github/workflow/status/michelcrypt4d4mus/yaralyzer/tests?label=tests) -->
2
2
  ![Python Version](https://img.shields.io/pypi/pyversions/yaralyzer)
3
- ![Release](https://img.shields.io/github/v/release/michelcrypt4d4mus/yaralyzer?sort=semver)
3
+ ![Release](https://img.shields.io/pypi/v/yaralyzer)
4
4
  ![Downloads](https://img.shields.io/pypi/dm/yaralyzer)
5
5
 
6
6
  # THE YARALYZER
@@ -26,8 +26,8 @@ yaralyze --hex-pattern 'd0 93 d0 a3 d0 [-] 9b d0 90 d0 93' one_day_in_the_life_o
26
26
  #### What It Do
27
27
  1. **See the actual bytes your YARA rules are matching.** No more digging around copy/pasting the start positions reported by YARA into your favorite hex editor. Displays both the bytes matched by YARA as well as a configurable number of bytes before and after each match in hexadecimal and "raw" python string representation.
28
28
  1. **Do the same for byte patterns and regular expressions without writing a YARA file.** If you're too lazy to write a YARA file but are trying to determine, say, whether there's a regular expression hidden somewhere in the file you could scan for the pattern `'/.+/'` and immediately get a window into all the bytes in the file that live between front slashes. Same story for quotes, BOMs, etc. Any regex YARA can handle is supported so the sky is the limit.
29
- 1. **Detect the possible encodings of each set of matched bytes.** [The `chardet` library](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
30
- 1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable), an attempt at decoding the bytes using that encoding will be displayed.
29
+ 1. **Detect the possible encodings of each set of matched bytes.** [`chardet`](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
30
+ 1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable) an attempt at decoding the bytes using that encoding will be displayed.
31
31
  1. **Export the matched regions/decodings to SVG, HTML, and colored text files.** Show off your ASCII art.
32
32
 
33
33
  #### Why It Do
@@ -61,7 +61,7 @@ Run `yaralyze -h` to see the command line options (screenshot below).
61
61
  For info on exporting SVG images, HTML, etc., see [Example Output](#example-output).
62
62
 
63
63
  ### Configuration
64
- If you place a filed called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
64
+ If you place a file called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
65
65
 
66
66
  Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
67
67
 
@@ -105,6 +105,7 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect
105
105
 
106
106
 
107
107
  # TODO
108
+ * For some reason, when displaying matches, the output to a file iterates over all matches in a different way than when just running in the console. Presumably this is related to the `rich` rendering engine in some way. For now the console output is the "more correct" one so it's generally OK. See [`issue_with_output_to_console_correct.png`](doc/rendered_images/issue_with_output_to_console_correct.png) vs. [`issue_with_output_to_txt_file_incorrect.png`](doc/rendered_images/issue_with_output_to_txt_file_incorrect.png).
108
109
  * highlight decodes done at `chardet`'s behest
109
110
  * deal with repetitive matches
110
111
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "yaralyzer"
3
- version = "0.9.3"
3
+ version = "0.9.5"
4
4
  description = "Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors."
5
5
  authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
6
6
  readme = "README.md"
@@ -44,7 +44,7 @@ chardet = "^5.0.0"
44
44
  python-dotenv = "^0.21.0"
45
45
  rich = "^12.5.1"
46
46
  rich-argparse-plus = "^0.3.1"
47
- yara-python = "~4.2.3"
47
+ yara-python = "^4.3.0"
48
48
 
49
49
  [tool.poetry.group.dev.dependencies]
50
50
  pytest = "^7.1.3"
@@ -10,6 +10,7 @@ from typing import Iterator, Optional
10
10
 
11
11
  from rich.table import Table
12
12
  from rich.text import Text
13
+ from yara import StringMatch, StringMatchInstance
13
14
 
14
15
  from yaralyzer.config import YaralyzerConfig
15
16
  from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
@@ -41,10 +42,10 @@ class BytesMatch:
41
42
  self.label: str = label
42
43
  self.ordinal: int = ordinal
43
44
  self.match: Optional[re.Match] = match
44
- # Maybe should be called "matched_bytes"
45
- self.bytes = matched_against[start_idx:self.end_idx]
45
+ self.bytes = matched_against[start_idx:self.end_idx] # TODO: Maybe should be called "matched_bytes"
46
46
  self.match_groups: Optional[tuple] = match.groups() if match else None
47
47
  self._find_surrounding_bytes()
48
+
48
49
  # Adjust the highlighting start point in case this match is very early in the stream
49
50
  self.highlight_start_idx = start_idx - self.surrounding_start_idx
50
51
  self.highlight_end_idx = self.highlight_start_idx + self.length
@@ -65,14 +66,15 @@ class BytesMatch:
65
66
  cls,
66
67
  matched_against: bytes,
67
68
  rule_name: str,
68
- yara_str: dict,
69
+ yara_str_match: StringMatch,
70
+ yara_str_match_instance: StringMatchInstance,
69
71
  ordinal: int,
70
72
  highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
71
73
  ) -> 'BytesMatch':
72
- """Build a BytesMatch from a yara string match. matched_against is the set of bytes yara was run against"""
73
- # Don't duplicate the labeling if rule_name and yara_str are the same
74
- pattern_label = yara_str[1]
74
+ """Build a BytesMatch from a yara string match. 'matched_against' is the set of bytes yara was run against."""
75
+ pattern_label = yara_str_match.identifier
75
76
 
77
+ # Don't duplicate the labeling if rule_name and yara_str are the same
76
78
  if pattern_label == '$' + rule_name:
77
79
  label = pattern_label
78
80
  else:
@@ -80,8 +82,8 @@ class BytesMatch:
80
82
 
81
83
  return cls(
82
84
  matched_against=matched_against,
83
- start_idx=yara_str[0],
84
- length=len(yara_str[2]),
85
+ start_idx=yara_str_match_instance.offset,
86
+ length=yara_str_match_instance.matched_length,
85
87
  label=label,
86
88
  ordinal=ordinal,
87
89
  highlight_style=highlight_style)
@@ -94,8 +96,20 @@ class BytesMatch:
94
96
  highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
95
97
  ) -> Iterator['BytesMatch']:
96
98
  """Iterator w/a BytesMatch for each string returned as part of a YARA match result dict."""
97
- for i, yara_str in enumerate(yara_match['strings']):
98
- yield(cls.from_yara_str(matched_against, yara_match['rule'], yara_str, i + 1, highlight_style))
99
+ i = 0 # For numbered labeling
100
+
101
+ # yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
102
+ for yara_str_match in yara_match['strings']:
103
+ for yara_str_match_instance in yara_str_match.instances:
104
+ i += 1
105
+
106
+ yield(cls.from_yara_str(
107
+ matched_against,
108
+ yara_match['rule'],
109
+ yara_str_match,
110
+ yara_str_match_instance,
111
+ i,
112
+ highlight_style))
99
113
 
100
114
  def style_at_position(self, idx) -> str:
101
115
  """Get the style for the byte at position idx within the matched bytes"""
@@ -13,10 +13,9 @@ MEGABYTE = 1024 * KILOBYTE
13
13
 
14
14
  def config_var_name(env_var: str) -> str:
15
15
  """
16
- Get the name of env_var and strip off 'YARALYZER_':
17
-
18
- SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
19
- config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
16
+ Get the name of env_var and strip off 'YARALYZER_', e.g.:
17
+ SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
18
+ config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
20
19
  """
21
20
  env_var = env_var.removeprefix("YARALYZER_")
22
21
  return f'{env_var=}'.partition('=')[0]
@@ -71,7 +70,7 @@ class YaralyzerConfig:
71
70
  ]
72
71
 
73
72
  @classmethod
74
- def set_argument_parser(cls, parser):
73
+ def set_argument_parser(cls, parser: ArgumentParser) -> None:
75
74
  cls._argument_parser: ArgumentParser = parser
76
75
  cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
77
76
 
@@ -93,7 +92,7 @@ class YaralyzerConfig:
93
92
  if isinstance(arg_value, bool):
94
93
  setattr(args, option, arg_value or is_env_var_set_and_not_false(env_var))
95
94
  elif isinstance(arg_value, (int, float)):
96
- # Check against defaults to avoid overriding env var configured optoins
95
+ # Check against defaults to avoid overriding env var configured options
97
96
  if arg_value == default_value and env_value is not None:
98
97
  setattr(args, option, int(env_value) or arg_value) # TODO: float args not handled
99
98
  else:
@@ -1,6 +1,6 @@
1
1
  """
2
2
  Manager class to ease dealing with the 'chardet' encoding detection library.
3
- Each instance of this classes managed a chardet.detect_all() scan on a single set of bytes.
3
+ Each instance of this class manages a chardet.detect_all() scan on a single set of bytes.
4
4
  """
5
5
  from operator import attrgetter
6
6
  from typing import List
@@ -30,6 +30,7 @@ class EncodingDetector:
30
30
  self.bytes_len = len(_bytes)
31
31
  self.table = _empty_chardet_results_table()
32
32
 
33
+ # Skip chardet if there's not enough bytes available
33
34
  if not self.has_enough_bytes():
34
35
  log.debug(f"{self.bytes_len} is not enough bytes to run chardet.detect()")
35
36
  self._set_empty_results()
@@ -1,6 +1,7 @@
1
- import hashlib
1
+ """
2
+ Helper methods to work with bytes.
3
+ """
2
4
  import re
3
- from collections import namedtuple
4
5
  from io import StringIO
5
6
  from sys import byteorder
6
7
 
@@ -1,3 +1,6 @@
1
+ """
2
+ Helper methods to work with files.
3
+ """
1
4
  from datetime import datetime
2
5
  from os import listdir, path
3
6
  from typing import List, Optional
@@ -10,7 +13,7 @@ def timestamp_for_filename() -> str:
10
13
 
11
14
  def files_in_dir(dir: str, with_extname: Optional[str] = None) -> List[str]:
12
15
  """paths for non dot files, optionally ending in 'with_extname'"""
13
- files = [path.join(dir, file) for file in listdir(dir) if not file.startswith('.')]
16
+ files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
14
17
  files = [file for file in files if not path.isdir(file)]
15
18
 
16
19
  if with_extname:
@@ -1,6 +1,12 @@
1
+ """
2
+ Helper methods to work with strings.
3
+ """
1
4
  from functools import partial
2
5
  from typing import Any, Callable, List
3
6
 
7
+ INDENT_DEPTH = 4
8
+ INDENT_SPACES = INDENT_DEPTH * ' '
9
+
4
10
 
5
11
  def escape_yara_pattern(pattern: str) -> str:
6
12
  return pattern.replace('/', '\\/')
@@ -1,5 +1,5 @@
1
1
  """
2
- Methods for building Rich layout elements
2
+ Methods for building Rich layout elements for display of results.
3
3
  """
4
4
  import hashlib
5
5
  from collections import namedtuple
@@ -1,8 +1,14 @@
1
+ """
2
+ Variables and methods for working with Rich text output.
3
+ """
1
4
  from shutil import get_terminal_size
5
+ from sys import exit
2
6
  from typing import List
3
7
 
8
+ from rich import box
4
9
  from rich.console import Console
5
10
  from rich.errors import MarkupError
11
+ from rich.panel import Panel
6
12
  from rich.style import Style
7
13
  from rich.text import Text
8
14
  from rich.theme import Theme
@@ -107,3 +113,14 @@ def console_print_with_fallback(_string, style=None) -> None:
107
113
 
108
114
  def theme_colors_with_prefix(prefix: str) -> List[Text]:
109
115
  return [Text(k, v) for k, v in YARALYZER_THEME.styles.items() if k.startswith(prefix)]
116
+
117
+
118
+ def print_fatal_error_and_exit(error_message: str) -> None:
119
+ console.line(1)
120
+ print_header_panel(error_message, style='bold red reverse')
121
+ console.line(1)
122
+ exit()
123
+
124
+
125
+ def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0,2)) -> None:
126
+ console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
@@ -49,7 +49,7 @@ source.add_argument('--yara-file', '-Y',
49
49
  dest='yara_rules_files')
50
50
 
51
51
  source.add_argument('--rule-dir', '-dir',
52
- help='directory with .yara files (can be supplied more than once)',
52
+ help='directory with yara rules files (all files are used, can be supplied more than once)',
53
53
  action='append',
54
54
  metavar='DIR',
55
55
  dest='yara_rules_dirs')
@@ -161,7 +161,7 @@ export = parser.add_argument_group(
161
161
  'FILE EXPORT',
162
162
  "Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " + \
163
163
  "formats in parallel. Writes files to the current directory if --output-dir is not provided. " + \
164
- "Filenames are expansion of the scanned filename though you can use --file-prefix to make your " +
164
+ "Filenames are expansions of the scanned filename though you can use --file-prefix to make your " +
165
165
  "filenames more unique and beautiful to their beholder.")
166
166
 
167
167
  export.add_argument('-svg', '--export-svg',
@@ -8,12 +8,13 @@ Rich text decorator for YARA match dicts, which look like this:
8
8
  'rule': 'my_rule',
9
9
  'meta': {},
10
10
  'strings': [
11
- (81L, '$a', 'abc'),
12
- (141L, '$b', 'def')
11
+ StringMatch1,
12
+ StringMatch2
13
13
  ]
14
14
  }
15
15
  """
16
16
  import re
17
+ from copy import deepcopy
17
18
  from numbers import Number
18
19
  from typing import Any, Dict
19
20
 
@@ -21,9 +22,11 @@ from rich.console import Console, ConsoleOptions, RenderResult
21
22
  from rich.padding import Padding
22
23
  from rich.panel import Panel
23
24
  from rich.text import Text
25
+ from yara import StringMatch
24
26
 
25
27
  from yaralyzer.helpers.bytes_helper import clean_byte_string
26
28
  from yaralyzer.helpers.rich_text_helper import CENTER
29
+ from yaralyzer.helpers.string_helper import INDENT_SPACES
27
30
  from yaralyzer.output.rich_console import console_width, theme_colors_with_prefix
28
31
  from yaralyzer.util.logging import log
29
32
 
@@ -56,16 +59,16 @@ class YaraMatch:
56
59
 
57
60
  def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
58
61
  """Renders a panel showing the color highlighted raw YARA match info."""
59
- yield(Text("\n"))
62
+ yield Text("\n")
60
63
  yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
61
- yield(RAW_YARA_THEME_TXT)
64
+ yield RAW_YARA_THEME_TXT
62
65
  yield Padding(Panel(_rich_yara_match(self.match)), MATCH_PADDING)
63
66
 
64
67
 
65
68
  def _rich_yara_match(element: Any, depth: int = 0) -> Text:
66
- """Mildly painful/hacky way of coloring a yara result hash."""
67
- indent = Text((depth + 1) * 4 * ' ')
68
- end_indent = Text(depth * 4 * ' ')
69
+ """Painful/hacky way of recursively coloring a yara result hash."""
70
+ indent = Text((depth + 1) * INDENT_SPACES)
71
+ end_indent = Text(depth * INDENT_SPACES)
69
72
 
70
73
  if isinstance(element, str):
71
74
  txt = _yara_string(element)
@@ -79,6 +82,17 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
79
82
  if len(element) == 0:
80
83
  txt = Text('[]', style='white')
81
84
  else:
85
+ if isinstance(element[0], StringMatch):
86
+ # In yara-python 4.3.0 the StringMatch type was introduced so we just make it look like
87
+ # the old list of tuples format (see: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0)
88
+ match_tuples = [
89
+ (match.identifier, match_instance.offset, match_instance.matched_data)
90
+ for match in element
91
+ for match_instance in match.instances
92
+ ]
93
+
94
+ return _rich_yara_match(match_tuples, depth)
95
+
82
96
  total_length = sum([len(str(e)) for e in element]) + ((len(element) - 1) * 2) + len(indent) + 2
83
97
  elements_txt = [_rich_yara_match(e, depth + 1) for e in element]
84
98
  list_txt = Text('[', style='white')
@@ -9,6 +9,7 @@ Alternate constructors are provided depending on whether:
9
9
  The real action happens in the __rich_console__() dunder method.
10
10
  """
11
11
  from os import path
12
+ from sys import exit
12
13
  from typing import Iterator, List, Optional, Tuple, Union
13
14
 
14
15
  import yara
@@ -23,13 +24,13 @@ from yaralyzer.helpers.file_helper import files_in_dir, load_binary_data
23
24
  from yaralyzer.helpers.rich_text_helper import dim_if, reverse_color
24
25
  from yaralyzer.helpers.string_helper import comma_join, newline_join
25
26
  from yaralyzer.output.regex_match_metrics import RegexMatchMetrics
26
- from yaralyzer.output.rich_console import YARALYZER_THEME, console
27
+ from yaralyzer.output.rich_console import YARALYZER_THEME, console, print_fatal_error_and_exit
27
28
  from yaralyzer.output.file_hashes_table import bytes_hashes_table
28
29
  from yaralyzer.util.logging import log
29
30
  from yaralyzer.yara.yara_match import YaraMatch
30
31
  from yaralyzer.yara.yara_rule_builder import yara_rule_string
31
32
 
32
- YARA_EXT = 'yara'
33
+ YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't exist)"
33
34
 
34
35
 
35
36
  # TODO: might be worth introducing a Scannable namedtuple or similar
@@ -93,10 +94,15 @@ class Yaralyzer:
93
94
 
94
95
  for file in yara_rules_files:
95
96
  if not path.exists(file):
96
- raise ValueError(f"'{file}' is not a valid yara rules file (it doesn't exist)")
97
+ raise ValueError(f"'{file}' {YARA_FILE_DOES_NOT_EXIST_ERROR_MSG}")
97
98
 
98
99
  filepaths_arg = {path.basename(file): file for file in yara_rules_files}
99
- yara_rules = yara.compile(filepaths=filepaths_arg)
100
+
101
+ try:
102
+ yara_rules = yara.compile(filepaths=filepaths_arg)
103
+ except yara.SyntaxError as e:
104
+ print_fatal_error_and_exit(f"Failed to parse YARA rules file(s): {e}")
105
+
100
106
  yara_rules_label = comma_join(yara_rules_files, func=path.basename)
101
107
  return cls(yara_rules, yara_rules_label, scannable, scannable_label)
102
108
 
@@ -111,7 +117,7 @@ class Yaralyzer:
111
117
  if not (isinstance(dirs, list) and all(path.isdir(dir) for dir in dirs)):
112
118
  raise TypeError(f"'{dirs}' is not a list of valid directories")
113
119
 
114
- rules_files = [path.join(dir, f) for dir in dirs for f in files_in_dir(dir, YARA_EXT)]
120
+ rules_files = [path.join(dir, f) for dir in dirs for f in files_in_dir(dir)]
115
121
  return cls.for_rules_files(rules_files, scannable, scannable_label)
116
122
 
117
123
  @classmethod
File without changes
@@ -46,6 +46,7 @@ _EXPORT_KWARGS = {
46
46
  },
47
47
  }
48
48
 
49
+
49
50
  def invoke_rich_export(export_method, output_file_basepath) -> str:
50
51
  """
51
52
  Announce the export, perform the export, announce completion.
@@ -72,4 +73,3 @@ def invoke_rich_export(export_method, output_file_basepath) -> str:
72
73
  elapsed_time = time.perf_counter() - start_time
73
74
  log_and_print(f"'{output_file_path}' written in {elapsed_time:02f} seconds")
74
75
  return output_file_path
75
-