yaralyzer 0.9.2__tar.gz → 0.9.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/.yaralyzer.example +1 -1
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/CHANGELOG.md +6 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/PKG-INFO +7 -6
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/README.md +5 -4
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/pyproject.toml +2 -2
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/bytes_match.py +24 -10
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/config.py +5 -6
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/helpers/string_helper.py +3 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/util/argument_parser.py +1 -1
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/yara/yara_match.py +21 -7
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/yaralyzer.py +2 -1
- yaralyzer-0.9.2/setup.py +0 -46
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/LICENSE +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/__init__.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/decoding/bytes_decoder.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/decoding/decoding_attempt.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/encoding_detection/character_encodings.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/encoding_detection/encoding_assessment.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/encoding_detection/encoding_detector.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/helpers/bytes_helper.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/helpers/dict_helper.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/helpers/file_helper.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/helpers/rich_text_helper.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/output/decoding_attempts_table.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/output/file_export.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/output/file_hashes_table.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/output/regex_match_metrics.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/output/rich_console.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/util/logging.py +0 -0
- {yaralyzer-0.9.2 → yaralyzer-0.9.4}/yaralyzer/yara/yara_rule_builder.py +0 -0
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# Expand the width of the output to the fit the display window (same as the --maximize-width options)
|
|
13
13
|
# YARALYZER_MAXIMIZE_WIDTH=True
|
|
14
14
|
|
|
15
|
-
# Passed through to yara.set_config as the stack_size and
|
|
15
|
+
# Passed through to yara.set_config() as the stack_size and max_match_data arguments
|
|
16
16
|
# YARALYZER_STACK_SIZE=10485760
|
|
17
17
|
# YARALYZER_MAX_MATCH_LENGTH=10737418240
|
|
18
18
|
|
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# NEXT RELEASE
|
|
2
2
|
|
|
3
|
+
### 0.9.4
|
|
4
|
+
* Bump `yara-python` to 4.3.0+ and deal with backwards incompatibility
|
|
5
|
+
|
|
6
|
+
### 0.9.3
|
|
7
|
+
* Lock `yara-python` at 4.2.3 bc 4.3.x causes problems
|
|
8
|
+
|
|
3
9
|
### 0.9.2
|
|
4
10
|
* Fix PyPi screenshots
|
|
5
11
|
* Raise better error message if yara rules file doesn't exist
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 0.9.2
|
|
3
|
+
Version: 0.9.4
|
|
4
4
|
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -19,14 +19,14 @@ Requires-Dist: chardet (>=5.0.0,<6.0.0)
|
|
|
19
19
|
Requires-Dist: python-dotenv (>=0.21.0,<0.22.0)
|
|
20
20
|
Requires-Dist: rich (>=12.5.1,<13.0.0)
|
|
21
21
|
Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
|
|
22
|
-
Requires-Dist: yara-python (>=4.2.3,<5.0.0)
|
|
22
|
+
Requires-Dist: yara-python (>=4.3.0,<5.0.0)
|
|
23
23
|
Project-URL: Documentation, https://github.com/michelcrypt4d4mus/yaralyzer
|
|
24
24
|
Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
26
26
|
|
|
27
27
|
<!--  -->
|
|
28
28
|

|
|
29
|
-

|
|
30
30
|

|
|
31
31
|
|
|
32
32
|
# THE YARALYZER
|
|
@@ -52,8 +52,8 @@ yaralyze --hex-pattern 'd0 93 d0 a3 d0 [-] 9b d0 90 d0 93' one_day_in_the_life_o
|
|
|
52
52
|
#### What It Do
|
|
53
53
|
1. **See the actual bytes your YARA rules are matching.** No more digging around copy/pasting the start positions reported by YARA into your favorite hex editor. Displays both the bytes matched by YARA as well as a configurable number of bytes before and after each match in hexadecimal and "raw" python string representation.
|
|
54
54
|
1. **Do the same for byte patterns and regular expressions without writing a YARA file.** If you're too lazy to write a YARA file but are trying to determine, say, whether there's a regular expression hidden somewhere in the file you could scan for the pattern `'/.+/'` and immediately get a window into all the bytes in the file that live between front slashes. Same story for quotes, BOMs, etc. Any regex YARA can handle is supported so the sky is the limit.
|
|
55
|
-
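1. **Detect the possible encodings of each set of matched bytes.** [The `chardet` library](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.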
|
|
56
|
-
1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable), an attempt at decoding the bytes using that encoding will be displayed.
|
|
55
|
+
1. **Detect the possible encodings of each set of matched bytes.** [`chardet`](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
|
|
56
|
+
1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable) an attempt at decoding the bytes using that encoding will be displayed.
|
|
57
57
|
1. **Export the matched regions/decodings to SVG, HTML, and colored text files.** Show off your ASCII art.
|
|
58
58
|
|
|
59
59
|
#### Why It Do
|
|
@@ -87,7 +87,7 @@ Run `yaralyze -h` to see the command line options (screenshot below).
|
|
|
87
87
|
For info on exporting SVG images, HTML, etc., see [Example Output](#example-output).
|
|
88
88
|
|
|
89
89
|
### Configuration
|
|
90
|
-
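If you place a filed called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.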
|
|
90
|
+
If you place a file called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
|
|
91
91
|
|
|
92
92
|
Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
|
|
93
93
|
|
|
@@ -131,6 +131,7 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect
|
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
# TODO
|
|
134
|
+
* For some reason when displaying matches the output to a file iterates over all matches in a different way than just running in the console. Presumably this is related to the `rich` rendering engine in some way. For now the console output is the "more correct" one so it's generally OK. See [`issue_with_output_to_console_correct`](doc/rendered_images/issue_with_output_to_console_correct.png) vs. [`doc/rendered_images/issue_with_output_to_txt_file_incorrect.png`](doc/rendered_images/issue_with_output_to_txt_file_incorrect.png)
|
|
134
135
|
* highlight decodes done at `chardet`s behest
|
|
135
136
|
* deal with repetitive matches
|
|
136
137
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<!--  -->
|
|
2
2
|

|
|
3
|
-

|
|
4
4
|

|
|
5
5
|
|
|
6
6
|
# THE YARALYZER
|
|
@@ -26,8 +26,8 @@ yaralyze --hex-pattern 'd0 93 d0 a3 d0 [-] 9b d0 90 d0 93' one_day_in_the_life_o
|
|
|
26
26
|
#### What It Do
|
|
27
27
|
1. **See the actual bytes your YARA rules are matching.** No more digging around copy/pasting the start positions reported by YARA into your favorite hex editor. Displays both the bytes matched by YARA as well as a configurable number of bytes before and after each match in hexadecimal and "raw" python string representation.
|
|
28
28
|
1. **Do the same for byte patterns and regular expressions without writing a YARA file.** If you're too lazy to write a YARA file but are trying to determine, say, whether there's a regular expression hidden somewhere in the file you could scan for the pattern `'/.+/'` and immediately get a window into all the bytes in the file that live between front slashes. Same story for quotes, BOMs, etc. Any regex YARA can handle is supported so the sky is the limit.
|
|
29
|
-
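1. **Detect the possible encodings of each set of matched bytes.** [The `chardet` library](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.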
|
|
30
|
-
1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable), an attempt at decoding the bytes using that encoding will be displayed.
|
|
29
|
+
1. **Detect the possible encodings of each set of matched bytes.** [`chardet`](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.
|
|
30
|
+
1. **Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable) an attempt at decoding the bytes using that encoding will be displayed.
|
|
31
31
|
1. **Export the matched regions/decodings to SVG, HTML, and colored text files.** Show off your ASCII art.
|
|
32
32
|
|
|
33
33
|
#### Why It Do
|
|
@@ -61,7 +61,7 @@ Run `yaralyze -h` to see the command line options (screenshot below).
|
|
|
61
61
|
For info on exporting SVG images, HTML, etc., see [Example Output](#example-output).
|
|
62
62
|
|
|
63
63
|
### Configuration
|
|
64
|
-
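If you place a filed called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.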
|
|
64
|
+
If you place a file called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.
|
|
65
65
|
|
|
66
66
|
Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
|
|
67
67
|
|
|
@@ -105,6 +105,7 @@ The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vect
|
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
# TODO
|
|
108
|
+
* For some reason when displaying matches the output to a file iterates over all matches in a different way than just running in the console. Presumably this is related to the `rich` rendering engine in some way. For now the console output is the "more correct" one so it's generally OK. See [`issue_with_output_to_console_correct`](doc/rendered_images/issue_with_output_to_console_correct.png) vs. [`doc/rendered_images/issue_with_output_to_txt_file_incorrect.png`](doc/rendered_images/issue_with_output_to_txt_file_incorrect.png)
|
|
108
109
|
* highlight decodes done at `chardet`s behest
|
|
109
110
|
* deal with repetitive matches
|
|
110
111
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "yaralyzer"
|
|
3
|
-
version = "0.9.2"
|
|
3
|
+
version = "0.9.4"
|
|
4
4
|
description = "Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors."
|
|
5
5
|
authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
|
|
6
6
|
readme = "README.md"
|
|
@@ -44,7 +44,7 @@ chardet = "^5.0.0"
|
|
|
44
44
|
python-dotenv = "^0.21.0"
|
|
45
45
|
rich = "^12.5.1"
|
|
46
46
|
rich-argparse-plus = "^0.3.1"
|
|
47
|
-
yara-python = "^4.2.3"
|
|
47
|
+
yara-python = "^4.3.0"
|
|
48
48
|
|
|
49
49
|
[tool.poetry.group.dev.dependencies]
|
|
50
50
|
pytest = "^7.1.3"
|
|
@@ -10,6 +10,7 @@ from typing import Iterator, Optional
|
|
|
10
10
|
|
|
11
11
|
from rich.table import Table
|
|
12
12
|
from rich.text import Text
|
|
13
|
+
from yara import StringMatch, StringMatchInstance
|
|
13
14
|
|
|
14
15
|
from yaralyzer.config import YaralyzerConfig
|
|
15
16
|
from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
|
|
@@ -41,10 +42,10 @@ class BytesMatch:
|
|
|
41
42
|
self.label: str = label
|
|
42
43
|
self.ordinal: int = ordinal
|
|
43
44
|
self.match: Optional[re.Match] = match
|
|
44
|
-
# Maybe should be called "matched_bytes"
|
|
45
|
-
self.bytes = matched_against[start_idx:self.end_idx]
|
|
45
|
+
self.bytes = matched_against[start_idx:self.end_idx] # TODO: Maybe should be called "matched_bytes"
|
|
46
46
|
self.match_groups: Optional[tuple] = match.groups() if match else None
|
|
47
47
|
self._find_surrounding_bytes()
|
|
48
|
+
|
|
48
49
|
# Adjust the highlighting start point in case this match is very early in the stream
|
|
49
50
|
self.highlight_start_idx = start_idx - self.surrounding_start_idx
|
|
50
51
|
self.highlight_end_idx = self.highlight_start_idx + self.length
|
|
@@ -65,14 +66,15 @@ class BytesMatch:
|
|
|
65
66
|
cls,
|
|
66
67
|
matched_against: bytes,
|
|
67
68
|
rule_name: str,
|
|
68
|
-
|
|
69
|
+
yara_str_match: StringMatch,
|
|
70
|
+
yara_str_match_instance: StringMatchInstance,
|
|
69
71
|
ordinal: int,
|
|
70
72
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
71
73
|
) -> 'BytesMatch':
|
|
72
|
-
"""Build a BytesMatch from a yara string match. matched_against is the set of bytes yara was run against"""
|
|
73
|
-
|
|
74
|
-
pattern_label = yara_str[1]
|
|
74
|
+
"""Build a BytesMatch from a yara string match. 'matched_against' is the set of bytes yara was run against."""
|
|
75
|
+
pattern_label = yara_str_match.identifier
|
|
75
76
|
|
|
77
|
+
# Don't duplicate the labeling if rule_name and yara_str are the same
|
|
76
78
|
if pattern_label == '$' + rule_name:
|
|
77
79
|
label = pattern_label
|
|
78
80
|
else:
|
|
@@ -80,8 +82,8 @@ class BytesMatch:
|
|
|
80
82
|
|
|
81
83
|
return cls(
|
|
82
84
|
matched_against=matched_against,
|
|
83
|
-
start_idx=
|
|
84
|
-
length=
|
|
85
|
+
start_idx=yara_str_match_instance.offset,
|
|
86
|
+
length=yara_str_match_instance.matched_length,
|
|
85
87
|
label=label,
|
|
86
88
|
ordinal=ordinal,
|
|
87
89
|
highlight_style=highlight_style)
|
|
@@ -94,8 +96,20 @@ class BytesMatch:
|
|
|
94
96
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
95
97
|
) -> Iterator['BytesMatch']:
|
|
96
98
|
"""Iterator w/a BytesMatch for each string returned as part of a YARA match result dict."""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
+
i = 0 # For numbered labeling
|
|
100
|
+
|
|
101
|
+
# yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
|
|
102
|
+
for yara_str_match in yara_match['strings']:
|
|
103
|
+
for yara_str_match_instance in yara_str_match.instances:
|
|
104
|
+
i += 1
|
|
105
|
+
|
|
106
|
+
yield(cls.from_yara_str(
|
|
107
|
+
matched_against,
|
|
108
|
+
yara_match['rule'],
|
|
109
|
+
yara_str_match,
|
|
110
|
+
yara_str_match_instance,
|
|
111
|
+
i,
|
|
112
|
+
highlight_style))
|
|
99
113
|
|
|
100
114
|
def style_at_position(self, idx) -> str:
|
|
101
115
|
"""Get the style for the byte at position idx within the matched bytes"""
|
|
@@ -13,10 +13,9 @@ MEGABYTE = 1024 * KILOBYTE
|
|
|
13
13
|
|
|
14
14
|
def config_var_name(env_var: str) -> str:
|
|
15
15
|
"""
|
|
16
|
-
Get the name of env_var and strip off 'YARALYZER_'
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
|
|
16
|
+
Get the name of env_var and strip off 'YARALYZER_', e.g.:
|
|
17
|
+
SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
|
|
18
|
+
config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
|
|
20
19
|
"""
|
|
21
20
|
env_var = env_var.removeprefix("YARALYZER_")
|
|
22
21
|
return f'{env_var=}'.partition('=')[0]
|
|
@@ -71,7 +70,7 @@ class YaralyzerConfig:
|
|
|
71
70
|
]
|
|
72
71
|
|
|
73
72
|
@classmethod
|
|
74
|
-
def set_argument_parser(cls, parser):
|
|
73
|
+
def set_argument_parser(cls, parser: ArgumentParser) -> None:
|
|
75
74
|
cls._argument_parser: ArgumentParser = parser
|
|
76
75
|
cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
|
|
77
76
|
|
|
@@ -93,7 +92,7 @@ class YaralyzerConfig:
|
|
|
93
92
|
if isinstance(arg_value, bool):
|
|
94
93
|
setattr(args, option, arg_value or is_env_var_set_and_not_false(env_var))
|
|
95
94
|
elif isinstance(arg_value, (int, float)):
|
|
96
|
-
# Check against defaults to avoid overriding env var configured
|
|
95
|
+
# Check against defaults to avoid overriding env var configured options
|
|
97
96
|
if arg_value == default_value and env_value is not None:
|
|
98
97
|
setattr(args, option, int(env_value) or arg_value) # TODO: float args not handled
|
|
99
98
|
else:
|
|
@@ -161,7 +161,7 @@ export = parser.add_argument_group(
|
|
|
161
161
|
'FILE EXPORT',
|
|
162
162
|
"Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " + \
|
|
163
163
|
"formats in parallel. Writes files to the current directory if --output-dir is not provided. " + \
|
|
164
|
-
"Filenames are
|
|
164
|
+
"Filenames are expansions of the scanned filename though you can use --file-prefix to make your " +
|
|
165
165
|
"filenames more unique and beautiful to their beholder.")
|
|
166
166
|
|
|
167
167
|
export.add_argument('-svg', '--export-svg',
|
|
@@ -8,12 +8,13 @@ Rich text decorator for YARA match dicts, which look like this:
|
|
|
8
8
|
'rule': 'my_rule',
|
|
9
9
|
'meta': {},
|
|
10
10
|
'strings': [
|
|
11
|
-
|
|
12
|
-
|
|
11
|
+
StringMatch1,
|
|
12
|
+
StringMatch2
|
|
13
13
|
]
|
|
14
14
|
}
|
|
15
15
|
"""
|
|
16
16
|
import re
|
|
17
|
+
from copy import deepcopy
|
|
17
18
|
from numbers import Number
|
|
18
19
|
from typing import Any, Dict
|
|
19
20
|
|
|
@@ -21,9 +22,11 @@ from rich.console import Console, ConsoleOptions, RenderResult
|
|
|
21
22
|
from rich.padding import Padding
|
|
22
23
|
from rich.panel import Panel
|
|
23
24
|
from rich.text import Text
|
|
25
|
+
from yara import StringMatch
|
|
24
26
|
|
|
25
27
|
from yaralyzer.helpers.bytes_helper import clean_byte_string
|
|
26
28
|
from yaralyzer.helpers.rich_text_helper import CENTER
|
|
29
|
+
from yaralyzer.helpers.string_helper import INDENT_SPACES
|
|
27
30
|
from yaralyzer.output.rich_console import console_width, theme_colors_with_prefix
|
|
28
31
|
from yaralyzer.util.logging import log
|
|
29
32
|
|
|
@@ -56,16 +59,16 @@ class YaraMatch:
|
|
|
56
59
|
|
|
57
60
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
58
61
|
"""Renders a panel showing the color highlighted raw YARA match info."""
|
|
59
|
-
yield
|
|
62
|
+
yield Text("\n")
|
|
60
63
|
yield Padding(Panel(self.label, expand=False, style=f"on color(251) reverse"), MATCH_PADDING)
|
|
61
|
-
yield
|
|
64
|
+
yield RAW_YARA_THEME_TXT
|
|
62
65
|
yield Padding(Panel(_rich_yara_match(self.match)), MATCH_PADDING)
|
|
63
66
|
|
|
64
67
|
|
|
65
68
|
def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
66
|
-
"""
|
|
67
|
-
indent = Text((depth + 1) *
|
|
68
|
-
end_indent = Text(depth *
|
|
69
|
+
"""Painful/hacky way of recursively coloring a yara result hash."""
|
|
70
|
+
indent = Text((depth + 1) * INDENT_SPACES)
|
|
71
|
+
end_indent = Text(depth * INDENT_SPACES)
|
|
69
72
|
|
|
70
73
|
if isinstance(element, str):
|
|
71
74
|
txt = _yara_string(element)
|
|
@@ -79,6 +82,17 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
|
|
|
79
82
|
if len(element) == 0:
|
|
80
83
|
txt = Text('[]', style='white')
|
|
81
84
|
else:
|
|
85
|
+
if isinstance(element[0], StringMatch):
|
|
86
|
+
# In yara-python 4.3.0 the StringMatch type was introduced so we just make it look like
|
|
87
|
+
# the old list of tuples format (see: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0)
|
|
88
|
+
match_tuples = [
|
|
89
|
+
(match.identifier, match_instance.offset, match_instance.matched_data)
|
|
90
|
+
for match in element
|
|
91
|
+
for match_instance in match.instances
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
return _rich_yara_match(match_tuples, depth)
|
|
95
|
+
|
|
82
96
|
total_length = sum([len(str(e)) for e in element]) + ((len(element) - 1) * 2) + len(indent) + 2
|
|
83
97
|
elements_txt = [_rich_yara_match(e, depth + 1) for e in element]
|
|
84
98
|
list_txt = Text('[', style='white')
|
|
@@ -30,6 +30,7 @@ from yaralyzer.yara.yara_match import YaraMatch
|
|
|
30
30
|
from yaralyzer.yara.yara_rule_builder import yara_rule_string
|
|
31
31
|
|
|
32
32
|
YARA_EXT = 'yara'
|
|
33
|
+
YARA_FILE_DOES_NOT_EXIST_ERROR_MSG = "is not a valid yara rules file (it doesn't exist)"
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
# TODO: might be worth introducing a Scannable namedtuple or similar
|
|
@@ -93,7 +94,7 @@ class Yaralyzer:
|
|
|
93
94
|
|
|
94
95
|
for file in yara_rules_files:
|
|
95
96
|
if not path.exists(file):
|
|
96
|
-
raise ValueError(f"'{file}'
|
|
97
|
+
raise ValueError(f"'{file}' {YARA_FILE_DOES_NOT_EXIST_ERROR_MSG}")
|
|
97
98
|
|
|
98
99
|
filepaths_arg = {path.basename(file): file for file in yara_rules_files}
|
|
99
100
|
yara_rules = yara.compile(filepaths=filepaths_arg)
|
yaralyzer-0.9.2/setup.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
from setuptools import setup
|
|
3
|
-
|
|
4
|
-
packages = \
|
|
5
|
-
['yaralyzer',
|
|
6
|
-
'yaralyzer.decoding',
|
|
7
|
-
'yaralyzer.encoding_detection',
|
|
8
|
-
'yaralyzer.helpers',
|
|
9
|
-
'yaralyzer.output',
|
|
10
|
-
'yaralyzer.util',
|
|
11
|
-
'yaralyzer.yara']
|
|
12
|
-
|
|
13
|
-
package_data = \
|
|
14
|
-
{'': ['*']}
|
|
15
|
-
|
|
16
|
-
install_requires = \
|
|
17
|
-
['chardet>=5.0.0,<6.0.0',
|
|
18
|
-
'python-dotenv>=0.21.0,<0.22.0',
|
|
19
|
-
'rich-argparse-plus>=0.3.1,<0.4.0',
|
|
20
|
-
'rich>=12.5.1,<13.0.0',
|
|
21
|
-
'yara-python>=4.2.3,<5.0.0']
|
|
22
|
-
|
|
23
|
-
entry_points = \
|
|
24
|
-
{'console_scripts': ['yaralyze = yaralyzer:yaralyze',
|
|
25
|
-
'yaralyzer_show_color_theme = '
|
|
26
|
-
'yaralyzer.helpers.rich_text_helper:yaralyzer_show_color_theme']}
|
|
27
|
-
|
|
28
|
-
setup_kwargs = {
|
|
29
|
-
'name': 'yaralyzer',
|
|
30
|
-
'version': '0.9.2',
|
|
31
|
-
'description': 'Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors.',
|
|
32
|
-
'long_description': '<!--  -->\n\n\n\n\n# THE YARALYZER\n\n\n\nVisually inspect all of the regex matches (and their sexier, more cloak and dagger cousins, the [YARA](https://github.com/VirusTotal/yara-python) matches) found in binary data and/or text. See what happens when you force various character encodings upon those matched bytes. [With colors](https://github.com/michelcrypt4d4mus/yaralyzer#example-output).\n\n#### Quick Start\n```sh\npipx install yaralyzer\n\n# Scan against YARA definitions in a file:\nyaralyze --yara-rules /secret/vault/sigmunds_malware_rules.yara lacan_buys_the_dip.pdf\n\n# Scan against an arbitrary regular expression:\nyaralyze --regex-pattern \'good and evil.*of\\s+\\w+byte\' the_crypto_archipelago.exe\n\n# Scan against an arbitrary YARA hex pattern\nyaralyze --hex-pattern \'d0 93 d0 a3 d0 [-] 9b d0 90 d0 93\' one_day_in_the_life_of_ivan_cryptosovich.bin\n```\n\n#### What It Do\n1. **See the actual bytes your YARA rules are matching.** No more digging around copy/pasting the start positions reported by YARA into your favorite hex editor. Displays both the bytes matched by YARA as well as a configurable number of bytes before and after each match in hexadecimal and "raw" python string representation.\n1. **Do the same for byte patterns and regular expressions without writing a YARA file.** If you\'re too lazy to write a YARA file but are trying to determine, say, whether there\'s a regular expression hidden somewhere in the file you could scan for the pattern `\'/.+/\'` and immediately get a window into all the bytes in the file that live between front slashes. Same story for quotes, BOMs, etc. Any regex YARA can handle is supported so the sky is the limit.\n1. **Detect the possible encodings of each set of matched bytes.** [The `chardet` library](https://github.com/chardet/chardet) is a sophisticated library for guessing character encodings and it is leveraged here.\n1. 
**Display the result of forcing various character encodings upon the matched areas.** Several default character encodings will be _forcibly_ attempted in the region around the match. [`chardet`](https://github.com/chardet/chardet) will also be leveraged to see if the bytes fit the pattern of _any_ known encoding. If `chardet` is confident enough (configurable), an attempt at decoding the bytes using that encoding will be displayed.\n1. **Export the matched regions/decodings to SVG, HTML, and colored text files.** Show off your ASCII art.\n\n#### Why It Do\nThe Yaralyzer\'s functionality was extracted from [The Pdfalyzer](https://github.com/michelcrypt4d4mus/pdfalyzer) when it became apparent that visualizing and decoding pattern matches in binaries had more utility than just in a PDF analysis tool.\n\n[YARA](https://github.com/VirusTotal/yara-python), for those who are unaware[^1], is branded as a malware analysis/alerting tool but it\'s actually both a lot more and a lot less than that. One way to think about it is that YARA is a regular expression matching engine on steroids. It can locate regex matches in binaries like any regex engine but it can also do far wilder things like combine regexes in logical groups, compare regexes against all 256 XORed versions of a binary, check for `base64` and other encodings of the pattern, and more. Maybe most importantly of all YARA provides a standard text based format for\npeople to _share_ their \'roided regexes with the world. All these features are particularly useful when analyzing or reverse engineering malware, whose authors tend to invest a great deal of time into making stuff hard to find.\n\nBut... that\'s also all YARA does. Everything else is up to the user. YARA\'s just a match engine and if you don\'t know what to match (or even what character encoding you might be able to match in) it only gets you so far. 
I found myself a bit frustrated trying to use YARA to look at all the matches of a few critical patterns:\n\n1. Bytes between escaped quotes (`\\".+\\"` and `\\\'.+\\\'`)\n1. Bytes between front slashes (`/.+/`). Front slashes demarcate a regular expression in many implementations and I was trying to see if any of the bytes matching this pattern were _actually_ regexes.\n\nYARA just tells you the byte position and the matched string but it can\'t tell you whether those bytes are UTF-8, UTF-16, Latin-1, etc. etc. (or none of the above). I also found myself wanting to understand what was going _in the region_ of the matched bytes and not just _in_ the matched bytes. In other words I wanted to scope the bytes immediately before and after whatever got matched.\n\nEnter **The Yaralyzer**, which lets you quickly scan the regions around matches while also showing you what those regions would look like if they were forced into various character encodings.\n\nIt\'s important to note that **The Yaralyzer** isn\'t a full on malware reversing tool. It can\'t do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn\'t try to. It\'s more intended to give you a quick visual overview of suspect regions in the binary so you can hone in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).\n\n# Installation\nInstall it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a marginally better solution as it guarantees any packages installed with it will be isolated from the rest of your local python environment. 
Of course if you don\'t really have a local python environment this is a moot point and you can feel free to install with `pip`/`pip3`.\n```\npipx install yaralyzer\n```\n\n# Usage\nRun `yaralyze -h` to see the command line options (screenshot below).\n\n\n\nFor info on exporting SVG images, HTML, etc., see [Example Output](#example-output).\n\n### Configuration\nIf you place a filed called `.yaralyzer` in your home directory or the current working directory then environment variables specified in that `.yaralyzer` file will be added to the environment each time yaralyzer is invoked. This provides a mechanism for permanently configuring various command line options so you can avoid typing them over and over. See the example file [`.yaralyzer.example`](.yaralyzer.example) to see which options can be configured this way.\n\nOnly one `.yaralyzer` file will be loaded and the working directory\'s `.yaralyzer` takes precedence over the home directory\'s `.yaralyzer`.\n\n### As A Library\n[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. It has a variety of constructors supporting:\n\n1. Precompiled YARA rules\n1. Creating a YARA rule from a string\n1. Loading YARA rules from files\n1. Loading YARA rules from all `.yara` file in a directory\n1. Scanning `bytes`\n1. Scanning a file\n\nShould you want to iterate over the `BytesMatch` (like a `re.Match` object for a YARA match) and `BytesDecoder` (tracks decoding attempt stats) objects returned by The Yaralyzer, you can do so like this:\n\n```python\nfrom yaralyzer.yaralyzer import Yaralyzer\n\nyaralyzer = Yaralyzer.for_rules_files([\'/secret/rule.yara\'], \'lacan_buys_the_dip.pdf\')\n\nfor bytes_match, bytes_decoder in yaralyzer.match_iterator():\n do_stuff()\n```\n\n# Example Output\nThe Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich). 
SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we\'ve seen some glitchiness with `cairosvg`.\n\n**PyPi Users:** If you are reading this document [on PyPi](https://pypi.org/project/yaralyzer/) be aware that it renders a lot better [over on GitHub](https://github.com/michelcrypt4d4mus/yaralyzer). Pretty pictures, footnotes that work, etc.\n\n#### Raw YARA match result:\n\n\n\n#### Display hex, raw python string, and various attempted decodings of both the match and the bytes before and after the match (configurable):\n\n\n\n#### Bonus: see what `chardet.detect()` thinks about the likelihood your bytes are in a given encoding/language:\n\n\n\n\n# TODO\n* highlight decodes done at `chardet`s behest\n* deal with repetitive matches\n\n[^1]: As I was until recently.\n',
|
|
33
|
-
'author': 'Michel de Cryptadamus',
|
|
34
|
-
'author_email': 'michel@cryptadamus.com',
|
|
35
|
-
'maintainer': 'None',
|
|
36
|
-
'maintainer_email': 'None',
|
|
37
|
-
'url': 'https://github.com/michelcrypt4d4mus/yaralyzer',
|
|
38
|
-
'packages': packages,
|
|
39
|
-
'package_data': package_data,
|
|
40
|
-
'install_requires': install_requires,
|
|
41
|
-
'entry_points': entry_points,
|
|
42
|
-
'python_requires': '>=3.9,<4.0',
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
setup(**setup_kwargs)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|