yaralyzer 1.0.7__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/CHANGELOG.md +9 -0
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/PKG-INFO +10 -9
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/README.md +5 -3
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/pyproject.toml +33 -15
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/__init__.py +5 -3
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/bytes_match.py +106 -16
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/config.py +18 -5
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/decoding/bytes_decoder.py +41 -11
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/decoding/decoding_attempt.py +54 -17
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/encoding_detection/character_encodings.py +11 -7
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/encoding_detection/encoding_assessment.py +31 -5
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/encoding_detection/encoding_detector.py +43 -9
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/bytes_helper.py +113 -15
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/dict_helper.py +1 -1
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/file_helper.py +20 -13
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/rich_text_helper.py +16 -13
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/string_helper.py +1 -1
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/output/decoding_attempts_table.py +43 -9
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/output/file_export.py +23 -6
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/output/file_hashes_table.py +31 -2
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/output/regex_match_metrics.py +33 -8
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/output/rich_console.py +26 -9
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/util/argument_parser.py +12 -3
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/util/logging.py +31 -16
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/yara/yara_match.py +40 -17
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/yara/yara_rule_builder.py +55 -11
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/yaralyzer.py +117 -31
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/.yaralyzer.example +0 -0
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/LICENSE +0 -0
- {yaralyzer-1.0.7 → yaralyzer-1.0.9}/yaralyzer/helpers/list_helper.py +0 -0
|
@@ -1,8 +1,17 @@
|
|
|
1
1
|
# NEXT RELEASE
|
|
2
2
|
|
|
3
|
+
### 1.0.9
|
|
4
|
+
* Raise `FileNotFoundError` instead of `ValueError` if provided YARA rules files or dirs don't exist
|
|
5
|
+
|
|
6
|
+
### 1.0.8
|
|
7
|
+
* Bump `python-dotenv` to v1.1.1
|
|
8
|
+
* Use `mkdocs` and `lazydocs` to build automatic API documentation at https://michelcrypt4d4mus.github.io/yaralyzer/
|
|
9
|
+
* Drop python 3.9 support (required by `mkdocs-awesome-nav` package)
|
|
10
|
+
|
|
3
11
|
### 1.0.7
|
|
4
12
|
* Add `Changelog` to PyPi URLs, add some more PyPi classifiers
|
|
5
13
|
* Add `.flake8` config file and fix style errors
|
|
14
|
+
* Rename `prefix_with_plain_text_obj()` to `prefix_with_style()`
|
|
6
15
|
|
|
7
16
|
### 1.0.6
|
|
8
17
|
* Add `Environment :: Console` and `Programming Language :: Python` to PyPi classifiers
|
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 1.0.7
|
|
4
|
-
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream
|
|
3
|
+
Version: 1.0.9
|
|
4
|
+
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
7
7
|
Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
|
|
8
8
|
Author: Michel de Cryptadamus
|
|
9
9
|
Author-email: michel@cryptadamus.com
|
|
10
|
-
Requires-Python: >=3.9,<4.0
|
|
10
|
+
Requires-Python: >=3.10,<4.0
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
13
|
Classifier: Intended Audience :: Information Technology
|
|
14
14
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
15
15
|
Classifier: Programming Language :: Python
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -23,12 +22,12 @@ Classifier: Topic :: Artistic Software
|
|
|
23
22
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
24
23
|
Classifier: Topic :: Security
|
|
25
24
|
Requires-Dist: chardet (>=5.0.0,<6.0.0)
|
|
26
|
-
Requires-Dist: python-dotenv (>=
|
|
25
|
+
Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
|
|
27
26
|
Requires-Dist: rich (>=14.1.0,<15.0.0)
|
|
28
27
|
Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
|
|
29
28
|
Requires-Dist: yara-python (>=4.5.4,<5.0.0)
|
|
30
29
|
Project-URL: Changelog, https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md
|
|
31
|
-
Project-URL: Documentation, https://github.
|
|
30
|
+
Project-URL: Documentation, https://michelcrypt4d4mus.github.io/yaralyzer/
|
|
32
31
|
Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
|
|
33
32
|
Description-Content-Type: text/markdown
|
|
34
33
|
|
|
@@ -79,7 +78,7 @@ YARA just tells you the byte position and the matched string but it can't tell y
|
|
|
79
78
|
|
|
80
79
|
Enter **The Yaralyzer**, which lets you quickly scan the regions around matches while also showing you what those regions would look like if they were forced into various character encodings.
|
|
81
80
|
|
|
82
|
-
|
|
81
|
+
**The Yaralyzer** isn't a malware reversing tool. It can't do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn't try to. It's more intended to give you a quick visual overview of suspect regions in the binary so you can home in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).
|
|
83
82
|
|
|
84
83
|
# Installation
|
|
85
84
|
Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a marginally better solution as it guarantees any packages installed with it will be isolated from the rest of your local python environment. Of course if you don't really have a local python environment this is a moot point and you can feel free to install with `pip`/`pip3`.
|
|
@@ -87,6 +86,7 @@ Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a ma
|
|
|
87
86
|
pipx install yaralyzer
|
|
88
87
|
```
|
|
89
88
|
|
|
89
|
+
|
|
90
90
|
# Usage
|
|
91
91
|
Run `yaralyze -h` to see the command line options (screenshot below).
|
|
92
92
|
|
|
@@ -100,7 +100,7 @@ If you place a file called `.yaralyzer` in your home directory or the current wo
|
|
|
100
100
|
Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
|
|
101
101
|
|
|
102
102
|
### As A Library
|
|
103
|
-
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. It has a variety of constructors supporting:
|
|
103
|
+
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. Auto generated documentation for `Yaralyzer`'s various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/). It has a variety of [alternate constructors](https://michelcrypt4d4mus.github.io/yaralyzer/api/yaralyzer/) supporting:
|
|
104
104
|
|
|
105
105
|
1. Precompiled YARA rules
|
|
106
106
|
1. Creating a YARA rule from a string
|
|
@@ -109,7 +109,7 @@ Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyze
|
|
|
109
109
|
1. Scanning `bytes`
|
|
110
110
|
1. Scanning a file
|
|
111
111
|
|
|
112
|
-
Should you want to iterate over the `BytesMatch` (like a `re.Match` object for a YARA match) and `BytesDecoder` (tracks decoding attempt stats) objects
|
|
112
|
+
Should you want to iterate over the [`BytesMatch`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_match/) (like a `re.Match` object for a YARA match) and [`BytesDecoder`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_decoder/) (tracks decoding attempt stats) objects used by The Yaralyzer, you can do so like this:
|
|
113
113
|
|
|
114
114
|
```python
|
|
115
115
|
from yaralyzer.yaralyzer import Yaralyzer
|
|
@@ -120,6 +120,7 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
120
120
|
do_stuff()
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
+
|
|
123
124
|
# Example Output
|
|
124
125
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
125
126
|
|
|
@@ -45,7 +45,7 @@ YARA just tells you the byte position and the matched string but it can't tell y
|
|
|
45
45
|
|
|
46
46
|
Enter **The Yaralyzer**, which lets you quickly scan the regions around matches while also showing you what those regions would look like if they were forced into various character encodings.
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
**The Yaralyzer** isn't a malware reversing tool. It can't do all the things a tool like [CyberChef](https://gchq.github.io/CyberChef/) does and it doesn't try to. It's more intended to give you a quick visual overview of suspect regions in the binary so you can home in on the areas you might want to inspect with a more serious tool like [CyberChef](https://gchq.github.io/CyberChef/).
|
|
49
49
|
|
|
50
50
|
# Installation
|
|
51
51
|
Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a marginally better solution as it guarantees any packages installed with it will be isolated from the rest of your local python environment. Of course if you don't really have a local python environment this is a moot point and you can feel free to install with `pip`/`pip3`.
|
|
@@ -53,6 +53,7 @@ Install it with [`pipx`](https://pypa.github.io/pipx/) or `pip3`. `pipx` is a ma
|
|
|
53
53
|
pipx install yaralyzer
|
|
54
54
|
```
|
|
55
55
|
|
|
56
|
+
|
|
56
57
|
# Usage
|
|
57
58
|
Run `yaralyze -h` to see the command line options (screenshot below).
|
|
58
59
|
|
|
@@ -66,7 +67,7 @@ If you place a file called `.yaralyzer` in your home directory or the current wo
|
|
|
66
67
|
Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyzer` takes precedence over the home directory's `.yaralyzer`.
|
|
67
68
|
|
|
68
69
|
### As A Library
|
|
69
|
-
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. It has a variety of constructors supporting:
|
|
70
|
+
[`Yaralyzer`](yaralyzer/yaralyzer.py) is the main class. Auto generated documentation for `Yaralyzer`'s various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/). It has a variety of [alternate constructors](https://michelcrypt4d4mus.github.io/yaralyzer/api/yaralyzer/) supporting:
|
|
70
71
|
|
|
71
72
|
1. Precompiled YARA rules
|
|
72
73
|
1. Creating a YARA rule from a string
|
|
@@ -75,7 +76,7 @@ Only one `.yaralyzer` file will be loaded and the working directory's `.yaralyze
|
|
|
75
76
|
1. Scanning `bytes`
|
|
76
77
|
1. Scanning a file
|
|
77
78
|
|
|
78
|
-
Should you want to iterate over the `BytesMatch` (like a `re.Match` object for a YARA match) and `BytesDecoder` (tracks decoding attempt stats) objects
|
|
79
|
+
Should you want to iterate over the [`BytesMatch`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_match/) (like a `re.Match` object for a YARA match) and [`BytesDecoder`](https://michelcrypt4d4mus.github.io/yaralyzer/api/bytes_decoder/) (tracks decoding attempt stats) objects used by The Yaralyzer, you can do so like this:
|
|
79
80
|
|
|
80
81
|
```python
|
|
81
82
|
from yaralyzer.yaralyzer import Yaralyzer
|
|
@@ -86,6 +87,7 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
86
87
|
do_stuff()
|
|
87
88
|
```
|
|
88
89
|
|
|
90
|
+
|
|
89
91
|
# Example Output
|
|
90
92
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
91
93
|
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "yaralyzer"
|
|
3
|
-
version = "1.0.7"
|
|
4
|
-
description = "Visualize and force decode YARA and regex matches found in a file or byte stream
|
|
3
|
+
version = "1.0.9"
|
|
4
|
+
description = "Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors."
|
|
5
5
|
authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
license = "GPL-3.0-or-later"
|
|
8
|
+
|
|
8
9
|
homepage = "https://github.com/michelcrypt4d4mus/yaralyzer"
|
|
9
10
|
repository = "https://github.com/michelcrypt4d4mus/yaralyzer"
|
|
10
|
-
documentation = "https://github.
|
|
11
|
+
documentation = "https://michelcrypt4d4mus.github.io/yaralyzer/"
|
|
11
12
|
|
|
12
13
|
classifiers = [
|
|
13
14
|
"Development Status :: 5 - Production/Stable",
|
|
@@ -15,7 +16,6 @@ classifiers = [
|
|
|
15
16
|
"Intended Audience :: Information Technology",
|
|
16
17
|
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
|
17
18
|
"Programming Language :: Python",
|
|
18
|
-
"Programming Language :: Python :: 3.9",
|
|
19
19
|
"Programming Language :: Python :: 3.10",
|
|
20
20
|
"Programming Language :: Python :: 3.11",
|
|
21
21
|
"Programming Language :: Python :: 3.12",
|
|
@@ -61,13 +61,13 @@ keywords = [
|
|
|
61
61
|
]
|
|
62
62
|
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
# Dependencies
|
|
66
|
-
|
|
64
|
+
####################
|
|
65
|
+
# Dependencies #
|
|
66
|
+
####################
|
|
67
67
|
[tool.poetry.dependencies]
|
|
68
|
-
python = "^3.9"
|
|
68
|
+
python = "^3.10"
|
|
69
69
|
chardet = ">=5.0.0,<6.0.0"
|
|
70
|
-
python-dotenv = "^
|
|
70
|
+
python-dotenv = "^1.1.1"
|
|
71
71
|
rich = "^14.1.0"
|
|
72
72
|
rich-argparse-plus = "^0.3.1"
|
|
73
73
|
yara-python = "^4.5.4"
|
|
@@ -75,6 +75,12 @@ yara-python = "^4.5.4"
|
|
|
75
75
|
|
|
76
76
|
[tool.poetry.group.dev.dependencies]
|
|
77
77
|
flake8 = "^7.3.0"
|
|
78
|
+
lazydocs = "^0.4.8"
|
|
79
|
+
mkdocs = "^1.6.1"
|
|
80
|
+
mkdocs-awesome-nav = "^3.1.2"
|
|
81
|
+
mkdocs-include-markdown-plugin = "^7.1.7"
|
|
82
|
+
mkdocs-material = "^9.6.19"
|
|
83
|
+
pydocstyle = "^6.3.0"
|
|
78
84
|
pytest = "^7.1.3"
|
|
79
85
|
|
|
80
86
|
|
|
@@ -86,16 +92,28 @@ yaralyze = 'yaralyzer:yaralyze'
|
|
|
86
92
|
yaralyzer_show_color_theme = 'yaralyzer.helpers.rich_text_helper:yaralyzer_show_color_theme'
|
|
87
93
|
|
|
88
94
|
|
|
89
|
-
|
|
90
|
-
#
|
|
91
|
-
|
|
95
|
+
###############
|
|
96
|
+
# PyPi URLs #
|
|
97
|
+
###############
|
|
92
98
|
[tool.poetry.urls]
|
|
93
99
|
Changelog = "https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md"
|
|
94
100
|
|
|
95
101
|
|
|
96
|
-
|
|
97
|
-
#
|
|
98
|
-
|
|
102
|
+
#################
|
|
103
|
+
# Build Stuff #
|
|
104
|
+
#################
|
|
99
105
|
[build-system]
|
|
100
106
|
build-backend = "poetry.core.masonry.api"
|
|
101
107
|
requires = ["poetry-core"]
|
|
108
|
+
|
|
109
|
+
[tool.pydocstyle]
|
|
110
|
+
match-dir = "yaralyzer"
|
|
111
|
+
ignore = [
|
|
112
|
+
"D200", # One-line docstring should fit on one line with quotes (found 3)
|
|
113
|
+
"D203", # 1 blank line required before class docstring
|
|
114
|
+
"D212", # Multi-line docstring summary should start at the first line
|
|
115
|
+
"D401", # First line should be in imperative mood
|
|
116
|
+
"D406", # Section name should end with a newline
|
|
117
|
+
"D407", # Missing dashed underline after section
|
|
118
|
+
"D413", # Missing blank line after last section
|
|
119
|
+
]
|
|
@@ -24,6 +24,11 @@ PDFALYZER_MSG_TXT.append('https://github.com/michelcrypt4d4mus/pdfalyzer\n', sty
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def yaralyze():
|
|
27
|
+
"""
|
|
28
|
+
Entry point for yaralyzer when invoked as a script.
|
|
29
|
+
|
|
30
|
+
Args are parsed from the command line and environment variables. See `yaralyze --help` for details.
|
|
31
|
+
"""
|
|
27
32
|
args = parse_arguments()
|
|
28
33
|
output_basepath = None
|
|
29
34
|
|
|
@@ -50,13 +55,10 @@ def yaralyze():
|
|
|
50
55
|
|
|
51
56
|
if args.export_txt:
|
|
52
57
|
invoke_rich_export(console.save_text, output_basepath)
|
|
53
|
-
|
|
54
58
|
if args.export_html:
|
|
55
59
|
invoke_rich_export(console.save_html, output_basepath)
|
|
56
|
-
|
|
57
60
|
if args.export_svg:
|
|
58
61
|
invoke_rich_export(console.save_svg, output_basepath)
|
|
59
|
-
|
|
60
62
|
if args.export_json:
|
|
61
63
|
export_json(yaralyzer, output_basepath)
|
|
62
64
|
|
|
@@ -1,9 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
some (not many) extra bells and whistles, most notably the surrounding_bytes property.
|
|
4
|
-
|
|
5
|
-
pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
|
|
6
|
-
e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
|
|
2
|
+
`BytesMatch` class for tracking regex and YARA matches against binary data.
|
|
7
3
|
"""
|
|
8
4
|
import re
|
|
9
5
|
from typing import Iterator, Optional
|
|
@@ -19,6 +15,13 @@ from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
|
|
|
19
15
|
|
|
20
16
|
|
|
21
17
|
class BytesMatch:
|
|
18
|
+
"""
|
|
19
|
+
Simple class to keep track of regex matches against binary data.
|
|
20
|
+
|
|
21
|
+
Basically a Regex `re.match` object with some (not many) extra bells and whistles, most notably
|
|
22
|
+
the `surrounding_bytes` property.
|
|
23
|
+
"""
|
|
24
|
+
|
|
22
25
|
def __init__(
|
|
23
26
|
self,
|
|
24
27
|
matched_against: bytes,
|
|
@@ -30,8 +33,16 @@ class BytesMatch:
|
|
|
30
33
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
31
34
|
) -> None:
|
|
32
35
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
36
|
+
Initialize a `BytesMatch` object representing a match against binary data.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
matched_against (bytes): The full byte sequence that was searched.
|
|
40
|
+
start_idx (int): Start index of the match in the byte sequence.
|
|
41
|
+
length (int): Length of the match in bytes.
|
|
42
|
+
label (str): Label for the match (e.g., regex or YARA rule name).
|
|
43
|
+
ordinal (int): This was the Nth match for this pattern (used for labeling only).
|
|
44
|
+
match (Optional[re.Match]): Regex `match` object, if available.
|
|
45
|
+
highlight_style (str): Style to use for highlighting the match.
|
|
35
46
|
"""
|
|
36
47
|
self.matched_against: bytes = matched_against
|
|
37
48
|
self.start_idx: int = start_idx
|
|
@@ -58,6 +69,18 @@ class BytesMatch:
|
|
|
58
69
|
ordinal: int,
|
|
59
70
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
60
71
|
) -> 'BytesMatch':
|
|
72
|
+
"""
|
|
73
|
+
Alternate constructor to build a `BytesMatch` from a regex match object.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
matched_against (bytes): The bytes searched.
|
|
77
|
+
match (re.Match): The regex `match` object.
|
|
78
|
+
ordinal (int): This was the Nth match for this pattern (used for labeling only).
|
|
79
|
+
highlight_style (str): Style for highlighting.
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
BytesMatch: The constructed `BytesMatch` instance.
|
|
83
|
+
"""
|
|
61
84
|
return cls(matched_against, match.start(), len(match[0]), match.re.pattern, ordinal, match, highlight_style)
|
|
62
85
|
|
|
63
86
|
@classmethod
|
|
@@ -70,7 +93,20 @@ class BytesMatch:
|
|
|
70
93
|
ordinal: int,
|
|
71
94
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
72
95
|
) -> 'BytesMatch':
|
|
73
|
-
"""
|
|
96
|
+
"""
|
|
97
|
+
Alternate constructor to build a `BytesMatch` from a YARA string match instance.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
matched_against (bytes): The bytes searched.
|
|
101
|
+
rule_name (str): Name of the YARA rule.
|
|
102
|
+
yara_str_match (StringMatch): YARA string match object.
|
|
103
|
+
yara_str_match_instance (StringMatchInstance): Instance of the string match.
|
|
104
|
+
ordinal (int): The Nth match for this pattern.
|
|
105
|
+
highlight_style (str): Style for highlighting.
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
BytesMatch: The constructed BytesMatch instance.
|
|
109
|
+
"""
|
|
74
110
|
pattern_label = yara_str_match.identifier
|
|
75
111
|
|
|
76
112
|
# Don't duplicate the labeling if rule_name and yara_str are the same
|
|
@@ -94,7 +130,17 @@ class BytesMatch:
|
|
|
94
130
|
yara_match: dict,
|
|
95
131
|
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
96
132
|
) -> Iterator['BytesMatch']:
|
|
97
|
-
"""
|
|
133
|
+
"""
|
|
134
|
+
Yield a `BytesMatch` for each string returned as part of a YARA match result dict.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
matched_against (bytes): The bytes searched.
|
|
138
|
+
yara_match (dict): YARA match result dictionary.
|
|
139
|
+
highlight_style (str): Style for highlighting.
|
|
140
|
+
|
|
141
|
+
Yields:
|
|
142
|
+
BytesMatch: For each string match in the YARA result.
|
|
143
|
+
"""
|
|
98
144
|
i = 0 # For numbered labeling
|
|
99
145
|
|
|
100
146
|
# yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
|
|
@@ -112,14 +158,27 @@ class BytesMatch:
|
|
|
112
158
|
)
|
|
113
159
|
|
|
114
160
|
def style_at_position(self, idx) -> str:
|
|
115
|
-
"""
|
|
161
|
+
"""
|
|
162
|
+
Get the style for the byte at position `idx` within the matched bytes.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
idx (int): Index within the surrounding bytes.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
str: The style to use for this byte (highlight or greyed out).
|
|
169
|
+
"""
|
|
116
170
|
if idx < self.highlight_start_idx or idx >= self.highlight_end_idx:
|
|
117
171
|
return GREY_ADDRESS
|
|
118
172
|
else:
|
|
119
173
|
return self.highlight_style
|
|
120
174
|
|
|
121
175
|
def location(self) -> Text:
|
|
122
|
-
"""
|
|
176
|
+
"""
|
|
177
|
+
Get a styled `Text` object describing the start and end index of the match.
|
|
178
|
+
|
|
179
|
+
Returns:
|
|
180
|
+
Text: Rich Text object like '(start idx: 348190, end idx: 348228)'.
|
|
181
|
+
"""
|
|
123
182
|
location_txt = prefix_with_style(
|
|
124
183
|
f"(start idx: ",
|
|
125
184
|
style='off_white',
|
|
@@ -133,13 +192,26 @@ class BytesMatch:
|
|
|
133
192
|
return location_txt
|
|
134
193
|
|
|
135
194
|
def is_decodable(self) -> bool:
|
|
136
|
-
"""
|
|
195
|
+
"""
|
|
196
|
+
Determine if the matched bytes should be decoded.
|
|
197
|
+
|
|
198
|
+
Whether the bytes are decodable depends on whether `SUPPRESS_DECODES_TABLE` is set
|
|
199
|
+
and whether the match length is between `MIN`/`MAX_DECODE_LENGTH`.
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
bool: `True` if decodable, `False` otherwise.
|
|
203
|
+
"""
|
|
137
204
|
return self.match_length >= YaralyzerConfig.args.min_decode_length \
|
|
138
205
|
and self.match_length <= YaralyzerConfig.args.max_decode_length \
|
|
139
206
|
and not YaralyzerConfig.args.suppress_decodes_table
|
|
140
207
|
|
|
141
208
|
def bytes_hashes_table(self) -> Table:
|
|
142
|
-
"""
|
|
209
|
+
"""
|
|
210
|
+
Build a table of MD5/SHA hashes for the matched bytes.
|
|
211
|
+
|
|
212
|
+
Returns:
|
|
213
|
+
Table: Rich `Table` object with hashes.
|
|
214
|
+
"""
|
|
143
215
|
return bytes_hashes_table(
|
|
144
216
|
self.bytes,
|
|
145
217
|
self.location().plain,
|
|
@@ -147,7 +219,12 @@ class BytesMatch:
|
|
|
147
219
|
)
|
|
148
220
|
|
|
149
221
|
def suppression_notice(self) -> Text:
|
|
150
|
-
"""
|
|
222
|
+
"""
|
|
223
|
+
Generate a message for when the match is too short or too long to decode.
|
|
224
|
+
|
|
225
|
+
Returns:
|
|
226
|
+
Text: Rich `Text` object with the suppression notice.
|
|
227
|
+
"""
|
|
151
228
|
txt = self.__rich__()
|
|
152
229
|
|
|
153
230
|
if self.match_length < YaralyzerConfig.args.min_decode_length:
|
|
@@ -159,7 +236,12 @@ class BytesMatch:
|
|
|
159
236
|
return txt
|
|
160
237
|
|
|
161
238
|
def to_json(self) -> dict:
|
|
162
|
-
"""
|
|
239
|
+
"""
|
|
240
|
+
Convert this `BytesMatch` to a JSON-serializable dictionary.
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
dict: Dictionary representation of the match, suitable for JSON serialization.
|
|
244
|
+
"""
|
|
163
245
|
json_dict = {
|
|
164
246
|
'label': self.label,
|
|
165
247
|
'match_length': self.match_length,
|
|
@@ -178,7 +260,13 @@ class BytesMatch:
|
|
|
178
260
|
return json_dict
|
|
179
261
|
|
|
180
262
|
def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
|
|
181
|
-
"""
|
|
263
|
+
"""
|
|
264
|
+
Find and set the bytes surrounding the match, ensuring indices stay within bounds.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
num_before (Optional[int]): Number of bytes before the match to include.
|
|
268
|
+
num_after (Optional[int]): Number of bytes after the match to include.
|
|
269
|
+
"""
|
|
182
270
|
num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
|
|
183
271
|
num_before = num_before or YaralyzerConfig.args.surrounding_bytes
|
|
184
272
|
self.surrounding_start_idx: int = max(self.start_idx - num_before, 0)
|
|
@@ -186,6 +274,7 @@ class BytesMatch:
|
|
|
186
274
|
self.surrounding_bytes: bytes = self.matched_against[self.surrounding_start_idx:self.surrounding_end_idx]
|
|
187
275
|
|
|
188
276
|
def __rich__(self) -> Text:
|
|
277
|
+
"""Get a rich `Text` representation of the match for display."""
|
|
189
278
|
headline = prefix_with_style(str(self.match_length), style='number', root_style='decode.subheading')
|
|
190
279
|
headline.append(f" bytes matching ")
|
|
191
280
|
headline.append(f"{self.label} ", style=ALERT_STYLE if self.highlight_style == ALERT_STYLE else 'regex')
|
|
@@ -193,4 +282,5 @@ class BytesMatch:
|
|
|
193
282
|
return headline + self.location()
|
|
194
283
|
|
|
195
284
|
def __str__(self):
|
|
285
|
+
"""Plain text (no rich colors) representation of the match for display."""
|
|
196
286
|
return self.__rich__().plain
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration management for Yaralyzer.
|
|
3
|
+
"""
|
|
1
4
|
import logging
|
|
2
5
|
from argparse import ArgumentParser, Namespace
|
|
3
6
|
from os import environ
|
|
@@ -15,16 +18,20 @@ MEGABYTE = 1024 * KILOBYTE
|
|
|
15
18
|
|
|
16
19
|
def config_var_name(env_var: str) -> str:
|
|
17
20
|
"""
|
|
18
|
-
Get the name of env_var and strip off
|
|
21
|
+
Get the name of `env_var` and strip off `YARALYZER_` prefix.
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
```
|
|
19
25
|
SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
|
|
20
26
|
config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
|
|
27
|
+
```
|
|
21
28
|
"""
|
|
22
29
|
env_var = env_var.removeprefix("YARALYZER_")
|
|
23
30
|
return f'{env_var=}'.partition('=')[0]
|
|
24
31
|
|
|
25
32
|
|
|
26
|
-
def is_env_var_set_and_not_false(var_name):
|
|
27
|
-
"""
|
|
33
|
+
def is_env_var_set_and_not_false(var_name: str) -> bool:
|
|
34
|
+
"""Return `True` if `var_name` is not empty and set to anything other than "false" (capitalization agnostic)."""
|
|
28
35
|
if var_name in environ:
|
|
29
36
|
var_value = environ[var_name]
|
|
30
37
|
return var_value is not None and len(var_value) > 0 and var_value.lower() != 'false'
|
|
@@ -32,12 +39,14 @@ def is_env_var_set_and_not_false(var_name):
|
|
|
32
39
|
return False
|
|
33
40
|
|
|
34
41
|
|
|
35
|
-
def is_invoked_by_pytest():
|
|
36
|
-
"""Return
|
|
42
|
+
def is_invoked_by_pytest() -> bool:
|
|
43
|
+
"""Return `True` if invoked in a `pytest` context."""
|
|
37
44
|
return is_env_var_set_and_not_false(PYTEST_FLAG)
|
|
38
45
|
|
|
39
46
|
|
|
40
47
|
class YaralyzerConfig:
|
|
48
|
+
"""Handles parsing of command line args and environment variables for Yaralyzer."""
|
|
49
|
+
|
|
41
50
|
# Passed through to yara.set_config()
|
|
42
51
|
DEFAULT_MAX_MATCH_LENGTH = 100 * KILOBYTE
|
|
43
52
|
DEFAULT_YARA_STACK_SIZE = 2 * 65536
|
|
@@ -76,11 +85,13 @@ class YaralyzerConfig:
|
|
|
76
85
|
|
|
77
86
|
@classmethod
|
|
78
87
|
def set_argument_parser(cls, parser: ArgumentParser) -> None:
|
|
88
|
+
"""Sets the `_argument_parser` instance variable that will be used to parse command line args."""
|
|
79
89
|
cls._argument_parser: ArgumentParser = parser
|
|
80
90
|
cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
|
|
81
91
|
|
|
82
92
|
@classmethod
|
|
83
93
|
def set_args(cls, args: Namespace) -> None:
|
|
94
|
+
"""Set the `args` class instance variable and update args with any environment variable overrides."""
|
|
84
95
|
cls.args = args
|
|
85
96
|
|
|
86
97
|
for option in cls._argparse_keys:
|
|
@@ -105,9 +116,11 @@ class YaralyzerConfig:
|
|
|
105
116
|
|
|
106
117
|
@classmethod
|
|
107
118
|
def set_default_args(cls):
|
|
119
|
+
"""Set `self.args` to their defaults as if parsed from the command line."""
|
|
108
120
|
cls.set_args(cls._argument_parser.parse_args(['dummy']))
|
|
109
121
|
|
|
110
122
|
@classmethod
|
|
111
123
|
def get_default_arg(cls, arg: str) -> Any:
|
|
124
|
+
"""Return the default value for `arg` as defined by a `DEFAULT_` style class variable."""
|
|
112
125
|
default_var = f"DEFAULT_{arg.upper()}"
|
|
113
126
|
return vars(cls).get(default_var)
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
Leverages the chardet library to both guide what encodings are attempted as well as to rank decodings
|
|
4
|
-
in the results.
|
|
2
|
+
`BytesDecoder` class for attempting to decode bytes with various encodings.
|
|
5
3
|
"""
|
|
6
4
|
from collections import defaultdict
|
|
7
5
|
from copy import deepcopy
|
|
@@ -34,7 +32,34 @@ SCORE_SCALER = 100.0
|
|
|
34
32
|
|
|
35
33
|
|
|
36
34
|
class BytesDecoder:
|
|
35
|
+
"""
|
|
36
|
+
Handles decoding a chunk of bytes into strings using various possible encodings, ranking and displaying results.
|
|
37
|
+
|
|
38
|
+
This class leverages the `chardet` library and custom logic to try multiple encodings, track decoding outcomes,
|
|
39
|
+
and present the results in a rich, user-friendly format. It is used to analyze and display the possible
|
|
40
|
+
interpretations of a byte sequence, especially in the context of YARA matches or binary analysis.
|
|
41
|
+
|
|
42
|
+
Attributes:
|
|
43
|
+
bytes_match (BytesMatch): The `BytesMatch` instance being decoded.
|
|
44
|
+
bytes (bytes): The bytes (including surrounding context) to decode.
|
|
45
|
+
label (str): Label for this decoding attempt.
|
|
46
|
+
was_match_decodable (dict): Tracks successful decodes per encoding.
|
|
47
|
+
was_match_force_decoded (dict): Tracks forced decodes per encoding.
|
|
48
|
+
was_match_undecodable (dict): Tracks failed decodes per encoding.
|
|
49
|
+
decoded_strings (dict): Maps encoding to decoded string.
|
|
50
|
+
undecoded_rows (list): Stores undecoded table rows.
|
|
51
|
+
decodings (list): List of DecodingAttempt objects for each encoding tried.
|
|
52
|
+
encoding_detector (EncodingDetector): Used to detect and assess possible encodings.
|
|
53
|
+
"""
|
|
54
|
+
|
|
37
55
|
def __init__(self, bytes_match: 'BytesMatch', label: Optional[str] = None) -> None:
|
|
56
|
+
"""
|
|
57
|
+
Initialize a `BytesDecoder` for attempting to decode a chunk of bytes using various encodings.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
bytes_match (BytesMatch): The `BytesMatch` object containing the bytes to decode and match metadata.
|
|
61
|
+
label (Optional[str], optional): Optional label for this decoding attempt. Defaults to the match label.
|
|
62
|
+
"""
|
|
38
63
|
self.bytes_match = bytes_match
|
|
39
64
|
self.bytes = bytes_match.surrounding_bytes
|
|
40
65
|
self.label = label or bytes_match.label
|
|
@@ -51,7 +76,7 @@ class BytesDecoder:
|
|
|
51
76
|
self.encoding_detector = EncodingDetector(self.bytes)
|
|
52
77
|
|
|
53
78
|
def __rich_console__(self, _console: Console, options: ConsoleOptions) -> RenderResult:
|
|
54
|
-
"""Rich object generator (see Rich console docs)"""
|
|
79
|
+
"""Rich object generator (see Rich console docs)."""
|
|
55
80
|
yield NewLine(2)
|
|
56
81
|
yield Align(self._decode_attempt_subheading(), CENTER)
|
|
57
82
|
|
|
@@ -70,7 +95,12 @@ class BytesDecoder:
|
|
|
70
95
|
yield Align(self.bytes_match.bytes_hashes_table(), CENTER, style='dim')
|
|
71
96
|
|
|
72
97
|
def _build_decodings_table(self, suppress_decodes: bool = False) -> Table:
|
|
73
|
-
"""
|
|
98
|
+
"""
|
|
99
|
+
First rows are the raw / hex views of the bytes, next rows are the attempted decodings.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
suppress_decodes (bool, optional): If `True` don't add decoding attempts to the table. Defaults to `False`.
|
|
103
|
+
"""
|
|
74
104
|
self.table = new_decoding_attempts_table(self.bytes_match)
|
|
75
105
|
|
|
76
106
|
# Add the encoding rows to the table if not suppressed
|
|
@@ -102,20 +132,20 @@ class BytesDecoder:
|
|
|
102
132
|
return self._undecoded_assessments(self.encoding_detector.force_display_assessments)
|
|
103
133
|
|
|
104
134
|
def _undecoded_assessments(self, assessments: List[EncodingAssessment]) -> List[EncodingAssessment]:
|
|
105
|
-
"""Filter out the already decoded assessments from a set of assessments"""
|
|
135
|
+
"""Filter out the already decoded assessments from a set of assessments."""
|
|
106
136
|
return [a for a in assessments if not self._was_decoded(a.encoding)]
|
|
107
137
|
|
|
108
138
|
def _was_decoded(self, encoding: str) -> bool:
|
|
109
|
-
"""Check whether a given encoding is in the table already"""
|
|
139
|
+
"""Check whether a given encoding is in the table already."""
|
|
110
140
|
return any(row.encoding == encoding for row in self.decodings)
|
|
111
141
|
|
|
112
142
|
def _decode_attempt_subheading(self) -> Panel:
|
|
113
|
-
"""Generate a rich.Panel for displaying decode attempts"""
|
|
143
|
+
"""Generate a rich.Panel for displaying decode attempts."""
|
|
114
144
|
headline = Text(f"Found ", style='decode.subheading') + self.bytes_match.__rich__()
|
|
115
145
|
return Panel(headline, style='decode.subheading', expand=False)
|
|
116
146
|
|
|
117
147
|
def _track_decode_stats(self) -> None:
|
|
118
|
-
"""Track stats about successful vs. forced vs. failed decode attempts"""
|
|
148
|
+
"""Track stats about successful vs. forced vs. failed decode attempts."""
|
|
119
149
|
for decoding in self.decodings:
|
|
120
150
|
if decoding.failed_to_decode:
|
|
121
151
|
self.was_match_undecodable[decoding.encoding] += 1
|
|
@@ -127,7 +157,7 @@ class BytesDecoder:
|
|
|
127
157
|
self.was_match_force_decoded[decoding.encoding] += 1
|
|
128
158
|
|
|
129
159
|
def _row_from_decoding_attempt(self, decoding: DecodingAttempt) -> DecodingTableRow:
|
|
130
|
-
"""Create a DecodingAttemptTable row from a DecodingAttempt
|
|
160
|
+
"""Create a `DecodingAttemptTable` row from a `DecodingAttempt`."""
|
|
131
161
|
assessment = self.encoding_detector.get_encoding_assessment(decoding.encoding)
|
|
132
162
|
|
|
133
163
|
# If the decoding can have a start offset add an appropriate extension to the encoding label
|
|
@@ -162,7 +192,7 @@ class BytesDecoder:
|
|
|
162
192
|
|
|
163
193
|
|
|
164
194
|
def _build_encodings_metric_dict():
|
|
165
|
-
"""One key for each key in ENCODINGS_TO_ATTEMPT
|
|
195
|
+
"""One key for each key in `ENCODINGS_TO_ATTEMPT`, values are all 0."""
|
|
166
196
|
metrics_dict = defaultdict(lambda: 0)
|
|
167
197
|
|
|
168
198
|
for encoding in ENCODINGS_TO_ATTEMPT.keys():
|