yaralyzer 1.0.6__tar.gz → 1.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/CHANGELOG.md +10 -0
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/PKG-INFO +12 -7
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/README.md +3 -0
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/pyproject.toml +65 -24
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/__init__.py +5 -2
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/bytes_match.py +145 -52
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/config.py +18 -6
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/decoding/bytes_decoder.py +34 -15
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/decoding/decoding_attempt.py +10 -9
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/character_encodings.py +40 -40
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/encoding_assessment.py +10 -4
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/encoding_detection/encoding_detector.py +17 -13
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/bytes_helper.py +113 -16
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/dict_helper.py +1 -2
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/file_helper.py +3 -3
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/list_helper.py +1 -0
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/rich_text_helper.py +13 -11
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/helpers/string_helper.py +1 -1
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/file_export.py +2 -1
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/file_hashes_table.py +34 -6
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/regex_match_metrics.py +13 -10
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/rich_console.py +18 -3
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/util/argument_parser.py +11 -10
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/util/logging.py +6 -6
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yara/yara_match.py +1 -1
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yara/yara_rule_builder.py +16 -17
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/yaralyzer.py +66 -51
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/.yaralyzer.example +0 -0
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/LICENSE +0 -0
- {yaralyzer-1.0.6 → yaralyzer-1.0.8}/yaralyzer/output/decoding_attempts_table.py +0 -0
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# NEXT RELEASE
|
|
2
2
|
|
|
3
|
+
### 1.0.8
|
|
4
|
+
* Bump `python-dotenv` to v1.1.1
|
|
5
|
+
* Use `mkdocs` and `lazydocs` to build automatic API documentation at https://michelcrypt4d4mus.github.io/yaralyzer/
|
|
6
|
+
* Drop python 3.9 support (required by `mkdocs-awesome-nav` package)
|
|
7
|
+
|
|
8
|
+
### 1.0.7
|
|
9
|
+
* Add `Changelog` to PyPi URLs, add some more PyPi classifiers
|
|
10
|
+
* Add `.flake8` config file and fix style errors
|
|
11
|
+
* Rename `prefix_with_plain_text_obj()` to `prefix_with_style()`
|
|
12
|
+
|
|
3
13
|
### 1.0.6
|
|
4
14
|
* Add `Environment :: Console` and `Programming Language :: Python` to PyPi classifiers
|
|
5
15
|
* Add `LICENSE` to PyPi package
|
|
@@ -1,31 +1,33 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 1.0.6
|
|
4
|
-
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream
|
|
3
|
+
Version: 1.0.8
|
|
4
|
+
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
7
|
-
Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,visualization,yara
|
|
7
|
+
Keywords: ascii art,binary,character encoding,color,cybersecurity,data visualization,decode,DFIR,encoding,infosec,maldoc,malicious,malware,malware analysis,regex,regular expressions,reverse engineering,reversing,security,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
|
|
8
8
|
Author: Michel de Cryptadamus
|
|
9
9
|
Author-email: michel@cryptadamus.com
|
|
10
|
-
Requires-Python: >=3.9,<4.0
|
|
10
|
+
Requires-Python: >=3.10,<4.0
|
|
11
11
|
Classifier: Development Status :: 5 - Production/Stable
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
13
|
Classifier: Intended Audience :: Information Technology
|
|
14
14
|
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
15
15
|
Classifier: Programming Language :: Python
|
|
16
16
|
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
21
|
Classifier: Topic :: Artistic Software
|
|
21
22
|
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
22
23
|
Classifier: Topic :: Security
|
|
23
24
|
Requires-Dist: chardet (>=5.0.0,<6.0.0)
|
|
24
|
-
Requires-Dist: python-dotenv (>=0.21.0,<0.22.0)
|
|
25
|
+
Requires-Dist: python-dotenv (>=1.1.1,<2.0.0)
|
|
25
26
|
Requires-Dist: rich (>=14.1.0,<15.0.0)
|
|
26
27
|
Requires-Dist: rich-argparse-plus (>=0.3.1,<0.4.0)
|
|
27
28
|
Requires-Dist: yara-python (>=4.5.4,<5.0.0)
|
|
28
|
-
Project-URL:
|
|
29
|
+
Project-URL: Changelog, https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md
|
|
30
|
+
Project-URL: Documentation, https://michelcrypt4d4mus.github.io/yaralyzer/
|
|
29
31
|
Project-URL: Repository, https://github.com/michelcrypt4d4mus/yaralyzer
|
|
30
32
|
Description-Content-Type: text/markdown
|
|
31
33
|
|
|
@@ -117,6 +119,9 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
117
119
|
do_stuff()
|
|
118
120
|
```
|
|
119
121
|
|
|
122
|
+
#### API Documentation
|
|
123
|
+
Auto-generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
|
|
124
|
+
|
|
120
125
|
# Example Output
|
|
121
126
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
122
127
|
|
|
@@ -86,6 +86,9 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
86
86
|
do_stuff()
|
|
87
87
|
```
|
|
88
88
|
|
|
89
|
+
#### API Documentation
|
|
90
|
+
Auto-generated documentation for Yaralyzer's various classes and methods can be found [here](https://michelcrypt4d4mus.github.io/yaralyzer/).
|
|
91
|
+
|
|
89
92
|
# Example Output
|
|
90
93
|
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
91
94
|
|
|
@@ -1,13 +1,35 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "yaralyzer"
|
|
3
|
-
version = "1.0.6"
|
|
4
|
-
description = "Visualize and force decode YARA and regex matches found in a file or byte stream"
|
|
3
|
+
version = "1.0.8"
|
|
4
|
+
description = "Visualize and force decode YARA and regex matches found in a file or byte stream with colors. Lots of colors."
|
|
5
5
|
authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
license = "GPL-3.0-or-later"
|
|
8
|
+
|
|
8
9
|
homepage = "https://github.com/michelcrypt4d4mus/yaralyzer"
|
|
9
10
|
repository = "https://github.com/michelcrypt4d4mus/yaralyzer"
|
|
10
|
-
documentation = "https://github.
|
|
11
|
+
documentation = "https://michelcrypt4d4mus.github.io/yaralyzer/"
|
|
12
|
+
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 5 - Production/Stable",
|
|
15
|
+
"Environment :: Console",
|
|
16
|
+
"Intended Audience :: Information Technology",
|
|
17
|
+
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
|
18
|
+
"Programming Language :: Python",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Artistic Software",
|
|
24
|
+
"Topic :: Security",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Visualization",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
include = [
|
|
29
|
+
"CHANGELOG.md",
|
|
30
|
+
"LICENSE",
|
|
31
|
+
".yaralyzer.example"
|
|
32
|
+
]
|
|
11
33
|
|
|
12
34
|
keywords = [
|
|
13
35
|
"ascii art",
|
|
@@ -33,46 +55,65 @@ keywords = [
|
|
|
33
55
|
"threat hunting",
|
|
34
56
|
"threat intelligence",
|
|
35
57
|
"threat research",
|
|
58
|
+
"threatintel",
|
|
36
59
|
"visualization",
|
|
37
60
|
"yara",
|
|
38
61
|
]
|
|
39
62
|
|
|
40
|
-
classifiers = [
|
|
41
|
-
"Development Status :: 5 - Production/Stable",
|
|
42
|
-
"Environment :: Console",
|
|
43
|
-
"Intended Audience :: Information Technology",
|
|
44
|
-
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
|
45
|
-
"Programming Language :: Python",
|
|
46
|
-
"Topic :: Artistic Software",
|
|
47
|
-
"Topic :: Security",
|
|
48
|
-
"Topic :: Scientific/Engineering :: Visualization",
|
|
49
|
-
]
|
|
50
|
-
|
|
51
|
-
include = [
|
|
52
|
-
"CHANGELOG.md",
|
|
53
|
-
"LICENSE",
|
|
54
|
-
".yaralyzer.example"
|
|
55
|
-
]
|
|
56
|
-
|
|
57
63
|
|
|
64
|
+
####################
|
|
65
|
+
# Dependencies #
|
|
66
|
+
####################
|
|
58
67
|
[tool.poetry.dependencies]
|
|
59
|
-
python = "^3.9"
|
|
68
|
+
python = "^3.10"
|
|
60
69
|
chardet = ">=5.0.0,<6.0.0"
|
|
61
|
-
|
|
62
|
-
python-dotenv = "^0.21.0"
|
|
70
|
+
python-dotenv = "^1.1.1"
|
|
63
71
|
rich = "^14.1.0"
|
|
64
72
|
rich-argparse-plus = "^0.3.1"
|
|
65
73
|
yara-python = "^4.5.4"
|
|
74
|
+
#plyara = "^2.1.1" # TODO: use plyara for YARA rule parsing and validation
|
|
66
75
|
|
|
67
76
|
[tool.poetry.group.dev.dependencies]
|
|
77
|
+
flake8 = "^7.3.0"
|
|
78
|
+
lazydocs = "^0.4.8"
|
|
79
|
+
mkdocs = "^1.6.1"
|
|
80
|
+
mkdocs-awesome-nav = "^3.1.2"
|
|
81
|
+
mkdocs-include-markdown-plugin = "^7.1.7"
|
|
82
|
+
mkdocs-material = "^9.6.19"
|
|
83
|
+
pydocstyle = "^6.3.0"
|
|
68
84
|
pytest = "^7.1.3"
|
|
69
85
|
|
|
70
86
|
|
|
87
|
+
#############
|
|
88
|
+
# Scripts #
|
|
89
|
+
#############
|
|
71
90
|
[tool.poetry.scripts]
|
|
72
91
|
yaralyze = 'yaralyzer:yaralyze'
|
|
73
92
|
yaralyzer_show_color_theme = 'yaralyzer.helpers.rich_text_helper:yaralyzer_show_color_theme'
|
|
74
93
|
|
|
75
94
|
|
|
95
|
+
###############
|
|
96
|
+
# PyPi URLs #
|
|
97
|
+
###############
|
|
98
|
+
[tool.poetry.urls]
|
|
99
|
+
Changelog = "https://github.com/michelcrypt4d4mus/yaralyzer/blob/master/CHANGELOG.md"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
#################
|
|
103
|
+
# Build Stuff #
|
|
104
|
+
#################
|
|
76
105
|
[build-system]
|
|
77
|
-
requires = ["poetry-core"]
|
|
78
106
|
build-backend = "poetry.core.masonry.api"
|
|
107
|
+
requires = ["poetry-core"]
|
|
108
|
+
|
|
109
|
+
[tool.pydocstyle]
|
|
110
|
+
match-dir = "yaralyzer"
|
|
111
|
+
ignore = [
|
|
112
|
+
"D200", # One-line docstring should fit on one line with quotes (found 3)
|
|
113
|
+
"D203", # 1 blank line required before class docstring"
|
|
114
|
+
"D212", # Multi-line docstring summary should start at the first line
|
|
115
|
+
"D401", # First line should be in imperative mood"
|
|
116
|
+
"D406", # Section name should end with a newline
|
|
117
|
+
"D407", # Missing dashed underline after section
|
|
118
|
+
"D413", # Missing blank line after last section
|
|
119
|
+
]
|
|
@@ -11,11 +11,9 @@ if not environ.get('INVOKED_BY_PYTEST', False):
|
|
|
11
11
|
load_dotenv(dotenv_path=dotenv_file)
|
|
12
12
|
break
|
|
13
13
|
|
|
14
|
-
from yaralyzer.config import YaralyzerConfig
|
|
15
14
|
from yaralyzer.output.file_export import export_json, invoke_rich_export
|
|
16
15
|
from yaralyzer.output.rich_console import console
|
|
17
16
|
from yaralyzer.util.argument_parser import get_export_basepath, parse_arguments
|
|
18
|
-
from yaralyzer.util.logging import log
|
|
19
17
|
from yaralyzer.yara.yara_rule_builder import HEX, REGEX
|
|
20
18
|
from yaralyzer.yaralyzer import Yaralyzer
|
|
21
19
|
|
|
@@ -26,6 +24,11 @@ PDFALYZER_MSG_TXT.append('https://github.com/michelcrypt4d4mus/pdfalyzer\n', sty
|
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
def yaralyze():
|
|
27
|
+
"""
|
|
28
|
+
Entry point for yaralyzer when invoked as a script.
|
|
29
|
+
|
|
30
|
+
Args are parsed from the command line and environment variables. See yaralyzer --help for details.
|
|
31
|
+
"""
|
|
29
32
|
args = parse_arguments()
|
|
30
33
|
output_basepath = None
|
|
31
34
|
|
|
@@ -1,10 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Simple class to keep track of regex matches against binary data. Basically an re.match object with
|
|
3
|
-
some (not many) extra bells and whistles, most notably the surrounding_bytes property.
|
|
4
|
-
|
|
5
|
-
pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
|
|
6
|
-
e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
|
|
7
|
-
"""
|
|
1
|
+
"""BytesMatch class for tracking regex and YARA matches against binary data."""
|
|
8
2
|
import re
|
|
9
3
|
from typing import Iterator, Optional
|
|
10
4
|
|
|
@@ -13,25 +7,43 @@ from rich.text import Text
|
|
|
13
7
|
from yara import StringMatch, StringMatchInstance
|
|
14
8
|
|
|
15
9
|
from yaralyzer.config import YaralyzerConfig
|
|
16
|
-
from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
|
|
10
|
+
from yaralyzer.helpers.rich_text_helper import prefix_with_style
|
|
17
11
|
from yaralyzer.output.file_hashes_table import bytes_hashes_table
|
|
18
12
|
from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
|
|
19
13
|
|
|
20
14
|
|
|
21
15
|
class BytesMatch:
|
|
16
|
+
"""
|
|
17
|
+
Simple class to keep track of regex matches against binary data.
|
|
18
|
+
|
|
19
|
+
Basically an re.match object with some (not many) extra bells and whistles, most notably
|
|
20
|
+
the surrounding_bytes property.
|
|
21
|
+
|
|
22
|
+
pre_capture_len and post_capture_len refer to the regex sections before and after the capture group,
|
|
23
|
+
e.g. a regex like '123(.*)x:' would have pre_capture_len of 3 and post_capture_len of 2.
|
|
24
|
+
"""
|
|
25
|
+
|
|
22
26
|
def __init__(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
|
|
27
|
+
self,
|
|
28
|
+
matched_against: bytes,
|
|
29
|
+
start_idx: int,
|
|
30
|
+
length: int,
|
|
31
|
+
label: str,
|
|
32
|
+
ordinal: int,
|
|
33
|
+
match: Optional[re.Match] = None, # It's rough to get the regex from yara :(
|
|
34
|
+
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
35
|
+
) -> None:
|
|
36
|
+
"""
|
|
37
|
+
Initialize a BytesMatch object representing a match against binary data.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
matched_against (bytes): The full byte sequence that was searched.
|
|
41
|
+
start_idx (int): Start index of the match in the byte sequence.
|
|
42
|
+
length (int): Length of the match in bytes.
|
|
43
|
+
label (str): Label for the match (e.g., regex or YARA rule name).
|
|
44
|
+
ordinal (int): The Nth match for this pattern.
|
|
45
|
+
match (Optional[re.Match]): Regex match object, if available.
|
|
46
|
+
highlight_style (str): Style to use for highlighting the match.
|
|
35
47
|
"""
|
|
36
48
|
self.matched_against: bytes = matched_against
|
|
37
49
|
self.start_idx: int = start_idx
|
|
@@ -52,25 +64,50 @@ class BytesMatch:
|
|
|
52
64
|
|
|
53
65
|
@classmethod
|
|
54
66
|
def from_regex_match(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
67
|
+
cls,
|
|
68
|
+
matched_against: bytes,
|
|
69
|
+
match: re.Match,
|
|
70
|
+
ordinal: int,
|
|
71
|
+
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
72
|
+
) -> 'BytesMatch':
|
|
73
|
+
"""
|
|
74
|
+
Create a BytesMatch from a regex match object.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
matched_against (bytes): The bytes searched.
|
|
78
|
+
match (re.Match): The regex match object.
|
|
79
|
+
ordinal (int): The Nth match for this pattern.
|
|
80
|
+
highlight_style (str): Style for highlighting.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
BytesMatch: The constructed BytesMatch instance.
|
|
84
|
+
"""
|
|
61
85
|
return cls(matched_against, match.start(), len(match[0]), match.re.pattern, ordinal, match, highlight_style)
|
|
62
86
|
|
|
63
87
|
@classmethod
|
|
64
88
|
def from_yara_str(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
"""
|
|
89
|
+
cls,
|
|
90
|
+
matched_against: bytes,
|
|
91
|
+
rule_name: str,
|
|
92
|
+
yara_str_match: StringMatch,
|
|
93
|
+
yara_str_match_instance: StringMatchInstance,
|
|
94
|
+
ordinal: int,
|
|
95
|
+
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
96
|
+
) -> 'BytesMatch':
|
|
97
|
+
"""
|
|
98
|
+
Build a BytesMatch from a YARA string match instance.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
matched_against (bytes): The bytes searched.
|
|
102
|
+
rule_name (str): Name of the YARA rule.
|
|
103
|
+
yara_str_match (StringMatch): YARA string match object.
|
|
104
|
+
yara_str_match_instance (StringMatchInstance): Instance of the string match.
|
|
105
|
+
ordinal (int): The Nth match for this pattern.
|
|
106
|
+
highlight_style (str): Style for highlighting.
|
|
107
|
+
|
|
108
|
+
Returns:
|
|
109
|
+
BytesMatch: The constructed BytesMatch instance.
|
|
110
|
+
"""
|
|
74
111
|
pattern_label = yara_str_match.identifier
|
|
75
112
|
|
|
76
113
|
# Don't duplicate the labeling if rule_name and yara_str are the same
|
|
@@ -89,12 +126,22 @@ class BytesMatch:
|
|
|
89
126
|
|
|
90
127
|
@classmethod
|
|
91
128
|
def from_yara_match(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
"""
|
|
129
|
+
cls,
|
|
130
|
+
matched_against: bytes,
|
|
131
|
+
yara_match: dict,
|
|
132
|
+
highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
|
|
133
|
+
) -> Iterator['BytesMatch']:
|
|
134
|
+
"""
|
|
135
|
+
Yield a BytesMatch for each string returned as part of a YARA match result dict.
|
|
136
|
+
|
|
137
|
+
Args:
|
|
138
|
+
matched_against (bytes): The bytes searched.
|
|
139
|
+
yara_match (dict): YARA match result dictionary.
|
|
140
|
+
highlight_style (str): Style for highlighting.
|
|
141
|
+
|
|
142
|
+
Yields:
|
|
143
|
+
BytesMatch: For each string match in the YARA result.
|
|
144
|
+
"""
|
|
98
145
|
i = 0 # For numbered labeling
|
|
99
146
|
|
|
100
147
|
# yara-python's internals changed with 4.3.0: https://github.com/VirusTotal/yara-python/releases/tag/v4.3.0
|
|
@@ -102,28 +149,43 @@ class BytesMatch:
|
|
|
102
149
|
for yara_str_match_instance in yara_str_match.instances:
|
|
103
150
|
i += 1
|
|
104
151
|
|
|
105
|
-
yield
|
|
152
|
+
yield cls.from_yara_str(
|
|
106
153
|
matched_against,
|
|
107
154
|
yara_match['rule'],
|
|
108
155
|
yara_str_match,
|
|
109
156
|
yara_str_match_instance,
|
|
110
157
|
i,
|
|
111
|
-
highlight_style
|
|
158
|
+
highlight_style
|
|
159
|
+
)
|
|
112
160
|
|
|
113
161
|
def style_at_position(self, idx) -> str:
|
|
114
|
-
"""
|
|
162
|
+
"""
|
|
163
|
+
Get the style for the byte at position idx within the matched bytes.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
idx (int): Index within the surrounding bytes.
|
|
167
|
+
|
|
168
|
+
Returns:
|
|
169
|
+
str: The style to use for this byte (highlight or greyed out).
|
|
170
|
+
"""
|
|
115
171
|
if idx < self.highlight_start_idx or idx >= self.highlight_end_idx:
|
|
116
172
|
return GREY_ADDRESS
|
|
117
173
|
else:
|
|
118
174
|
return self.highlight_style
|
|
119
175
|
|
|
120
176
|
def location(self) -> Text:
|
|
121
|
-
"""
|
|
122
|
-
|
|
177
|
+
"""
|
|
178
|
+
Get a styled Text object describing the start and end index of the match.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
Text: Rich Text object like '(start idx: 348190, end idx: 348228)'.
|
|
182
|
+
"""
|
|
183
|
+
location_txt = prefix_with_style(
|
|
123
184
|
f"(start idx: ",
|
|
124
185
|
style='off_white',
|
|
125
186
|
root_style='decode.subheading'
|
|
126
187
|
)
|
|
188
|
+
|
|
127
189
|
location_txt.append(str(self.start_idx), style='number')
|
|
128
190
|
location_txt.append(', end idx: ', style='off_white')
|
|
129
191
|
location_txt.append(str(self.end_idx), style='number')
|
|
@@ -131,13 +193,26 @@ class BytesMatch:
|
|
|
131
193
|
return location_txt
|
|
132
194
|
|
|
133
195
|
def is_decodable(self) -> bool:
|
|
134
|
-
"""
|
|
196
|
+
"""
|
|
197
|
+
Determine if the matched bytes should be decoded.
|
|
198
|
+
|
|
199
|
+
Whether the bytes are decodable depends on whether SUPPRESS_DECODES_TABLE is set
|
|
200
|
+
and whether the match length is between MIN/MAX_DECODE_LENGTH.
|
|
201
|
+
|
|
202
|
+
Returns:
|
|
203
|
+
bool: True if decodable, False otherwise.
|
|
204
|
+
"""
|
|
135
205
|
return self.match_length >= YaralyzerConfig.args.min_decode_length \
|
|
136
206
|
and self.match_length <= YaralyzerConfig.args.max_decode_length \
|
|
137
207
|
and not YaralyzerConfig.args.suppress_decodes_table
|
|
138
208
|
|
|
139
209
|
def bytes_hashes_table(self) -> Table:
|
|
140
|
-
"""
|
|
210
|
+
"""
|
|
211
|
+
Build a table of MD5/SHA hashes for the matched bytes.
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
Table: Rich Table object with hashes.
|
|
215
|
+
"""
|
|
141
216
|
return bytes_hashes_table(
|
|
142
217
|
self.bytes,
|
|
143
218
|
self.location().plain,
|
|
@@ -145,7 +220,12 @@ class BytesMatch:
|
|
|
145
220
|
)
|
|
146
221
|
|
|
147
222
|
def suppression_notice(self) -> Text:
|
|
148
|
-
"""
|
|
223
|
+
"""
|
|
224
|
+
Generate a message for when the match is too short or too long to decode.
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
Text: Rich Text object with the suppression notice.
|
|
228
|
+
"""
|
|
149
229
|
txt = self.__rich__()
|
|
150
230
|
|
|
151
231
|
if self.match_length < YaralyzerConfig.args.min_decode_length:
|
|
@@ -157,7 +237,12 @@ class BytesMatch:
|
|
|
157
237
|
return txt
|
|
158
238
|
|
|
159
239
|
def to_json(self) -> dict:
|
|
160
|
-
"""
|
|
240
|
+
"""
|
|
241
|
+
Convert this BytesMatch to a JSON-serializable dictionary.
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
dict: Dictionary representation of the match, suitable for JSON serialization.
|
|
245
|
+
"""
|
|
161
246
|
json_dict = {
|
|
162
247
|
'label': self.label,
|
|
163
248
|
'match_length': self.match_length,
|
|
@@ -176,7 +261,13 @@ class BytesMatch:
|
|
|
176
261
|
return json_dict
|
|
177
262
|
|
|
178
263
|
def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
|
|
179
|
-
"""
|
|
264
|
+
"""
|
|
265
|
+
Find and set the bytes surrounding the match, ensuring indices stay within bounds.
|
|
266
|
+
|
|
267
|
+
Args:
|
|
268
|
+
num_before (Optional[int]): Number of bytes before the match to include.
|
|
269
|
+
num_after (Optional[int]): Number of bytes after the match to include.
|
|
270
|
+
"""
|
|
180
271
|
num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
|
|
181
272
|
num_before = num_before or YaralyzerConfig.args.surrounding_bytes
|
|
182
273
|
self.surrounding_start_idx: int = max(self.start_idx - num_before, 0)
|
|
@@ -184,11 +275,13 @@ class BytesMatch:
|
|
|
184
275
|
self.surrounding_bytes: bytes = self.matched_against[self.surrounding_start_idx:self.surrounding_end_idx]
|
|
185
276
|
|
|
186
277
|
def __rich__(self) -> Text:
|
|
187
|
-
|
|
278
|
+
"""Get a rich Text representation of the match for display."""
|
|
279
|
+
headline = prefix_with_style(str(self.match_length), style='number', root_style='decode.subheading')
|
|
188
280
|
headline.append(f" bytes matching ")
|
|
189
281
|
headline.append(f"{self.label} ", style=ALERT_STYLE if self.highlight_style == ALERT_STYLE else 'regex')
|
|
190
282
|
headline.append('at ')
|
|
191
283
|
return headline + self.location()
|
|
192
284
|
|
|
193
285
|
def __str__(self):
|
|
286
|
+
"""Plain text (no rich colors) representation of the match for display."""
|
|
194
287
|
return self.__rich__().plain
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration management for Yaralyzer.
|
|
3
|
+
"""
|
|
1
4
|
import logging
|
|
2
5
|
from argparse import ArgumentParser, Namespace
|
|
3
6
|
from os import environ
|
|
@@ -15,16 +18,19 @@ MEGABYTE = 1024 * KILOBYTE
|
|
|
15
18
|
|
|
16
19
|
def config_var_name(env_var: str) -> str:
|
|
17
20
|
"""
|
|
18
|
-
Get the name of env_var and strip off 'YARALYZER_'
|
|
19
|
-
|
|
20
|
-
|
|
21
|
+
Get the name of env_var and strip off 'YARALYZER_' prefix.
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
$ SURROUNDING_BYTES_ENV_VAR = 'YARALYZER_SURROUNDING_BYTES'
|
|
25
|
+
$ config_var_name(SURROUNDING_BYTES_ENV_VAR) => 'SURROUNDING_BYTES'
|
|
26
|
+
|
|
21
27
|
"""
|
|
22
28
|
env_var = env_var.removeprefix("YARALYZER_")
|
|
23
29
|
return f'{env_var=}'.partition('=')[0]
|
|
24
30
|
|
|
25
31
|
|
|
26
32
|
def is_env_var_set_and_not_false(var_name):
|
|
27
|
-
"""
|
|
33
|
+
"""Return True if the environment variable var_name is set, non-empty, and not 'false' (case-insensitive)."""
|
|
28
34
|
if var_name in environ:
|
|
29
35
|
var_value = environ[var_name]
|
|
30
36
|
return var_value is not None and len(var_value) > 0 and var_value.lower() != 'false'
|
|
@@ -33,11 +39,13 @@ def is_env_var_set_and_not_false(var_name):
|
|
|
33
39
|
|
|
34
40
|
|
|
35
41
|
def is_invoked_by_pytest():
|
|
36
|
-
"""Return true if pytest is running"""
|
|
42
|
+
"""Return true if pytest is running."""
|
|
37
43
|
return is_env_var_set_and_not_false(PYTEST_FLAG)
|
|
38
44
|
|
|
39
45
|
|
|
40
46
|
class YaralyzerConfig:
|
|
47
|
+
"""Handles parsing of command line args and environment variables for Yaralyzer."""
|
|
48
|
+
|
|
41
49
|
# Passed through to yara.set_config()
|
|
42
50
|
DEFAULT_MAX_MATCH_LENGTH = 100 * KILOBYTE
|
|
43
51
|
DEFAULT_YARA_STACK_SIZE = 2 * 65536
|
|
@@ -76,11 +84,13 @@ class YaralyzerConfig:
|
|
|
76
84
|
|
|
77
85
|
@classmethod
|
|
78
86
|
def set_argument_parser(cls, parser: ArgumentParser) -> None:
|
|
87
|
+
"""Set the _argument_parser class variable used to parse command line args and cache its sorted option names."""
|
|
79
88
|
cls._argument_parser: ArgumentParser = parser
|
|
80
89
|
cls._argparse_keys: List[str] = sorted([action.dest for action in parser._actions])
|
|
81
90
|
|
|
82
91
|
@classmethod
|
|
83
92
|
def set_args(cls, args: Namespace) -> None:
|
|
93
|
+
"""Set the args class variable and update args with any environment variable overrides."""
|
|
84
94
|
cls.args = args
|
|
85
95
|
|
|
86
96
|
for option in cls._argparse_keys:
|
|
@@ -91,7 +101,7 @@ class YaralyzerConfig:
|
|
|
91
101
|
env_var = f"{YARALYZER}_{option.upper()}"
|
|
92
102
|
env_value = environ.get(env_var)
|
|
93
103
|
default_value = cls.get_default_arg(option)
|
|
94
|
-
#print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}")
|
|
104
|
+
# print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}") # noqa: E501
|
|
95
105
|
|
|
96
106
|
# TODO: as is you can't override env vars with CLI args
|
|
97
107
|
if isinstance(arg_value, bool):
|
|
@@ -105,9 +115,11 @@ class YaralyzerConfig:
|
|
|
105
115
|
|
|
106
116
|
@classmethod
|
|
107
117
|
def set_default_args(cls):
|
|
118
|
+
"""Set args to their defaults as if parsed from the command line."""
|
|
108
119
|
cls.set_args(cls._argument_parser.parse_args(['dummy']))
|
|
109
120
|
|
|
110
121
|
@classmethod
|
|
111
122
|
def get_default_arg(cls, arg: str) -> Any:
|
|
123
|
+
"""Return the default value for arg as defined by a DEFAULT_ style class variable."""
|
|
112
124
|
default_var = f"DEFAULT_{arg.upper()}"
|
|
113
125
|
return vars(cls).get(default_var)
|