yaralyzer 0.9.6__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- .yaralyzer.example +5 -1
- CHANGELOG.md +9 -0
- yaralyzer/__init__.py +4 -1
- yaralyzer/bytes_match.py +20 -2
- yaralyzer/config.py +5 -0
- yaralyzer/decoding/bytes_decoder.py +44 -39
- yaralyzer/decoding/decoding_attempt.py +41 -24
- yaralyzer/encoding_detection/character_encodings.py +36 -9
- yaralyzer/encoding_detection/encoding_assessment.py +16 -14
- yaralyzer/encoding_detection/encoding_detector.py +2 -2
- yaralyzer/helpers/bytes_helper.py +16 -1
- yaralyzer/helpers/dict_helper.py +1 -1
- yaralyzer/helpers/list_helper.py +15 -0
- yaralyzer/helpers/rich_text_helper.py +1 -2
- yaralyzer/output/decoding_attempts_table.py +18 -15
- yaralyzer/output/file_export.py +20 -0
- yaralyzer/output/rich_console.py +1 -0
- yaralyzer/util/argument_parser.py +9 -1
- yaralyzer/util/logging.py +0 -1
- yaralyzer/yara/yara_match.py +0 -1
- {yaralyzer-0.9.6.dist-info → yaralyzer-1.0.1.dist-info}/METADATA +2 -2
- yaralyzer-1.0.1.dist-info/RECORD +31 -0
- yaralyzer-0.9.6.dist-info/RECORD +0 -30
- {yaralyzer-0.9.6.dist-info → yaralyzer-1.0.1.dist-info}/LICENSE +0 -0
- {yaralyzer-0.9.6.dist-info → yaralyzer-1.0.1.dist-info}/WHEEL +0 -0
- {yaralyzer-0.9.6.dist-info → yaralyzer-1.0.1.dist-info}/entry_points.txt +0 -0
.yaralyzer.example
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# YARALYZER_STACK_SIZE=10485760
|
|
18
18
|
# YARALYZER_MAX_MATCH_LENGTH=10737418240
|
|
19
19
|
|
|
20
|
-
# Suppress all
|
|
20
|
+
# Suppress all attempts to decode bytes into various text encodings
|
|
21
21
|
# YARALYZER_SUPPRESS_DECODES_TABLE=False
|
|
22
22
|
|
|
23
23
|
# Suppress the display of the table showing the encoding assessments given by `chardet.detect()`
|
|
@@ -30,6 +30,8 @@
|
|
|
30
30
|
# Configure how many bytes before and after any binary data should be included in scans and visualizations
|
|
31
31
|
# YARALYZER_SURROUNDING_BYTES=64
|
|
32
32
|
|
|
33
|
+
|
|
34
|
+
|
|
33
35
|
# Size thresholds (in bytes) under/over which yaralyzer will NOT make attempts to decode a match.
|
|
34
36
|
# Longer byte sequences are for obvious reasons slower to decode by force.
|
|
35
37
|
# It may feel counterintuitive but larger chunks of random binary are also harder to examine and
|
|
@@ -45,6 +47,8 @@
|
|
|
45
47
|
# Minimum bytes to run chardet.detect() on a sequence of bytes
|
|
46
48
|
# YARALYZER_MIN_BYTES_TO_DETECT_ENCODING
|
|
47
49
|
|
|
50
|
+
|
|
51
|
+
|
|
48
52
|
# Directory to write application logs to. Must be an absolute path, not a relative one.
|
|
49
53
|
# These logs are not normally written to a file and the default log level means that the standard behavior
|
|
50
54
|
# is to more or less discard them. Be aware that if you configure this variable a few things will change:
|
CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# NEXT RELEASE
|
|
2
2
|
|
|
3
|
+
### 1.0.1
|
|
4
|
+
* Fix iteration of byte offsets during attempted decodes for UTF-16 and UTF-32 (was starting at second byte instead of first)
|
|
5
|
+
* Label the byte offset for forced UTF-16 and UTF-32 decodes
|
|
6
|
+
* Show helpful message if logs are being sent to files in `YaralyzerConfig.LOG_DIR` instead of being written to stderr/stdout
|
|
7
|
+
* Warn if `--debug` and `--log-level` args both provided
|
|
8
|
+
|
|
9
|
+
### 1.0.0
|
|
10
|
+
* Add `--export-json` option
|
|
11
|
+
|
|
3
12
|
### 0.9.6
|
|
4
13
|
* Fix help message
|
|
5
14
|
|
yaralyzer/__init__.py
CHANGED
|
@@ -12,7 +12,7 @@ if not environ.get('INVOKED_BY_PYTEST', False):
|
|
|
12
12
|
break
|
|
13
13
|
|
|
14
14
|
from yaralyzer.config import YaralyzerConfig
|
|
15
|
-
from yaralyzer.output.file_export import invoke_rich_export
|
|
15
|
+
from yaralyzer.output.file_export import export_json, invoke_rich_export
|
|
16
16
|
from yaralyzer.output.rich_console import console
|
|
17
17
|
from yaralyzer.util.argument_parser import get_export_basepath, parse_arguments
|
|
18
18
|
from yaralyzer.util.logging import log
|
|
@@ -59,6 +59,9 @@ def yaralyze():
|
|
|
59
59
|
if args.export_svg:
|
|
60
60
|
invoke_rich_export(console.save_svg, output_basepath)
|
|
61
61
|
|
|
62
|
+
if args.export_json:
|
|
63
|
+
export_json(yaralyzer, output_basepath)
|
|
64
|
+
|
|
62
65
|
if args.file_to_scan_path.endswith('.pdf'):
|
|
63
66
|
console.print(PDFALYZER_MSG_TXT)
|
|
64
67
|
|
yaralyzer/bytes_match.py
CHANGED
|
@@ -14,9 +14,8 @@ from yara import StringMatch, StringMatchInstance
|
|
|
14
14
|
|
|
15
15
|
from yaralyzer.config import YaralyzerConfig
|
|
16
16
|
from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
|
|
17
|
-
from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
|
|
18
17
|
from yaralyzer.output.file_hashes_table import bytes_hashes_table
|
|
19
|
-
from yaralyzer.
|
|
18
|
+
from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class BytesMatch:
|
|
@@ -157,6 +156,25 @@ class BytesMatch:
|
|
|
157
156
|
|
|
158
157
|
return txt
|
|
159
158
|
|
|
159
|
+
def to_json(self) -> dict:
|
|
160
|
+
"""Convert this BytesMatch to a JSON-serializable dict."""
|
|
161
|
+
json_dict = {
|
|
162
|
+
'label': self.label,
|
|
163
|
+
'match_length': self.match_length,
|
|
164
|
+
'matched_bytes': self.bytes.hex(),
|
|
165
|
+
'ordinal': self.ordinal,
|
|
166
|
+
'start_idx': self.start_idx,
|
|
167
|
+
'end_idx': self.end_idx,
|
|
168
|
+
'surrounding_bytes': self.surrounding_bytes.hex(),
|
|
169
|
+
'surrounding_start_idx': self.surrounding_start_idx,
|
|
170
|
+
'surrounding_end_idx': self.surrounding_end_idx,
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
if self.match:
|
|
174
|
+
json_dict['pattern'] = self.match.re.pattern
|
|
175
|
+
|
|
176
|
+
return json_dict
|
|
177
|
+
|
|
160
178
|
def _find_surrounding_bytes(self, num_before: Optional[int] = None, num_after: Optional[int] = None) -> None:
|
|
161
179
|
"""Find the surrounding bytes, making sure not to step off the beginning or end"""
|
|
162
180
|
num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
|
yaralyzer/config.py
CHANGED
|
@@ -3,6 +3,8 @@ from argparse import ArgumentParser, Namespace
|
|
|
3
3
|
from os import environ
|
|
4
4
|
from typing import Any, List
|
|
5
5
|
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
6
8
|
YARALYZE = 'yaralyze'
|
|
7
9
|
YARALYZER = f"{YARALYZE}r".upper()
|
|
8
10
|
PYTEST_FLAG = 'INVOKED_BY_PYTEST'
|
|
@@ -56,6 +58,9 @@ class YaralyzerConfig:
|
|
|
56
58
|
LOG_LEVEL_ENV_VAR = f"{YARALYZER}_LOG_LEVEL"
|
|
57
59
|
LOG_LEVEL = logging.getLevelName(environ.get(LOG_LEVEL_ENV_VAR, 'WARN'))
|
|
58
60
|
|
|
61
|
+
if LOG_DIR and not is_invoked_by_pytest():
|
|
62
|
+
Console(color_system='256').print(f"Writing logs to '{LOG_DIR}' instead of stderr/stdout...", style='dim')
|
|
63
|
+
|
|
59
64
|
HIGHLIGHT_STYLE = 'orange1'
|
|
60
65
|
|
|
61
66
|
ONLY_CLI_ARGS = [
|
|
@@ -5,6 +5,7 @@ in the results.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from collections import defaultdict
|
|
8
|
+
from copy import deepcopy
|
|
8
9
|
from operator import attrgetter
|
|
9
10
|
from typing import List, Optional
|
|
10
11
|
|
|
@@ -13,18 +14,18 @@ from rich.console import Console, ConsoleOptions, NewLine, RenderResult
|
|
|
13
14
|
from rich.panel import Panel
|
|
14
15
|
from rich.table import Table
|
|
15
16
|
from rich.text import Text
|
|
16
|
-
from yaralyzer import bytes_match
|
|
17
17
|
|
|
18
18
|
#from yaralyzer.bytes_match import BytesMatch
|
|
19
19
|
from yaralyzer.config import YaralyzerConfig
|
|
20
20
|
from yaralyzer.decoding.decoding_attempt import DecodingAttempt
|
|
21
|
-
from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT
|
|
21
|
+
from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT, encoding_offsets
|
|
22
22
|
from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
|
|
23
23
|
from yaralyzer.encoding_detection.encoding_detector import EncodingDetector
|
|
24
24
|
from yaralyzer.helpers.dict_helper import get_dict_key_by_value
|
|
25
|
+
from yaralyzer.helpers.list_helper import flatten
|
|
25
26
|
from yaralyzer.helpers.rich_text_helper import CENTER, DECODING_ERRORS_MSG, NO_DECODING_ERRORS_MSG
|
|
26
27
|
from yaralyzer.output.decoding_attempts_table import (DecodingTableRow, assessment_only_row,
|
|
27
|
-
|
|
28
|
+
decoding_table_row, new_decoding_attempts_table)
|
|
28
29
|
from yaralyzer.util.logging import log
|
|
29
30
|
|
|
30
31
|
# A 2-tuple that can be indexed by booleans of messages used in the table to show true vs. false
|
|
@@ -63,51 +64,48 @@ class BytesDecoder:
|
|
|
63
64
|
|
|
64
65
|
# In standalone mode we always print the hex/raw bytes
|
|
65
66
|
if self.bytes_match.is_decodable():
|
|
66
|
-
yield self.
|
|
67
|
+
yield self._build_decodings_table()
|
|
67
68
|
elif YaralyzerConfig.args.standalone_mode:
|
|
68
|
-
# TODO: yield self.bytes_match.suppression_notice()
|
|
69
|
-
yield self.
|
|
69
|
+
# TODO: yield self.bytes_match.suppression_notice() (i guess to show some notice that things are suppressed?)
|
|
70
|
+
yield self._build_decodings_table(True)
|
|
70
71
|
|
|
71
72
|
yield NewLine()
|
|
72
73
|
yield Align(self.bytes_match.bytes_hashes_table(), CENTER, style='dim')
|
|
73
74
|
|
|
74
|
-
def
|
|
75
|
+
def _build_decodings_table(self, suppress_decodes: bool = False) -> Table:
|
|
75
76
|
"""First rows are the raw / hex views of the bytes, next rows are the attempted decodings"""
|
|
76
|
-
self.table =
|
|
77
|
+
self.table = new_decoding_attempts_table(self.bytes_match)
|
|
77
78
|
|
|
78
|
-
if
|
|
79
|
-
|
|
79
|
+
# Add the encoding rows to the table if not suppressed
|
|
80
|
+
if not (YaralyzerConfig.args.suppress_decoding_attempts or suppress_decodes):
|
|
81
|
+
self.decodings = [DecodingAttempt(self.bytes_match, encoding) for encoding in ENCODINGS_TO_ATTEMPT]
|
|
82
|
+
# Attempt decodings we don't usually attempt if chardet is insistent enough
|
|
83
|
+
forced_decodes = self._undecoded_assessments(self.encoding_detector.force_decode_assessments)
|
|
84
|
+
self.decodings += [DecodingAttempt(self.bytes_match, a.encoding) for a in forced_decodes]
|
|
80
85
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
86
|
+
# If we still haven't decoded chardet's top choice, decode it
|
|
87
|
+
if len(self._forced_displays()) > 0 and not self._was_decoded(self._forced_displays()[0].encoding):
|
|
88
|
+
chardet_top_encoding = self._forced_displays()[0].encoding
|
|
89
|
+
log.info(f"Decoding {chardet_top_encoding} because it's chardet top choice...")
|
|
90
|
+
self.decodings.append(DecodingAttempt(self.bytes_match, chardet_top_encoding))
|
|
85
91
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
92
|
+
# Build the table rows from the decoding attempts
|
|
93
|
+
rows = [self._row_from_decoding_attempt(decoding) for decoding in self.decodings]
|
|
94
|
+
rows += [assessment_only_row(a, a.confidence * SCORE_SCALER) for a in self._forced_displays()]
|
|
95
|
+
self._track_decode_stats()
|
|
89
96
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
chardet_top_encoding = self._forced_displays()[0].encoding
|
|
93
|
-
log.info(f"Decoding {chardet_top_encoding} because it's chardet top choice...")
|
|
94
|
-
self.decodings.append(DecodingAttempt(self.bytes_match, chardet_top_encoding))
|
|
95
|
-
|
|
96
|
-
rows = [self._row_from_decoding_attempt(decoding) for decoding in self.decodings]
|
|
97
|
-
rows += [assessment_only_row(a, a.confidence * SCORE_SCALER) for a in self._forced_displays()]
|
|
98
|
-
self._track_decode_stats()
|
|
99
|
-
|
|
100
|
-
for row in sorted(rows, key=attrgetter('sort_score'), reverse=True):
|
|
101
|
-
self.table.add_row(*row[0:4])
|
|
97
|
+
for row in sorted(rows, key=attrgetter('sort_score', 'encoding_label_plain'), reverse=True):
|
|
98
|
+
self.table.add_row(*row[0:4])
|
|
102
99
|
|
|
103
100
|
return self.table
|
|
104
101
|
|
|
102
|
+
# TODO: rename this to something that makes more sense, maybe assessments_over_display_threshold()?
|
|
105
103
|
def _forced_displays(self) -> List[EncodingAssessment]:
|
|
106
|
-
"""Returns assessments over the display threshold that are not yet decoded"""
|
|
104
|
+
"""Returns assessments over the display threshold that are not yet decoded."""
|
|
107
105
|
return self._undecoded_assessments(self.encoding_detector.force_display_assessments)
|
|
108
106
|
|
|
109
107
|
def _undecoded_assessments(self, assessments: List[EncodingAssessment]) -> List[EncodingAssessment]:
|
|
110
|
-
"""
|
|
108
|
+
"""Filter out the already decoded assessments from a set of assessments"""
|
|
111
109
|
return [a for a in assessments if not self._was_decoded(a.encoding)]
|
|
112
110
|
|
|
113
111
|
def _was_decoded(self, encoding: str) -> bool:
|
|
@@ -115,7 +113,7 @@ class BytesDecoder:
|
|
|
115
113
|
return any(row.encoding == encoding for row in self.decodings)
|
|
116
114
|
|
|
117
115
|
def _decode_attempt_subheading(self) -> Panel:
|
|
118
|
-
"""Generate a rich.Panel for decode attempts"""
|
|
116
|
+
"""Generate a rich.Panel for displaying decode attempts"""
|
|
119
117
|
headline = Text(f"Found ", style='decode.subheading') + self.bytes_match.__rich__()
|
|
120
118
|
return Panel(headline, style='decode.subheading', expand=False)
|
|
121
119
|
|
|
@@ -132,26 +130,33 @@ class BytesDecoder:
|
|
|
132
130
|
self.was_match_force_decoded[decoding.encoding] += 1
|
|
133
131
|
|
|
134
132
|
def _row_from_decoding_attempt(self, decoding: DecodingAttempt) -> DecodingTableRow:
|
|
135
|
-
"""
|
|
136
|
-
Create a DecodingAttemptTable row from a DecodingAttempt.
|
|
137
|
-
If the decoding result is a duplicate of a previous decoding, replace the decoded text
|
|
138
|
-
with "same output as X" where X is the previous encoding that gave the same result.
|
|
139
|
-
"""
|
|
133
|
+
"""Create a DecodingAttemptTable row from a DecodingAttempt."""
|
|
140
134
|
assessment = self.encoding_detector.get_encoding_assessment(decoding.encoding)
|
|
135
|
+
|
|
136
|
+
# If the decoding can have a start offset add an appropriate extension to the encoding label
|
|
137
|
+
if decoding.start_offset_label:
|
|
138
|
+
if assessment.language:
|
|
139
|
+
log.warning(f"{decoding.encoding} has offset {decoding.start_offset} and language '{assessment.language}'")
|
|
140
|
+
else:
|
|
141
|
+
assessment = deepcopy(assessment)
|
|
142
|
+
assessment.set_encoding_label(decoding.start_offset_label)
|
|
143
|
+
|
|
141
144
|
plain_decoded_string = decoding.decoded_string.plain
|
|
142
145
|
sort_score = assessment.confidence * SCORE_SCALER
|
|
143
146
|
|
|
147
|
+
# If the decoding result is a duplicate of a previous decoding, replace the decoded text
|
|
148
|
+
# with "same output as X" where X is the previous encoding that gave the same result.
|
|
144
149
|
if plain_decoded_string in self.decoded_strings.values():
|
|
145
150
|
encoding_with_same_output = get_dict_key_by_value(self.decoded_strings, plain_decoded_string)
|
|
146
151
|
display_text = Text('same output as ', style='color(66) dim italic')
|
|
147
152
|
display_text.append(encoding_with_same_output, style=ENCODING).append('...', style='white')
|
|
148
153
|
else:
|
|
149
|
-
self.decoded_strings[decoding.
|
|
154
|
+
self.decoded_strings[decoding.encoding_label] = plain_decoded_string
|
|
150
155
|
display_text = decoding.decoded_string
|
|
151
156
|
|
|
152
157
|
# Set failures negative, shave off a little for forced decodes
|
|
153
158
|
if decoding.failed_to_decode:
|
|
154
|
-
sort_score = sort_score * -1 - 100
|
|
159
|
+
sort_score = (sort_score * -1) - 100
|
|
155
160
|
elif decoding.was_force_decoded:
|
|
156
161
|
sort_score -= 10
|
|
157
162
|
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Class to manage attempting to decode a chunk of bytes into strings with a given encoding.
|
|
3
|
+
"""
|
|
1
4
|
from sys import byteorder
|
|
2
5
|
from typing import Optional
|
|
3
6
|
|
|
4
7
|
from rich.markup import escape
|
|
5
|
-
from rich.panel import Panel
|
|
6
8
|
from rich.text import Text
|
|
7
9
|
|
|
8
|
-
#from yaralyzer.bytes_match import ALERT_STYLE, BytesMatch
|
|
9
10
|
from yaralyzer.encoding_detection.character_encodings import (ENCODINGS_TO_ATTEMPT, SINGLE_BYTE_ENCODINGS,
|
|
10
|
-
UTF_8,
|
|
11
|
-
from yaralyzer.helpers.bytes_helper import clean_byte_string
|
|
11
|
+
UTF_8, encoding_width, is_wide_utf)
|
|
12
|
+
from yaralyzer.helpers.bytes_helper import clean_byte_string, truncate_for_encoding
|
|
12
13
|
from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj, unprintable_byte_to_text
|
|
13
14
|
from yaralyzer.output.rich_console import ALERT_STYLE, BYTES_BRIGHTER, BYTES_BRIGHTEST, BYTES_NO_DIM, GREY_ADDRESS
|
|
14
15
|
from yaralyzer.util.logging import log
|
|
@@ -16,17 +17,27 @@ from yaralyzer.util.logging import log
|
|
|
16
17
|
|
|
17
18
|
class DecodingAttempt:
|
|
18
19
|
def __init__(self, bytes_match: 'BytesMatch', encoding: str) -> None:
|
|
20
|
+
# Args
|
|
21
|
+
self.bytes = bytes_match.surrounding_bytes
|
|
19
22
|
self.bytes_match = bytes_match
|
|
20
23
|
self.encoding = encoding
|
|
21
|
-
|
|
24
|
+
# Inferred / derived values
|
|
25
|
+
self.encoding_label = encoding
|
|
26
|
+
self.start_offset = 0 # Offset in bytes to start decoding from
|
|
27
|
+
self.start_offset_label = None # String to indicate what offset we were able to decode
|
|
22
28
|
self.was_force_decoded = False
|
|
23
29
|
self.failed_to_decode = False
|
|
24
30
|
self.decoded_string = self._decode_bytes()
|
|
25
31
|
|
|
32
|
+
def is_wide_utf_encoding(self) -> bool:
|
|
33
|
+
"""Returns True if the encoding is UTF-16 or UTF-32"""
|
|
34
|
+
return is_wide_utf(self.encoding)
|
|
35
|
+
|
|
26
36
|
def _decode_bytes(self) -> Text:
|
|
27
37
|
"""
|
|
28
|
-
|
|
29
|
-
(
|
|
38
|
+
Tries builtin decode, hands off to other methods for harsher treatment
|
|
39
|
+
(byte shifting for UTF-16/32 and custom decode for the rest) if that fails.
|
|
40
|
+
Has side effect of setting 'self.decoded_string' value.
|
|
30
41
|
"""
|
|
31
42
|
try:
|
|
32
43
|
decoded_string = self._to_rich_text(escape(self.bytes.decode(self.encoding)))
|
|
@@ -36,12 +47,12 @@ class DecodingAttempt:
|
|
|
36
47
|
log.info(f"{self.encoding} failed on 1st pass decoding {self.bytes_match} capture; custom decoding...")
|
|
37
48
|
except LookupError as e:
|
|
38
49
|
log.warning(f"Unknown encoding: {self.encoding}. {e}")
|
|
39
|
-
return self.
|
|
50
|
+
return self._failed_to_decode_msg_txt(e)
|
|
40
51
|
|
|
41
52
|
self.was_force_decoded = True
|
|
42
53
|
|
|
43
|
-
if self.
|
|
44
|
-
return self.
|
|
54
|
+
if self.is_wide_utf_encoding():
|
|
55
|
+
return self._decode_utf_multibyte()
|
|
45
56
|
else:
|
|
46
57
|
return self._custom_decode()
|
|
47
58
|
|
|
@@ -103,34 +114,35 @@ class DecodingAttempt:
|
|
|
103
114
|
|
|
104
115
|
return output
|
|
105
116
|
|
|
106
|
-
def
|
|
107
|
-
"""
|
|
108
|
-
char_width =
|
|
109
|
-
log.debug(f"Decoding {self.encoding}, char_width is {char_width}...")
|
|
117
|
+
def _decode_utf_multibyte(self) -> Text:
|
|
118
|
+
"""UTF-16/32 are fixed width and multibyte and therefore depend on the position of the starting byte."""
|
|
119
|
+
char_width = encoding_width(self.encoding)
|
|
110
120
|
last_exception = None
|
|
111
121
|
decoded_str = None
|
|
112
|
-
bytes_offset =
|
|
122
|
+
bytes_offset = 0
|
|
113
123
|
|
|
124
|
+
# Iterate through the possible byte offsets until we find a valid decoded string (or don't)
|
|
114
125
|
while bytes_offset < char_width:
|
|
115
126
|
try:
|
|
116
|
-
decoded_str = self.bytes[bytes_offset:].decode(self.encoding)
|
|
127
|
+
decoded_str = truncate_for_encoding(self.bytes[bytes_offset:], self.encoding).decode(self.encoding)
|
|
117
128
|
except UnicodeDecodeError as e:
|
|
118
129
|
log.info(f"Exception decoding w/offset {bytes_offset} in {self.encoding}: {e}")
|
|
119
130
|
last_exception = e
|
|
120
131
|
|
|
132
|
+
# Append the current bytes_offset to the encoding label if we found a valid decoded string
|
|
121
133
|
if decoded_str is not None:
|
|
134
|
+
log.debug(f"Successfully decoded '{self.encoding}' w/offset {bytes_offset}")
|
|
135
|
+
self.start_offset = bytes_offset
|
|
136
|
+
self.start_offset_label = f"offset {self.start_offset} byte" + ('s' if self.start_offset > 1 else '')
|
|
137
|
+
self.encoding_label = f"{self.encoding} ({self.start_offset_label})"
|
|
122
138
|
break
|
|
123
139
|
|
|
124
140
|
bytes_offset += 1
|
|
125
141
|
|
|
126
|
-
if decoded_str is None:
|
|
127
|
-
return self.
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def _failed_to_decode(self, exception: Optional[Exception]) -> Text:
|
|
132
|
-
self.failed_to_decode = True
|
|
133
|
-
return prefix_with_plain_text_obj(f"(decode failed: {exception})", style='red dim italic')
|
|
142
|
+
if decoded_str is not None:
|
|
143
|
+
return self._to_rich_text(decoded_str, bytes_offset)
|
|
144
|
+
else:
|
|
145
|
+
return self._failed_to_decode_msg_txt(last_exception)
|
|
134
146
|
|
|
135
147
|
def _to_rich_text(self, _string: str, bytes_offset: int=0) -> Text:
|
|
136
148
|
"""Convert a decoded string to highlighted Text representation"""
|
|
@@ -165,3 +177,8 @@ class DecodingAttempt:
|
|
|
165
177
|
current_byte_idx += char_width
|
|
166
178
|
|
|
167
179
|
return txt
|
|
180
|
+
|
|
181
|
+
def _failed_to_decode_msg_txt(self, exception: Optional[Exception]) -> Text:
|
|
182
|
+
"""Set failed_to_decode flag and return a Text object with the error message."""
|
|
183
|
+
self.failed_to_decode = True
|
|
184
|
+
return prefix_with_plain_text_obj(f"(decode failed: {exception})", style='red dim italic')
|
|
@@ -136,22 +136,49 @@ UNPRINTABLE_ISO_8859_7.update({
|
|
|
136
136
|
})
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
#
|
|
140
|
-
#
|
|
139
|
+
# Keys are names of encodings we will attempt to decode with, values are dicts mapping the unprintable bytes
|
|
140
|
+
# in that encoding to appropriate string representations of those unprintable bytes.
|
|
141
|
+
# Order matters here, as we will attempt the decoding in the order of the keys.
|
|
141
142
|
ENCODINGS_TO_ATTEMPT = {
|
|
142
143
|
ASCII: UNPRINTABLE_ASCII,
|
|
143
|
-
UTF_8:
|
|
144
|
-
UTF_16:
|
|
145
|
-
UTF_32:
|
|
146
|
-
#'utf-7':
|
|
144
|
+
UTF_8: UNPRINTABLE_UTF_8,
|
|
145
|
+
UTF_16: None,
|
|
146
|
+
UTF_32: None, # UTF-16 and 32 are handled differently
|
|
147
147
|
ISO_8859_1: UNPRINTABLE_ISO_8859_1,
|
|
148
|
-
WINDOWS_1252: UNPRINTABLE_WIN_1252
|
|
148
|
+
WINDOWS_1252: UNPRINTABLE_WIN_1252,
|
|
149
|
+
#'utf-7':
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
SINGLE_BYTE_ENCODINGS = [
|
|
153
|
+
ASCII,
|
|
154
|
+
ISO_8859_1,
|
|
155
|
+
WINDOWS_1252,
|
|
156
|
+
]
|
|
157
|
+
|
|
158
|
+
# Keys are encodings that use multiple bytes to represent a single character, values are the possible offsets
|
|
159
|
+
# to attempt to use as the starting point for decoding in a given set of bytes.
|
|
160
|
+
WIDE_UTF_ENCODINGS = {
|
|
161
|
+
UTF_16: [0, 1],
|
|
162
|
+
UTF_32: [0, 1, 2, 3],
|
|
149
163
|
}
|
|
150
164
|
|
|
151
165
|
|
|
152
|
-
|
|
166
|
+
def encoding_offsets(encoding: str) -> list:
|
|
167
|
+
"""Get possible offsets for a given encoding. If the encoding is not in WIDE_UTF_ENCODINGS, return [0]."""
|
|
168
|
+
return WIDE_UTF_ENCODINGS.get(encoding, [0])
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def encoding_width(encoding: str) -> int:
|
|
172
|
+
"""Get the width of a character in bytes for a given encoding, which is the number of possible offsets."""
|
|
173
|
+
return len(encoding_offsets(encoding))
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def is_wide_utf(encoding: str) -> bool:
|
|
177
|
+
"""Check if the encoding is a wide UTF encoding (UTF-16 or UTF-32)."""
|
|
178
|
+
return encoding in WIDE_UTF_ENCODINGS
|
|
179
|
+
|
|
153
180
|
|
|
154
|
-
#
|
|
181
|
+
# TODO: this is unused cruft (mostly Asian language encodings)
|
|
155
182
|
ENCODINGS = [
|
|
156
183
|
'big5',
|
|
157
184
|
'big5hkscs',
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Class to smooth some of the rough edges around the dicts returned by chardet.detect_all()
|
|
3
3
|
"""
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
from rich.text import Text
|
|
7
7
|
|
|
@@ -17,30 +17,32 @@ class EncodingAssessment:
|
|
|
17
17
|
def __init__(self, assessment: dict) -> None:
|
|
18
18
|
self.assessment = assessment
|
|
19
19
|
self.encoding = assessment[ENCODING].lower()
|
|
20
|
-
self.encoding_text = Text(self.encoding, 'encoding.header')
|
|
21
|
-
self.language = self._get_dict_empty_value_as_None(LANGUAGE)
|
|
22
|
-
self.language_text = None if self.language is None else Text(self.language, 'encoding.language')
|
|
23
20
|
|
|
24
21
|
# Shift confidence from 0-1.0 scale to 0-100.0 scale
|
|
25
|
-
confidence = self._get_dict_empty_value_as_None(CONFIDENCE) or 0.0
|
|
26
|
-
assert isinstance(confidence, float)
|
|
27
|
-
self.confidence = 100.0 * confidence
|
|
22
|
+
self.confidence = 100.0 * (self._get_dict_empty_value_as_None(CONFIDENCE) or 0.0)
|
|
28
23
|
self.confidence_text = prefix_with_plain_text_obj(f"{round(self.confidence, 1)}%", style=meter_style(self.confidence))
|
|
29
24
|
|
|
30
|
-
#
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
self.encoding_text.append(f" ({self.language.title()})", style=f"color(23) {dim}")
|
|
25
|
+
# Add detected language info and label if any language was detected
|
|
26
|
+
self.language = self._get_dict_empty_value_as_None(LANGUAGE)
|
|
27
|
+
self.set_encoding_label(self.language.title() if self.language else None)
|
|
34
28
|
|
|
35
29
|
@classmethod
|
|
36
30
|
def dummy_encoding_assessment(cls, encoding) -> 'EncodingAssessment':
|
|
37
|
-
"""Generate an empty EncodingAssessment to use as a dummy when chardet gives us nothing"""
|
|
38
|
-
assessment = cls({ENCODING: encoding,
|
|
31
|
+
"""Generate an empty EncodingAssessment to use as a dummy when chardet gives us nothing."""
|
|
32
|
+
assessment = cls({ENCODING: encoding, CONFIDENCE: 0.0})
|
|
39
33
|
assessment.confidence_text = Text('none', 'no_attempt')
|
|
40
34
|
return assessment
|
|
41
35
|
|
|
36
|
+
def set_encoding_label(self, alt_text: Optional[str]) -> None:
|
|
37
|
+
"""Alt text is displayed below the encoding in slightly dimmer font."""
|
|
38
|
+
self.encoding_label = Text(self.encoding, 'encoding.header')
|
|
39
|
+
|
|
40
|
+
if alt_text is not None:
|
|
41
|
+
dim = 'dim' if (self.confidence or 0.0) < DIM_COUNTRY_THRESHOLD else ''
|
|
42
|
+
self.encoding_label.append(f" ({alt_text})", style=f"color(23) {dim}")
|
|
43
|
+
|
|
42
44
|
def __rich__(self) -> Text:
|
|
43
|
-
return Text('<Chardet(', 'white') + self.
|
|
45
|
+
return Text('<Chardet(', 'white') + self.encoding_label + Text(':') + self.confidence_text + Text('>')
|
|
44
46
|
|
|
45
47
|
def __str__(self) -> str:
|
|
46
48
|
return self.__rich__().plain
|
|
@@ -53,7 +53,7 @@ class EncodingDetector:
|
|
|
53
53
|
self.force_decode_assessments = self.assessments_above_confidence(type(self).force_decode_threshold)
|
|
54
54
|
self.force_display_assessments = self.assessments_above_confidence(type(self).force_display_threshold)
|
|
55
55
|
|
|
56
|
-
def get_encoding_assessment(self, encoding) -> EncodingAssessment:
|
|
56
|
+
def get_encoding_assessment(self, encoding: str) -> EncodingAssessment:
|
|
57
57
|
"""If chardet produced one, return it, otherwise return a dummy node with confidence of 0"""
|
|
58
58
|
assessment = next((r for r in self.unique_assessments if r.encoding == encoding), None)
|
|
59
59
|
return assessment or EncodingAssessment.dummy_encoding_assessment(encoding)
|
|
@@ -75,7 +75,7 @@ class EncodingDetector:
|
|
|
75
75
|
if result.confidence < YaralyzerConfig.args.min_chardet_table_confidence:
|
|
76
76
|
continue
|
|
77
77
|
|
|
78
|
-
self.table.add_row(f"{i + 1}", result.
|
|
78
|
+
self.table.add_row(f"{i + 1}", result.encoding_label, result.confidence_text)
|
|
79
79
|
|
|
80
80
|
# self.unique_assessments retains one result per encoding possibility (the highest confidence one)
|
|
81
81
|
# Some encodings are not language specific and for those we don't care about the language
|
|
@@ -11,7 +11,7 @@ from rich.text import Text
|
|
|
11
11
|
|
|
12
12
|
from yaralyzer.bytes_match import BytesMatch
|
|
13
13
|
from yaralyzer.config import YaralyzerConfig
|
|
14
|
-
from yaralyzer.encoding_detection.character_encodings import NEWLINE_BYTE
|
|
14
|
+
from yaralyzer.encoding_detection.character_encodings import NEWLINE_BYTE, encoding_width
|
|
15
15
|
from yaralyzer.helpers.rich_text_helper import newline_join
|
|
16
16
|
from yaralyzer.output.rich_console import (BYTES, BYTES_BRIGHTER, BYTES_BRIGHTEST,
|
|
17
17
|
BYTES_HIGHLIGHT, GREY, console, console_width)
|
|
@@ -126,6 +126,21 @@ def print_bytes(bytes_array: bytes, style=None) -> None:
|
|
|
126
126
|
console.print(escape(clean_byte_string(line)), style=style or 'bytes')
|
|
127
127
|
|
|
128
128
|
|
|
129
|
+
def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
|
|
130
|
+
"""
|
|
131
|
+
Truncate bytes to a multiple of the char width of the given encoding.
|
|
132
|
+
For utf-16 this means truncate to a multiple of 2, for utf-32 to a multiple of 4.
|
|
133
|
+
"""
|
|
134
|
+
char_width = encoding_width(encoding)
|
|
135
|
+
num_bytes = len(_bytes)
|
|
136
|
+
num_extra_bytes = num_bytes % char_width
|
|
137
|
+
|
|
138
|
+
if char_width <= 1 or num_bytes <= char_width or num_extra_bytes == 0:
|
|
139
|
+
return _bytes
|
|
140
|
+
else:
|
|
141
|
+
return _bytes[:-num_extra_bytes]
|
|
142
|
+
|
|
143
|
+
|
|
129
144
|
def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
|
|
130
145
|
"""
|
|
131
146
|
Find the position of bytes_str in surrounding_byte_str. Both args are raw text dumps of binary data.
|
yaralyzer/helpers/dict_helper.py
CHANGED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Help with lists.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
def flatten(a):
|
|
6
|
+
"""From https://www.geeksforgeeks.org/python/python-flatten-list-to-individual-elements/"""
|
|
7
|
+
return_value = []
|
|
8
|
+
|
|
9
|
+
for x in a:
|
|
10
|
+
if isinstance(x, list):
|
|
11
|
+
return_value.extend(flatten(x)) # Recursively flatten nested lists
|
|
12
|
+
else:
|
|
13
|
+
return_value.append(x) # Append individual elements
|
|
14
|
+
|
|
15
|
+
return return_value
|
|
@@ -10,8 +10,7 @@ from rich.panel import Panel
|
|
|
10
10
|
from rich.style import Style
|
|
11
11
|
from rich.text import Text
|
|
12
12
|
|
|
13
|
-
from yaralyzer.output.rich_console import (BYTES_BRIGHTEST, BYTES_HIGHLIGHT, YARALYZER_THEME_DICT,
|
|
14
|
-
console)
|
|
13
|
+
from yaralyzer.output.rich_console import (BYTES_BRIGHTEST, BYTES_HIGHLIGHT, YARALYZER_THEME_DICT, console)
|
|
15
14
|
from yaralyzer.util.logging import log
|
|
16
15
|
|
|
17
16
|
# String constants
|
|
@@ -11,7 +11,6 @@ Final output should be rich.table of decoding attempts that are sorted like this
|
|
|
11
11
|
3. Decodings that were the same as other decodings
|
|
12
12
|
4. Failed decodings
|
|
13
13
|
"""
|
|
14
|
-
|
|
15
14
|
from collections import namedtuple
|
|
16
15
|
|
|
17
16
|
from rich import box
|
|
@@ -20,21 +19,23 @@ from rich.text import Text
|
|
|
20
19
|
|
|
21
20
|
from yaralyzer.bytes_match import BytesMatch
|
|
22
21
|
from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
|
|
23
|
-
from yaralyzer.helpers.bytes_helper import
|
|
24
|
-
rich_text_view_of_raw_bytes)
|
|
22
|
+
from yaralyzer.helpers.bytes_helper import ascii_view_of_raw_bytes, hex_view_of_raw_bytes, rich_text_view_of_raw_bytes
|
|
25
23
|
from yaralyzer.helpers.rich_text_helper import CENTER, FOLD, MIDDLE, RIGHT, na_txt
|
|
26
24
|
|
|
27
|
-
# The confidence and encoding will not be shown in the final display - instead their Text versions are shown
|
|
25
|
+
# The confidence and encoding will not be shown in the final display - instead their Text versions are shown.
|
|
26
|
+
# TODO: this should become a dataclass (requires Python 3.7+)
|
|
28
27
|
DecodingTableRow = namedtuple(
|
|
29
28
|
'DecodingTableRow',
|
|
30
29
|
[
|
|
31
|
-
'
|
|
30
|
+
'encoding_label',
|
|
32
31
|
'confidence_text',
|
|
33
32
|
'errors_while_decoded',
|
|
34
33
|
'decoded_string',
|
|
34
|
+
# Properties below here are not displayed in the table but are used for sorting etc.
|
|
35
35
|
'confidence',
|
|
36
36
|
'encoding',
|
|
37
|
-
'sort_score'
|
|
37
|
+
'sort_score',
|
|
38
|
+
'encoding_label_plain', # For sorting purposes, if confidences match
|
|
38
39
|
]
|
|
39
40
|
)
|
|
40
41
|
|
|
@@ -43,9 +44,9 @@ HEX = Text('HEX', style='bytes.title')
|
|
|
43
44
|
RAW_BYTES = Text('Raw', style=f"bytes")
|
|
44
45
|
|
|
45
46
|
|
|
46
|
-
def
|
|
47
|
-
"""
|
|
48
|
-
table = Table(show_lines=True, border_style='bytes', header_style='
|
|
47
|
+
def new_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
|
|
48
|
+
"""Build a new rich Table with two rows, the raw and hex views of the bytes_match data."""
|
|
49
|
+
table = Table(show_lines=True, border_style='bytes', header_style='decode.table_header')
|
|
49
50
|
|
|
50
51
|
def add_col(title, **kwargs):
|
|
51
52
|
kwargs['justify'] = kwargs.get('justify', CENTER)
|
|
@@ -64,19 +65,21 @@ def build_decoding_attempts_table(bytes_match: BytesMatch) -> Table:
|
|
|
64
65
|
|
|
65
66
|
|
|
66
67
|
def decoding_table_row(assessment: EncodingAssessment, is_forced: Text, txt: Text, score: float) -> DecodingTableRow:
|
|
67
|
-
"""
|
|
68
|
+
"""Build a table row for a decoding attempt."""
|
|
68
69
|
return DecodingTableRow(
|
|
69
|
-
assessment.
|
|
70
|
+
assessment.encoding_label,
|
|
70
71
|
assessment.confidence_text,
|
|
71
72
|
is_forced,
|
|
72
73
|
txt,
|
|
73
74
|
assessment.confidence,
|
|
74
75
|
assessment.encoding,
|
|
75
|
-
sort_score=score
|
|
76
|
+
sort_score=score,
|
|
77
|
+
encoding_label_plain=assessment.encoding_label.plain
|
|
78
|
+
)
|
|
76
79
|
|
|
77
80
|
|
|
78
81
|
def assessment_only_row(assessment: EncodingAssessment, score) -> DecodingTableRow:
|
|
79
|
-
"""Build a row with just chardet assessment data and no actual
|
|
82
|
+
"""Build a row with just chardet assessment confidence data and no actual decoding attempt string."""
|
|
80
83
|
return decoding_table_row(assessment, na_txt(), DECODE_NOT_ATTEMPTED_MSG, score)
|
|
81
84
|
|
|
82
85
|
|
|
@@ -85,8 +88,8 @@ def _hex_preview_subtable(bytes_match: BytesMatch) -> Table:
|
|
|
85
88
|
hex_table = Table(
|
|
86
89
|
'hex',
|
|
87
90
|
'ascii',
|
|
88
|
-
border_style='
|
|
89
|
-
header_style='
|
|
91
|
+
border_style='grey.darkest',
|
|
92
|
+
header_style='decode.table_header',
|
|
90
93
|
box=box.MINIMAL,
|
|
91
94
|
show_lines=True,
|
|
92
95
|
show_header=True,
|
yaralyzer/output/file_export.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import time
|
|
3
|
+
from argparse import Namespace
|
|
4
|
+
from pathlib import Path
|
|
2
5
|
from os import path
|
|
3
6
|
|
|
4
7
|
from rich.terminal_theme import TerminalTheme
|
|
5
8
|
|
|
6
9
|
from yaralyzer.util.logging import log_and_print
|
|
10
|
+
from yaralyzer.yaralyzer import Yaralyzer
|
|
7
11
|
|
|
8
12
|
# TerminalThemes are used when saving SVGS. This one just swaps white for black in DEFAULT_TERMINAL_THEME
|
|
9
13
|
YARALYZER_TERMINAL_THEME = TerminalTheme(
|
|
@@ -47,6 +51,22 @@ _EXPORT_KWARGS = {
|
|
|
47
51
|
}
|
|
48
52
|
|
|
49
53
|
|
|
54
|
+
def export_json(yaralyzer: Yaralyzer, output_basepath: str | None) -> str:
|
|
55
|
+
"""Export YARA scan results to JSON. Returns the path to the output file that was written."""
|
|
56
|
+
output_path = f"{output_basepath or 'yara_matches'}.json"
|
|
57
|
+
|
|
58
|
+
matches_data = [
|
|
59
|
+
bytes_match.to_json()
|
|
60
|
+
for bytes_match, _bytes_decoder in yaralyzer.match_iterator()
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
with open(output_path, 'w') as f:
|
|
64
|
+
json.dump(matches_data, f, indent=4)
|
|
65
|
+
|
|
66
|
+
log_and_print(f"YARA matches exported to JSON file: '{output_path}'")
|
|
67
|
+
return output_path
|
|
68
|
+
|
|
69
|
+
|
|
50
70
|
def invoke_rich_export(export_method, output_file_basepath) -> str:
|
|
51
71
|
"""
|
|
52
72
|
Announce the export, perform the export, announce completion.
|
yaralyzer/output/rich_console.py
CHANGED
|
@@ -53,6 +53,7 @@ YARALYZER_THEME_DICT = {
|
|
|
53
53
|
'decode.section_header': 'color(100) reverse',
|
|
54
54
|
'decode.subheading': PEACH,
|
|
55
55
|
'decode.subheading_2': 'color(215) dim italic',
|
|
56
|
+
'decode.table_header': 'color(101) bold',
|
|
56
57
|
'headline': 'bold white underline',
|
|
57
58
|
# bytes
|
|
58
59
|
'ascii': 'color(58)',
|
|
@@ -179,6 +179,11 @@ export.add_argument('-html', '--export-html',
|
|
|
179
179
|
const='html',
|
|
180
180
|
help='export analysis to styled html files')
|
|
181
181
|
|
|
182
|
+
export.add_argument('-json', '--export-json',
|
|
183
|
+
action='store_const',
|
|
184
|
+
const='json',
|
|
185
|
+
help='export analysis to JSON files')
|
|
186
|
+
|
|
182
187
|
export.add_argument('-out', '--output-dir',
|
|
183
188
|
metavar='OUTPUT_DIR',
|
|
184
189
|
help='write files to OUTPUT_DIR instead of current dir, does nothing if not exporting a file')
|
|
@@ -229,6 +234,9 @@ def parse_arguments(args: Optional[Namespace] = None):
|
|
|
229
234
|
|
|
230
235
|
if args.debug:
|
|
231
236
|
log.setLevel(logging.DEBUG)
|
|
237
|
+
|
|
238
|
+
if args.log_level and args.log_level != 'DEBUG':
|
|
239
|
+
log.warning("Ignoring --log-level option as debug mode means log level is DEBUG")
|
|
232
240
|
elif args.log_level:
|
|
233
241
|
log.setLevel(args.log_level)
|
|
234
242
|
|
|
@@ -257,7 +265,7 @@ def parse_arguments(args: Optional[Namespace] = None):
|
|
|
257
265
|
EncodingDetector.force_display_threshold = args.force_display_threshold
|
|
258
266
|
|
|
259
267
|
# File export options
|
|
260
|
-
if args.
|
|
268
|
+
if args.export_html or args.export_json or args.export_svg or args.export_txt:
|
|
261
269
|
args.output_dir = args.output_dir or getcwd()
|
|
262
270
|
elif args.output_dir:
|
|
263
271
|
log.warning('--output-dir provided but no export option was chosen')
|
yaralyzer/util/logging.py
CHANGED
|
@@ -54,7 +54,6 @@ def configure_logger(log_label: str) -> logging.Logger:
|
|
|
54
54
|
rich_stream_handler = RichHandler(rich_tracebacks=True)
|
|
55
55
|
rich_stream_handler.setLevel('WARN')
|
|
56
56
|
logger.addHandler(rich_stream_handler)
|
|
57
|
-
logger.info('File logging triggered by setting of YARALYZER_LOG_DIR')
|
|
58
57
|
else:
|
|
59
58
|
logger.addHandler(RichHandler(rich_tracebacks=True))
|
|
60
59
|
|
yaralyzer/yara/yara_match.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: yaralyzer
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Visualize and force decode YARA and regex matches found in a file or byte stream. With colors. Lots of colors.
|
|
5
5
|
Home-page: https://github.com/michelcrypt4d4mus/yaralyzer
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -113,7 +113,7 @@ for bytes_match, bytes_decoder in yaralyzer.match_iterator():
|
|
|
113
113
|
```
|
|
114
114
|
|
|
115
115
|
# Example Output
|
|
116
|
-
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich). SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
116
|
+
The Yaralyzer can export visualizations to HTML, ANSI colored text, and SVG vector images using the file export functionality that comes with [Rich](https://github.com/Textualize/rich) as well as a (somewhat limited) plain text JSON format. SVGs can be turned into `png` format images with a tool like [Inkscape](https://inkscape.org/) or `cairosvg`. In our experience they both work though we've seen some glitchiness with `cairosvg`.
|
|
117
117
|
|
|
118
118
|
**PyPi Users:** If you are reading this document [on PyPi](https://pypi.org/project/yaralyzer/) be aware that it renders a lot better [over on GitHub](https://github.com/michelcrypt4d4mus/yaralyzer). Pretty pictures, footnotes that work, etc.
|
|
119
119
|
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
.yaralyzer.example,sha256=z3_mk41xxm0Pr_8MGM7AKQG0xEFRtGcyJLboMuelRp4,3504
|
|
2
|
+
CHANGELOG.md,sha256=5DIUFaaTQpYkCLKYotRBpBZUMILJgP4ECtqla9zKJRY,2539
|
|
3
|
+
yaralyzer/__init__.py,sha256=YItEM_QKbLUj-6QZg2ZINrTzPQZ1IHOjGgoxmRR2buA,2703
|
|
4
|
+
yaralyzer/bytes_match.py,sha256=ShAxI_jZYElG1w-FJ9wNF-5SReL2uv-iJTiQQS3VTM0,8213
|
|
5
|
+
yaralyzer/config.py,sha256=VU5RTQwbNV3Ai02p4mAjiJrbL30gjjf9xBGl4IOh0Qs,3927
|
|
6
|
+
yaralyzer/decoding/bytes_decoder.py,sha256=tJKFoWChIpmgW23XiCwlfZCHdTXUz5z277U8-CXsjsg,8752
|
|
7
|
+
yaralyzer/decoding/decoding_attempt.py,sha256=8o0A4gidE4olW187QXIAYrAAYdjUsqoGz2YRgPqbJ3Y,8391
|
|
8
|
+
yaralyzer/encoding_detection/character_encodings.py,sha256=KqN0sdGZsVMaJM9qjGfcZNyyjcUPlTCob0jBLh-DW7E,5383
|
|
9
|
+
yaralyzer/encoding_detection/encoding_assessment.py,sha256=-YMjkl4AuQYBdq2SFMw1LvA7A8auNxtVIM93az9Xwzc,2368
|
|
10
|
+
yaralyzer/encoding_detection/encoding_detector.py,sha256=tqTgTOv7WjQgfVhShGETXgJmZFw16HoQ2l6WhLlAY34,4738
|
|
11
|
+
yaralyzer/helpers/bytes_helper.py,sha256=8AEW3aPv0dROD-srfe8z9m12bVZLrdvHRq-RBNQ4Vso,7442
|
|
12
|
+
yaralyzer/helpers/dict_helper.py,sha256=THbCgnTLgtM2v8MjjxZk2V296cYBghzjss6xhRBqYPQ,212
|
|
13
|
+
yaralyzer/helpers/file_helper.py,sha256=uf8dTOhRrJng0V36o7Mwk5t-L5gc4_uOaGj9F0s5OBA,1254
|
|
14
|
+
yaralyzer/helpers/list_helper.py,sha256=r3YUMkkVvl5R5VBzc3rxCzyPW_Nxhj5CRinBCHs9dAY,393
|
|
15
|
+
yaralyzer/helpers/rich_text_helper.py,sha256=elkWt2LoV_FnajK-UadEs_gPWSBE7NSsyJHN1eQsmgw,4213
|
|
16
|
+
yaralyzer/helpers/string_helper.py,sha256=AT2_CAgpvtp8GiUSKLTiDoToDD3tBB9BbrlX-s2bL7o,932
|
|
17
|
+
yaralyzer/output/decoding_attempts_table.py,sha256=x6AViJqAj7ept92OXWl9-PVk8MyBSyYt62mUgJjsP7U,4040
|
|
18
|
+
yaralyzer/output/file_export.py,sha256=YfF5D8aHOUQHwV0akFaaSMafbhdhUakvipadpq6HZmk,2927
|
|
19
|
+
yaralyzer/output/file_hashes_table.py,sha256=SnS2ip8dSeHoycQ0Ng3Gtpv9rXJSkKnvD2krTuhNg7s,1632
|
|
20
|
+
yaralyzer/output/regex_match_metrics.py,sha256=deJPaVnhpy-AUX6PCE_jbPLIlmfIOtl-cEVWsiFp3KY,3003
|
|
21
|
+
yaralyzer/output/rich_console.py,sha256=NJi6LjvoOfFXm9Kq9TQbZ3P32C5nQtahccUMEY_Ykpw,4248
|
|
22
|
+
yaralyzer/util/argument_parser.py,sha256=PNmdmFULBq10lAXOt9McZImQ-H5VNnrNN2LeTRxd0P0,12928
|
|
23
|
+
yaralyzer/util/logging.py,sha256=6N-JrQfAbVdCMYvqJ3MUHMchSwFN9208-0giWvX4OYY,4248
|
|
24
|
+
yaralyzer/yara/yara_match.py,sha256=4_26eaJT9I0PULiCdxerQtX4TfAIwcT-B6GJociGM9A,5119
|
|
25
|
+
yaralyzer/yara/yara_rule_builder.py,sha256=kAa3RBojM5GEaXDJjKZODAyx6yj34AlkOnQhACAFfZM,3021
|
|
26
|
+
yaralyzer/yaralyzer.py,sha256=f1y8qST6GZHEWl7nDNEBWpQuYjnsJ8dm9nGPWqZ4Hkk,9417
|
|
27
|
+
yaralyzer-1.0.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
28
|
+
yaralyzer-1.0.1.dist-info/METADATA,sha256=oWGgWkTQelQydVVU4o9rDqaxHayeGOn6c7-EI2DrlpI,10795
|
|
29
|
+
yaralyzer-1.0.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
30
|
+
yaralyzer-1.0.1.dist-info/entry_points.txt,sha256=7LnLJrNTfql0vuctjRWwp_ZD-BYvtv9ENVipdjuT7XI,136
|
|
31
|
+
yaralyzer-1.0.1.dist-info/RECORD,,
|
yaralyzer-0.9.6.dist-info/RECORD
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
.yaralyzer.example,sha256=4QKFDDNvnAlT3NVS0eiM_Qed9Hxy4ZPQFJkye8lKYAk,3486
|
|
2
|
-
CHANGELOG.md,sha256=PfIwgr15ORPbZFuyAifcOwe8h6yhWlDGHOX0MaFPn4E,2120
|
|
3
|
-
yaralyzer/__init__.py,sha256=sU2sN1lizcfxUg4vghz_L3OUwi7sWXGvNinE_tJCa4Y,2616
|
|
4
|
-
yaralyzer/bytes_match.py,sha256=6QeW1Zk9DpNQ9ks-ixnQZ-clqT8qwB2FKsmH3nmxcYI,7583
|
|
5
|
-
yaralyzer/config.py,sha256=eRJ88wBFs1rfjOv4htI1Ye0LFCFfk4kGDiFHuqZfkX0,3730
|
|
6
|
-
yaralyzer/decoding/bytes_decoder.py,sha256=lulfZZhYmo9ky2KpqBW-c9hs5_uhlaz0gatdtT_NYSY,7951
|
|
7
|
-
yaralyzer/decoding/decoding_attempt.py,sha256=GAxMNOX7I_FsuzGWIelTWAECytLUJD-wpmUAuVe2bn0,7241
|
|
8
|
-
yaralyzer/encoding_detection/character_encodings.py,sha256=zrOUgNXwrcXkeYSgdo09vsFPmNYsTkaHvq7YzzpbMsk,4395
|
|
9
|
-
yaralyzer/encoding_detection/encoding_assessment.py,sha256=yMb1QSHS7JpNyL6jnZwt9Vq0Y6ueVStYZjMKyP6-f5A,2307
|
|
10
|
-
yaralyzer/encoding_detection/encoding_detector.py,sha256=e_UtZi1Nh3ZRBOESEFtjYz0vJ_1cZjIJ5uWRuzM91oc,4732
|
|
11
|
-
yaralyzer/helpers/bytes_helper.py,sha256=XemBmf9tXgjoN-X7AULHL1wKS1lkQR6XXGt_D2lMBY0,6915
|
|
12
|
-
yaralyzer/helpers/dict_helper.py,sha256=h8Sg01qCJRKfZ0bmTYhLP6X5OVxMg-7CZryJIjcbw8E,211
|
|
13
|
-
yaralyzer/helpers/file_helper.py,sha256=uf8dTOhRrJng0V36o7Mwk5t-L5gc4_uOaGj9F0s5OBA,1254
|
|
14
|
-
yaralyzer/helpers/rich_text_helper.py,sha256=9Wc6WM625iKxAXRvxBkVzvszfcxb8YtqoQ6d7d8EqoQ,4218
|
|
15
|
-
yaralyzer/helpers/string_helper.py,sha256=AT2_CAgpvtp8GiUSKLTiDoToDD3tBB9BbrlX-s2bL7o,932
|
|
16
|
-
yaralyzer/output/decoding_attempts_table.py,sha256=cMY9eCXZHj0FfGxJ9uoM5cpdhQve-EtTRHv3fTHKJAo,3712
|
|
17
|
-
yaralyzer/output/file_export.py,sha256=AsP43Y1kt-dzAHr3TuFtyihG4NqV1XNyxUve-77CAzU,2270
|
|
18
|
-
yaralyzer/output/file_hashes_table.py,sha256=SnS2ip8dSeHoycQ0Ng3Gtpv9rXJSkKnvD2krTuhNg7s,1632
|
|
19
|
-
yaralyzer/output/regex_match_metrics.py,sha256=deJPaVnhpy-AUX6PCE_jbPLIlmfIOtl-cEVWsiFp3KY,3003
|
|
20
|
-
yaralyzer/output/rich_console.py,sha256=Botb8aec4_aRiPyaEkwrnhwERHE8a5-lk5KfgzXVlBE,4202
|
|
21
|
-
yaralyzer/util/argument_parser.py,sha256=wC7gK2qGapwnE-nODLAgeLJci3Cs6WJnMIjFhoUUx7A,12575
|
|
22
|
-
yaralyzer/util/logging.py,sha256=3qtLnCFbN8L1nTSwIQvxfcM5jfhIRWTFZj9XGQk74kc,4326
|
|
23
|
-
yaralyzer/yara/yara_match.py,sha256=qR8GNnmHiN-SzNwkWYcJa1Kb6RQUeNtcjpjccoI8wIQ,5145
|
|
24
|
-
yaralyzer/yara/yara_rule_builder.py,sha256=kAa3RBojM5GEaXDJjKZODAyx6yj34AlkOnQhACAFfZM,3021
|
|
25
|
-
yaralyzer/yaralyzer.py,sha256=f1y8qST6GZHEWl7nDNEBWpQuYjnsJ8dm9nGPWqZ4Hkk,9417
|
|
26
|
-
yaralyzer-0.9.6.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
27
|
-
yaralyzer-0.9.6.dist-info/METADATA,sha256=f-7x3tNTCw6qWGok2_OqlGXtWVcHiqO8XXc1wmT1iwM,10740
|
|
28
|
-
yaralyzer-0.9.6.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
|
|
29
|
-
yaralyzer-0.9.6.dist-info/entry_points.txt,sha256=7LnLJrNTfql0vuctjRWwp_ZD-BYvtv9ENVipdjuT7XI,136
|
|
30
|
-
yaralyzer-0.9.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|