yaralyzer 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic.
- CHANGELOG.md +8 -0
- yaralyzer/__init__.py +0 -2
- yaralyzer/bytes_match.py +35 -33
- yaralyzer/config.py +1 -1
- yaralyzer/decoding/bytes_decoder.py +3 -6
- yaralyzer/decoding/decoding_attempt.py +4 -3
- yaralyzer/encoding_detection/character_encodings.py +38 -39
- yaralyzer/encoding_detection/encoding_assessment.py +2 -2
- yaralyzer/encoding_detection/encoding_detector.py +3 -4
- yaralyzer/helpers/bytes_helper.py +4 -4
- yaralyzer/helpers/dict_helper.py +0 -1
- yaralyzer/helpers/list_helper.py +1 -0
- yaralyzer/helpers/rich_text_helper.py +7 -7
- yaralyzer/output/file_export.py +1 -1
- yaralyzer/output/file_hashes_table.py +4 -4
- yaralyzer/output/rich_console.py +1 -1
- yaralyzer/util/argument_parser.py +10 -10
- yaralyzer/util/logging.py +1 -1
- yaralyzer/yara/yara_match.py +1 -1
- yaralyzer/yara/yara_rule_builder.py +16 -17
- yaralyzer/yaralyzer.py +27 -28
- yaralyzer-1.0.7.dist-info/LICENSE +674 -0
- {yaralyzer-1.0.5.dist-info → yaralyzer-1.0.7.dist-info}/METADATA +7 -2
- yaralyzer-1.0.7.dist-info/RECORD +32 -0
- yaralyzer-1.0.5.dist-info/RECORD +0 -31
- /yaralyzer-1.0.5.dist-info/LICENSE → /LICENSE +0 -0
- {yaralyzer-1.0.5.dist-info → yaralyzer-1.0.7.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.5.dist-info → yaralyzer-1.0.7.dist-info}/entry_points.txt +0 -0
CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
 # NEXT RELEASE
 
+### 1.0.7
+* Add `Changelog` to PyPi URLs, add some more PyPi classifiers
+* Add `.flake8` config file and fix style errors
+
+### 1.0.6
+* Add `Environment :: Console` and `Programming Language :: Python` to PyPi classifiers
+* Add `LICENSE` to PyPi package
+
 ### 1.0.5
 * Add `Development Status :: 5 - Production/Stable` to pypi classifiers
 
yaralyzer/__init__.py
CHANGED
@@ -11,11 +11,9 @@ if not environ.get('INVOKED_BY_PYTEST', False):
         load_dotenv(dotenv_path=dotenv_file)
         break
 
-from yaralyzer.config import YaralyzerConfig
 from yaralyzer.output.file_export import export_json, invoke_rich_export
 from yaralyzer.output.rich_console import console
 from yaralyzer.util.argument_parser import get_export_basepath, parse_arguments
-from yaralyzer.util.logging import log
 from yaralyzer.yara.yara_rule_builder import HEX, REGEX
 from yaralyzer.yaralyzer import Yaralyzer
 
yaralyzer/bytes_match.py
CHANGED
@@ -13,22 +13,22 @@ from rich.text import Text
 from yara import StringMatch, StringMatchInstance
 
 from yaralyzer.config import YaralyzerConfig
-from yaralyzer.helpers.rich_text_helper import
+from yaralyzer.helpers.rich_text_helper import prefix_with_style
 from yaralyzer.output.file_hashes_table import bytes_hashes_table
 from yaralyzer.output.rich_console import ALERT_STYLE, GREY_ADDRESS
 
 
 class BytesMatch:
     def __init__(
-
-
-
-
-
-
-
-
-
+        self,
+        matched_against: bytes,
+        start_idx: int,
+        length: int,
+        label: str,
+        ordinal: int,
+        match: Optional[re.Match] = None,  # It's rough to get the regex from yara :(
+        highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
+    ) -> None:
         """
         Ordinal means it's the Nth match with this regex (not super important but useful)
         YARA makes it a little rouch to get the actual regex that matched. Can be done with plyara eventually.
@@ -52,24 +52,24 @@ class BytesMatch:
 
     @classmethod
     def from_regex_match(
-
-
-
-
-
-
+        cls,
+        matched_against: bytes,
+        match: re.Match,
+        ordinal: int,
+        highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
+    ) -> 'BytesMatch':
         return cls(matched_against, match.start(), len(match[0]), match.re.pattern, ordinal, match, highlight_style)
 
     @classmethod
     def from_yara_str(
-
-
-
-
-
-
-
-
+        cls,
+        matched_against: bytes,
+        rule_name: str,
+        yara_str_match: StringMatch,
+        yara_str_match_instance: StringMatchInstance,
+        ordinal: int,
+        highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
+    ) -> 'BytesMatch':
         """Build a BytesMatch from a yara string match. 'matched_against' is the set of bytes yara was run against."""
         pattern_label = yara_str_match.identifier
 
@@ -89,11 +89,11 @@ class BytesMatch:
 
     @classmethod
     def from_yara_match(
-
-
-
-
-
+        cls,
+        matched_against: bytes,
+        yara_match: dict,
+        highlight_style: str = YaralyzerConfig.HIGHLIGHT_STYLE
+    ) -> Iterator['BytesMatch']:
         """Iterator w/a BytesMatch for each string returned as part of a YARA match result dict."""
         i = 0  # For numbered labeling
 
@@ -102,13 +102,14 @@ class BytesMatch:
             for yara_str_match_instance in yara_str_match.instances:
                 i += 1
 
-                yield
+                yield cls.from_yara_str(
                     matched_against,
                     yara_match['rule'],
                     yara_str_match,
                     yara_str_match_instance,
                     i,
-                    highlight_style
+                    highlight_style
+                )
 
     def style_at_position(self, idx) -> str:
         """Get the style for the byte at position idx within the matched bytes"""
@@ -119,11 +120,12 @@ class BytesMatch:
 
     def location(self) -> Text:
         """Returns a Text obj like '(start idx: 348190, end idx: 348228)'"""
-        location_txt =
+        location_txt = prefix_with_style(
            f"(start idx: ",
            style='off_white',
            root_style='decode.subheading'
        )
+
         location_txt.append(str(self.start_idx), style='number')
         location_txt.append(', end idx: ', style='off_white')
         location_txt.append(str(self.end_idx), style='number')
@@ -184,7 +186,7 @@ class BytesMatch:
         self.surrounding_bytes: bytes = self.matched_against[self.surrounding_start_idx:self.surrounding_end_idx]
 
     def __rich__(self) -> Text:
-        headline =
+        headline = prefix_with_style(str(self.match_length), style='number', root_style='decode.subheading')
         headline.append(f" bytes matching ")
         headline.append(f"{self.label} ", style=ALERT_STYLE if self.highlight_style == ALERT_STYLE else 'regex')
         headline.append('at ')
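For orientation, here is a minimal sketch of how the reformatted `from_regex_match` classmethod is called, based only on the signature and the `cls(...)` call visible in these hunks. It assumes `YaralyzerConfig` defaults are available the way they would be in a normal yaralyzer run (the default `highlight_style` references `YaralyzerConfig.HIGHLIGHT_STYLE`); the sample bytes and pattern are made up.

import re

from yaralyzer.bytes_match import BytesMatch

data = b"GET /admin HTTP/1.1\r\nGET /login HTTP/1.1"  # hypothetical bytes to scan
pattern = re.compile(rb"GET /\w+")                     # hypothetical regex

# One BytesMatch per regex hit; 'ordinal' is just the 1-based counter for the Nth match
matches = [
    BytesMatch.from_regex_match(data, match, ordinal)
    for ordinal, match in enumerate(pattern.finditer(data), start=1)
]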
yaralyzer/config.py
CHANGED
@@ -91,7 +91,7 @@ class YaralyzerConfig:
             env_var = f"{YARALYZER}_{option.upper()}"
             env_value = environ.get(env_var)
             default_value = cls.get_default_arg(option)
-            #print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}")
+            # print(f"option: {option}, arg_value: {arg_value}, env_var: {env_var}, env_value: {env_value}, default: {default_value}")  # noqa: E501
 
             # TODO: as is you can't override env vars with CLI args
             if isinstance(arg_value, bool):
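As a hedged illustration of the env var lookup shown in this hunk: option names are upcased and prefixed with the `YARALYZER` constant. The constant's literal value and the `max_decode_length` option name are assumptions based on other parts of this diff.

from os import environ

YARALYZER = 'YARALYZER'                    # assumed value of the module-level constant
option = 'max_decode_length'               # an option name that appears elsewhere in this diff
env_var = f"{YARALYZER}_{option.upper()}"  # -> 'YARALYZER_MAX_DECODE_LENGTH'
env_value = environ.get(env_var)           # None unless the variable is set in the environment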
yaralyzer/decoding/bytes_decoder.py
CHANGED
@@ -3,7 +3,6 @@ Class to handle attempting to decode a chunk of bytes into strings with various
 Leverages the chardet library to both guide what encodings are attempted as well as to rank decodings
 in the results.
 """
-
 from collections import defaultdict
 from copy import deepcopy
 from operator import attrgetter
@@ -15,14 +14,13 @@ from rich.panel import Panel
 from rich.table import Table
 from rich.text import Text
 
-
+from yaralyzer.bytes_match import BytesMatch  # Used to cause circular import issues
 from yaralyzer.config import YaralyzerConfig
 from yaralyzer.decoding.decoding_attempt import DecodingAttempt
-from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT
+from yaralyzer.encoding_detection.character_encodings import ENCODING, ENCODINGS_TO_ATTEMPT
 from yaralyzer.encoding_detection.encoding_assessment import EncodingAssessment
 from yaralyzer.encoding_detection.encoding_detector import EncodingDetector
 from yaralyzer.helpers.dict_helper import get_dict_key_by_value
-from yaralyzer.helpers.list_helper import flatten
 from yaralyzer.helpers.rich_text_helper import CENTER, DECODING_ERRORS_MSG, NO_DECODING_ERRORS_MSG
 from yaralyzer.output.decoding_attempts_table import (DecodingTableRow, assessment_only_row,
                                                       decoding_table_row, new_decoding_attempts_table)
@@ -66,7 +64,6 @@ class BytesDecoder:
         if self.bytes_match.is_decodable():
             yield self._build_decodings_table()
         elif YaralyzerConfig.args.standalone_mode:
-            # TODO: yield self.bytes_match.suppression_notice() (i guess to show some notice that things are suppressed?)
             yield self._build_decodings_table(True)
 
         yield NewLine()
@@ -136,7 +133,7 @@ class BytesDecoder:
         # If the decoding can have a start offset add an appropriate extension to the encoding label
         if decoding.start_offset_label:
             if assessment.language:
-                log.warning(f"{decoding.encoding}
+                log.warning(f"{decoding.encoding} offset {decoding.start_offset} AND language '{assessment.language}'")
             else:
                 assessment = deepcopy(assessment)
                 assessment.set_encoding_label(decoding.start_offset_label)
yaralyzer/decoding/decoding_attempt.py
CHANGED
@@ -7,10 +7,11 @@ from typing import Optional
 from rich.markup import escape
 from rich.text import Text
 
+from yaralyzer.bytes_match import BytesMatch  # Used to cause circular import issues
 from yaralyzer.encoding_detection.character_encodings import (ENCODINGS_TO_ATTEMPT, SINGLE_BYTE_ENCODINGS,
                                                               UTF_8, encoding_width, is_wide_utf)
 from yaralyzer.helpers.bytes_helper import clean_byte_string, truncate_for_encoding
-from yaralyzer.helpers.rich_text_helper import
+from yaralyzer.helpers.rich_text_helper import prefix_with_style, unprintable_byte_to_text
 from yaralyzer.output.rich_console import ALERT_STYLE, BYTES_BRIGHTER, BYTES_BRIGHTEST, BYTES_NO_DIM, GREY_ADDRESS
 from yaralyzer.util.logging import log
 
@@ -144,7 +145,7 @@ class DecodingAttempt:
         else:
             return self._failed_to_decode_msg_txt(last_exception)
 
-    def _to_rich_text(self, _string: str, bytes_offset: int=0) -> Text:
+    def _to_rich_text(self, _string: str, bytes_offset: int = 0) -> Text:
         """Convert a decoded string to highlighted Text representation"""
         # Adjust where we start the highlighting given the multibyte nature of the encodings
         log.debug(f"Stepping through {self.encoding} encoded string...")
@@ -181,4 +182,4 @@
     def _failed_to_decode_msg_txt(self, exception: Optional[Exception]) -> Text:
         """Set failed_to_decode flag and return a Text object with the error message."""
         self.failed_to_decode = True
-        return
+        return prefix_with_style(f"(decode failed: {exception})", style='red dim italic')
yaralyzer/encoding_detection/character_encodings.py
CHANGED
@@ -13,7 +13,7 @@ ASCII = 'ascii'
 UTF_8 = 'utf-8'
 UTF_16 = 'utf-16'
 UTF_32 = 'utf-32'
-ISO_8859_1 =
+ISO_8859_1 = 'iso-8859-1'
 WINDOWS_1252 = 'windows-1252'
 
 
@@ -32,39 +32,39 @@ BOMS = {
 # ASCII characters that either print nothing, put the cursor in a weird place, or (worst of all) actively
 # delete stuff you already printed
 UNPRINTABLE_ASCII = {
-    0: 'NUL',
-    1: 'SOH',
-    2: 'STX',
+    0: 'NUL',  # 'Null',
+    1: 'SOH',  # 'StartHeading',
+    2: 'STX',  # 'StartText',
     3: 'ETX',
-    4: 'EOT',
-    5: 'ENQ',
-    6: 'ACK',
-    7: 'BEL',
-    8: 'BS',
-    #9: 'HT'
-    #10: 'LF', # 'LineFeed',
-    11: 'VT',
-    12: 'FF',
-    13: 'CR',
-    14: 'SO',
-    15: 'SI',
-    16: 'DLE',
-    17: 'DC1',
-    18: 'DC2',
-    19: 'DC3',
-    20: 'DC4',
+    4: 'EOT',  # End of transmission
+    5: 'ENQ',  # 'Enquiry',
+    6: 'ACK',  # 'Acknowledgement',
+    7: 'BEL',  # 'Bell',
+    8: 'BS',   # 'BackSpace',
+    # 9: 'HT'  # 'HorizontalTab',
+    # 10: 'LF',  # 'LineFeed',
+    11: 'VT',  # 'VerticalTab',
+    12: 'FF',  # 'FormFeed', AKA 'NewPage'
+    13: 'CR',  # 'CarriageReturn',
+    14: 'SO',  # 'ShiftOut',
+    15: 'SI',  # 'ShiftIn',
+    16: 'DLE',  # 'DataLineEscape',
+    17: 'DC1',  # DeviceControl1',
+    18: 'DC2',  # 'DeviceControl2',
+    19: 'DC3',  # 'DeviceControl3',
+    20: 'DC4',  # 'DeviceControl4',
     21: 'NAK',  # NegativeAcknowledgement',
-    22: 'SYN',
-    23: 'ETB',
-    24: 'CAN',
-    25: 'EM',
-    26: 'SUB',
-    27: 'ESC',
-    28: 'FS',
-    29: 'GS',
-    30: 'RS',
-    31: 'US',
-    127: 'DEL',
+    22: 'SYN',  # 'SynchronousIdle',
+    23: 'ETB',  # 'EndTransmitBlock',
+    24: 'CAN',  # 'Cancel',
+    25: 'EM',   # 'EndMedium',
+    26: 'SUB',  # 'Substitute',
+    27: 'ESC',  # 'Escape',
+    28: 'FS',   # 'FileSeparator',
+    29: 'GS',   # 'GroupSeparator',
+    30: 'RS',   # 'RecordSeparator',
+    31: 'US',   # 'UnitSeparator',
+    127: 'DEL',  # Delete
 }
 
 
@@ -116,12 +116,12 @@ UNPRINTABLE_UTF_8.update({
 UNPRINTABLE_WIN_1252 = UNPRINTABLE_ASCII.copy()
 
 UNPRINTABLE_WIN_1252.update({
-    129: 'HOP',
-    141: 'RLF',
-    143: 'SS3',
-    144: 'DCS',
-    147: 'STS',
-    160: 'NBSP',
+    129: 'HOP',   # High Octet Preset
+    141: 'RLF',   # Reverse Line Feed
+    143: 'SS3',   # Single shift 3
+    144: 'DCS',   # Device Control String
+    147: 'STS',   # Set transmit state
+    160: 'NBSP',  # Non-breaking space
 })
 
 
@@ -146,7 +146,6 @@ ENCODINGS_TO_ATTEMPT = {
     UTF_32: None,  # UTF-16 and 32 are handled differently
     ISO_8859_1: UNPRINTABLE_ISO_8859_1,
     WINDOWS_1252: UNPRINTABLE_WIN_1252,
-    #'utf-7':
 }
 
 SINGLE_BYTE_ENCODINGS = [
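A quick sketch of how the newly annotated `UNPRINTABLE_ASCII` table is meant to be read, assuming the module imports cleanly on its own: keys are byte values, values are the short labels rendered in place of unprintable characters.

from yaralyzer.encoding_detection.character_encodings import UNPRINTABLE_ASCII

assert UNPRINTABLE_ASCII[27] == 'ESC'      # 0x1B, per the table above
assert UNPRINTABLE_ASCII[127] == 'DEL'
assert ord('A') not in UNPRINTABLE_ASCII   # printable bytes have no entry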
yaralyzer/encoding_detection/encoding_assessment.py
CHANGED
@@ -7,7 +7,7 @@ from rich.text import Text
 
 from yaralyzer.encoding_detection.character_encodings import ENCODING
 from yaralyzer.helpers.rich_text_helper import (DIM_COUNTRY_THRESHOLD, meter_style,
-
+                                                prefix_with_style)
 
 CONFIDENCE = 'confidence'
 LANGUAGE = 'language'
@@ -20,7 +20,7 @@ class EncodingAssessment:
 
         # Shift confidence from 0-1.0 scale to 0-100.0 scale
         self.confidence = 100.0 * (self._get_dict_empty_value_as_None(CONFIDENCE) or 0.0)
-        self.confidence_text =
+        self.confidence_text = prefix_with_style(f"{round(self.confidence, 1)}%", style=meter_style(self.confidence))
 
         # Add detected language info and label if any language was detected
         self.language = self._get_dict_empty_value_as_None(LANGUAGE)
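Each `EncodingAssessment` wraps one entry of chardet's output. A hedged sketch of the raw chardet call and the same 0-100 confidence rescaling used above (the sample bytes are arbitrary):

import chardet

result = chardet.detect(b'\x93curly quotes\x94 and caf\xe9')  # dict with 'encoding', 'confidence', 'language'
confidence_pct = 100.0 * (result['confidence'] or 0.0)        # same 0-100 rescaling as EncodingAssessment
print(result['encoding'], result.get('language'), confidence_pct)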
yaralyzer/encoding_detection/encoding_detector.py
CHANGED
@@ -9,7 +9,6 @@ import chardet
 from rich import box
 from rich.padding import Padding
 from rich.table import Table
-from rich.text import Text
 
 from yaralyzer.config import YaralyzerConfig
 from yaralyzer.encoding_detection.encoding_assessment import ENCODING, EncodingAssessment
@@ -83,7 +82,7 @@ class EncodingDetector:
                 self.unique_assessments.append(result)
                 already_seen_encodings[result.encoding] = result
             else:
-                log.debug(f"Skipping chardet result {result}
+                log.debug(f"Skipping chardet result {result} (already saw {already_seen_encodings[result.encoding]})")
 
         self.unique_assessments.sort(key=attrgetter('confidence'), reverse=True)
 
@@ -105,8 +104,8 @@ def _empty_chardet_results_table():
         style='dim',
         box=box.SIMPLE,
         show_edge=False,
-        collapse_padding=True
+        collapse_padding=True
+    )
 
     table.columns[0].justify = 'right'
-    table.columns  # TODO: ???
     return table
yaralyzer/helpers/bytes_helper.py
CHANGED
@@ -144,9 +144,9 @@ def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
 def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
     """
     Find the position of bytes_str in surrounding_byte_str. Both args are raw text dumps of binary data.
-    Because strings are longer than bytes (stuff like '\xcc' are 4 chars when printed are one byte and
-    include stuff like 'NegativeAcknowledgement' which is over 20 chars) they represent
-    correctly.
+    Because strings are longer than bytes (stuff like '\xcc' are 4 chars when printed are one byte and
+    the ANSI unprintables include stuff like 'NegativeAcknowledgement' which is over 20 chars) they represent
+    so we have to re-find the location to highlight the bytes correctly.
     """
     # Start a few chars in to avoid errors: sometimes we're searching for 1 or 2 bytes and there's a false positive
     # in the extra bytes. Tthis isn't perfect - it's starting us at the first index into the *bytes* that's safe to
@@ -155,7 +155,7 @@ def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: st
 
     # TODO: Somehow \' and ' don't always come out the same :(
     if highlight_idx == -1:
-        log.info(f"Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
+        log.info(f"Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
                  "Highlighting may be off by a few chars,")
 
     surrounding_bytes_str = surrounding_bytes_str.replace("\\'", "'")
yaralyzer/helpers/dict_helper.py
CHANGED
yaralyzer/helpers/list_helper.py
CHANGED
yaralyzer/helpers/rich_text_helper.py
CHANGED
@@ -40,7 +40,7 @@ def na_txt(style: Union[str, Style] = 'white'):
     return Text('N/A', style=style)
 
 
-def
+def prefix_with_style(_str: str, style: str, root_style=None) -> Text:
     """Sometimes you need a Text() object to start plain lest the underline or whatever last forever"""
     return Text('', style=root_style or 'white') + Text(_str, style)
 
@@ -61,7 +61,7 @@ def meter_style(meter_pct):
     return style
 
 
-def unprintable_byte_to_text(code: str, style='') -> Text:
+def unprintable_byte_to_text(code: str, style: str = '') -> Text:
     """Used with ASCII escape codes and the like, gives colored results like '[NBSP]'."""
     style = BYTES_HIGHLIGHT if style == BYTES_BRIGHTEST else style
     txt = Text('[', style=style)
@@ -70,7 +70,7 @@ def unprintable_byte_to_text(code: str, style='') -> Text:
     return txt
 
 
-def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None]=None):
+def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
     """Apply 'dim' style if 'is_dim'. 'style' overrides for Text and applies for strings."""
     txt = txt.copy() if isinstance(txt, Text) else Text(txt, style=style or '')
 
@@ -95,24 +95,24 @@ def show_color_theme(styles: dict) -> None:
     console.print(Panel('The Yaralyzer Color Theme', style='reverse'))
 
     colors = [
-
+        prefix_with_style(name[:MAX_THEME_COL_SIZE], style=str(style)).append(' ')
         for name, style in styles.items()
         if name not in ['reset', 'repr_url']
     ]
 
-    console.print(Columns(colors, column_first=True, padding=(0,5), equal=True))
+    console.print(Columns(colors, column_first=True, padding=(0, 5), equal=True))
 
 
 def size_text(num_bytes: int) -> Text:
     """Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)"""
-    kb_txt =
+    kb_txt = prefix_with_style("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
     kb_txt.append(' kb ')
     bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
     return kb_txt + bytes_txt
 
 
 def size_in_bytes_text(num_bytes: int) -> Text:
-    return
+    return Text(f"{num_bytes:,d}", 'number').append(' bytes', style='white')
 
 
 def newline_join(texts: List[Text]) -> Text:
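A small usage sketch of the helpers touched most by this diff, based only on the signatures shown above and assuming the module imports outside a full yaralyzer run; styles are merely attached here and resolve against the theme at render time.

from yaralyzer.helpers.rich_text_helper import prefix_with_style, size_text

# Start from a plain-styled empty Text so the 'number' style doesn't bleed into later appends
headline = prefix_with_style('38', style='number', root_style='decode.subheading')
headline.append(' bytes matching ')

print(size_text(54213))  # roughly "52.9 kb (54,213 bytes)" given size_text() above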
yaralyzer/output/file_export.py
CHANGED
@@ -83,7 +83,7 @@ def invoke_rich_export(export_method, output_file_basepath) -> str:
         kwargs.update({'clear': False})
 
     if 'svg' in method_name:
-        kwargs.update({'title': path.basename(output_file_path)
+        kwargs.update({'title': path.basename(output_file_path)})
 
     # Invoke it
     log_and_print(f"Invoking Rich.console.{method_name}('{output_file_path}') with kwargs: '{kwargs}'...")
yaralyzer/output/file_hashes_table.py
CHANGED
@@ -14,10 +14,10 @@ BytesInfo = namedtuple('BytesInfo', ['size', 'md5', 'sha1', 'sha256'])
 
 
 def bytes_hashes_table(
-
-
-
-
+    bytes_or_bytes_info: Union[bytes, BytesInfo],
+    title: Optional[str] = None,
+    title_justify: str = LEFT
+) -> Table:
     """Build a table to show the MD5, SHA1, SHA256, etc."""
     if isinstance(bytes_or_bytes_info, bytes):
         bytes_info = compute_file_hashes(bytes_or_bytes_info)
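Based on the reformatted signature above, a hedged sketch of calling `bytes_hashes_table` directly with raw bytes (it assumes the shared `console` exported by `rich_console`, as the `__init__.py` hunk earlier suggests):

from yaralyzer.output.file_hashes_table import bytes_hashes_table
from yaralyzer.output.rich_console import console

table = bytes_hashes_table(b'bytes to fingerprint', title='sample.bin')  # title is optional
console.print(table)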
yaralyzer/output/rich_console.py
CHANGED
@@ -123,5 +123,5 @@ def print_fatal_error_and_exit(error_message: str) -> None:
     exit()
 
 
-def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0,2)) -> None:
+def print_header_panel(headline: str, style: str, expand: bool = True, padding: tuple = (0, 2)) -> None:
     console.print(Panel(headline, box=box.DOUBLE_EDGE, style=style, expand=expand, padding=padding))
yaralyzer/util/argument_parser.py
CHANGED
@@ -78,8 +78,8 @@ source.add_argument('--regex-modifier', '-mod',
 # Fine tuning
 tuning = parser.add_argument_group(
     'FINE TUNING',
-    "Tune various aspects of the analyses and visualizations to your needs. As an example setting " +
-    "a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can " +
+    "Tune various aspects of the analyses and visualizations to your needs. As an example setting " +
+    "a low --max-decode-length (or suppressing brute force binary decode attempts altogether) can " +
     "dramatically improve run times and only occasionally leads to a fatal lack of insight.")
 
 tuning.add_argument('--maximize-width', action='store_true',
@@ -119,14 +119,14 @@ tuning.add_argument('--min-chardet-bytes',
                     type=int)
 
 tuning.add_argument('--min-chardet-table-confidence',
-                    help="minimum chardet confidence to display the encoding name/score in the character " +
+                    help="minimum chardet confidence to display the encoding name/score in the character " +
                          "decection scores table",
                     default=YaralyzerConfig.DEFAULT_MIN_CHARDET_TABLE_CONFIDENCE,
                     metavar='PCT_CONFIDENCE',
                     type=int)
 
 tuning.add_argument('--force-display-threshold',
-                    help="encodings with chardet confidence below this number will neither be displayed nor " +
+                    help="encodings with chardet confidence below this number will neither be displayed nor " +
                          "decoded in the decodings table",
                     default=EncodingDetector.force_display_threshold,
                     metavar='PCT_CONFIDENCE',
@@ -134,9 +134,9 @@ tuning.add_argument('--force-display-threshold',
                     choices=CONFIDENCE_SCORE_RANGE)
 
 tuning.add_argument('--force-decode-threshold',
-                    help="extremely high (AKA 'above this number') confidence scores from chardet.detect() " +
-                         "as to the likelihood some bytes were written with a particular encoding will cause " +
-                         "the yaralyzer to attempt decoding those bytes in that encoding even if it is not a " +
+                    help="extremely high (AKA 'above this number') confidence scores from chardet.detect() " +
+                         "as to the likelihood some bytes were written with a particular encoding will cause " +
+                         "the yaralyzer to attempt decoding those bytes in that encoding even if it is not a " +
                          "configured encoding",
                     default=EncodingDetector.force_decode_threshold,
                     metavar='PCT_CONFIDENCE',
@@ -159,8 +159,8 @@ tuning.add_argument('--yara-stack-size',
 # Export options
 export = parser.add_argument_group(
     'FILE EXPORT',
-    "Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " +
-    "formats in parallel. Writes files to the current directory if --output-dir is not provided. " +
+    "Multiselect. Choosing nothing is choosing nothing. Sends what you see on the screen to various file " +
+    "formats in parallel. Writes files to the current directory if --output-dir is not provided. " +
     "Filenames are expansions of the scanned filename though you can use --file-prefix to make your " +
     "filenames more unique and beautiful to their beholder.")
 
@@ -282,7 +282,7 @@ def parse_arguments(args: Optional[Namespace] = None):
 
 def get_export_basepath(args: Namespace, yaralyzer: Yaralyzer):
     file_prefix = (args.file_prefix + '_') if args.file_prefix else ''
-    args.output_basename
+    args.output_basename = f"{file_prefix}{yaralyzer._filename_string()}"  # noqa: E221
     args.output_basename += f"__maxdecode{YaralyzerConfig.args.max_decode_length}"
     args.output_basename += ('_' + args.file_suffix) if args.file_suffix else ''
     return path.join(args.output_dir, args.output_basename + f"__at_{args.invoked_at_str}")
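The argument_parser changes above are indentation-only fixes to help strings built by concatenating short literals so each line stays under the flake8 length limit. A self-contained sketch of the same argparse pattern, using a throwaway parser rather than yaralyzer's real one:

import argparse

parser = argparse.ArgumentParser()
tuning = parser.add_argument_group(
    'FINE TUNING',
    "Tune various aspects of the analyses and visualizations to your needs.")
tuning.add_argument('--min-chardet-table-confidence',
                    help="minimum chardet confidence to display the encoding name/score " +
                         "in the character detection scores table",
                    metavar='PCT_CONFIDENCE',
                    type=int)

args = parser.parse_args(['--min-chardet-table-confidence', '40'])
assert args.min_chardet_table_confidence == 40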
yaralyzer/util/logging.py
CHANGED
yaralyzer/yara/yara_match.py
CHANGED
@@ -97,7 +97,7 @@ def _rich_yara_match(element: Any, depth: int = 0) -> Text:
         list_txt = Text('[', style='white')
 
         if total_length > console_width() or len(element) > 3:
-            join_txt = Text(f"\n{indent}"
+            join_txt = Text(f"\n{indent}")
             list_txt.append(join_txt).append(Text(f",{join_txt}").join(elements_txt))
             list_txt += Text(f'\n{end_indent}]', style='white')
         else:
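A runnable distillation of the `rich.text.Text` joining shown in this hunk, with made-up elements standing in for the YARA match fields:

from rich.text import Text

elements_txt = [Text('alpha'), Text('beta'), Text('gamma')]
indent = ' ' * 4
join_txt = Text(f"\n{indent}")

list_txt = Text('[', style='white')
list_txt.append(join_txt).append(Text(f",{join_txt}").join(elements_txt))
list_txt += Text('\n]', style='white')
print(list_txt)  # prints the bracketed, one-element-per-line list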
|