yaralyzer 1.0.6__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yaralyzer might be problematic. Click here for more details.
- CHANGELOG.md +10 -0
- yaralyzer/__init__.py +5 -2
- yaralyzer/bytes_match.py +145 -52
- yaralyzer/config.py +18 -6
- yaralyzer/decoding/bytes_decoder.py +34 -15
- yaralyzer/decoding/decoding_attempt.py +10 -9
- yaralyzer/encoding_detection/character_encodings.py +40 -40
- yaralyzer/encoding_detection/encoding_assessment.py +10 -4
- yaralyzer/encoding_detection/encoding_detector.py +17 -13
- yaralyzer/helpers/bytes_helper.py +113 -16
- yaralyzer/helpers/dict_helper.py +1 -2
- yaralyzer/helpers/file_helper.py +3 -3
- yaralyzer/helpers/list_helper.py +1 -0
- yaralyzer/helpers/rich_text_helper.py +13 -11
- yaralyzer/helpers/string_helper.py +1 -1
- yaralyzer/output/file_export.py +2 -1
- yaralyzer/output/file_hashes_table.py +34 -6
- yaralyzer/output/regex_match_metrics.py +13 -10
- yaralyzer/output/rich_console.py +18 -3
- yaralyzer/util/argument_parser.py +11 -10
- yaralyzer/util/logging.py +6 -6
- yaralyzer/yara/yara_match.py +1 -1
- yaralyzer/yara/yara_rule_builder.py +16 -17
- yaralyzer/yaralyzer.py +66 -51
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/METADATA +12 -7
- yaralyzer-1.0.8.dist-info/RECORD +32 -0
- yaralyzer-1.0.6.dist-info/RECORD +0 -32
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/LICENSE +0 -0
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/WHEEL +0 -0
- {yaralyzer-1.0.6.dist-info → yaralyzer-1.0.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
|
-
Constants related to character encodings
|
|
2
|
+
Constants related to character encodings.
|
|
3
|
+
|
|
3
4
|
* https://www.mit.edu/people/kenta/two/iso8859.html
|
|
4
5
|
* https://www.utf8-chartable.de/unicode-utf8-table.pl?utf8=dec
|
|
5
6
|
"""
|
|
@@ -13,7 +14,7 @@ ASCII = 'ascii'
|
|
|
13
14
|
UTF_8 = 'utf-8'
|
|
14
15
|
UTF_16 = 'utf-16'
|
|
15
16
|
UTF_32 = 'utf-32'
|
|
16
|
-
ISO_8859_1 =
|
|
17
|
+
ISO_8859_1 = 'iso-8859-1'
|
|
17
18
|
WINDOWS_1252 = 'windows-1252'
|
|
18
19
|
|
|
19
20
|
|
|
@@ -32,39 +33,39 @@ BOMS = {
|
|
|
32
33
|
# ASCII characters that either print nothing, put the cursor in a weird place, or (worst of all) actively
|
|
33
34
|
# delete stuff you already printed
|
|
34
35
|
UNPRINTABLE_ASCII = {
|
|
35
|
-
0: 'NUL',
|
|
36
|
-
1: 'SOH',
|
|
37
|
-
2: 'STX',
|
|
36
|
+
0: 'NUL', # 'Null',
|
|
37
|
+
1: 'SOH', # 'StartHeading',
|
|
38
|
+
2: 'STX', # 'StartText',
|
|
38
39
|
3: 'ETX',
|
|
39
|
-
4: 'EOT',
|
|
40
|
-
5: 'ENQ',
|
|
41
|
-
6: 'ACK',
|
|
42
|
-
7: 'BEL',
|
|
43
|
-
8: 'BS',
|
|
44
|
-
#9: 'HT'
|
|
45
|
-
#10: 'LF', # 'LineFeed',
|
|
46
|
-
11: 'VT',
|
|
47
|
-
12: 'FF',
|
|
48
|
-
13: 'CR',
|
|
49
|
-
14: 'SO',
|
|
50
|
-
15: 'SI',
|
|
51
|
-
16: 'DLE',
|
|
52
|
-
17: 'DC1',
|
|
53
|
-
18: 'DC2',
|
|
54
|
-
19: 'DC3',
|
|
55
|
-
20: 'DC4',
|
|
40
|
+
4: 'EOT', # End of transmission
|
|
41
|
+
5: 'ENQ', # 'Enquiry',
|
|
42
|
+
6: 'ACK', # 'Acknowledgement',
|
|
43
|
+
7: 'BEL', # 'Bell',
|
|
44
|
+
8: 'BS', # 'BackSpace',
|
|
45
|
+
# 9: 'HT' # 'HorizontalTab',
|
|
46
|
+
# 10: 'LF', # 'LineFeed',
|
|
47
|
+
11: 'VT', # 'VerticalTab',
|
|
48
|
+
12: 'FF', # 'FormFeed', AKA 'NewPage'
|
|
49
|
+
13: 'CR', # 'CarriageReturn',
|
|
50
|
+
14: 'SO', # 'ShiftOut',
|
|
51
|
+
15: 'SI', # 'ShiftIn',
|
|
52
|
+
16: 'DLE', # 'DataLineEscape',
|
|
53
|
+
17: 'DC1', # DeviceControl1',
|
|
54
|
+
18: 'DC2', # 'DeviceControl2',
|
|
55
|
+
19: 'DC3', # 'DeviceControl3',
|
|
56
|
+
20: 'DC4', # 'DeviceControl4',
|
|
56
57
|
21: 'NAK', # NegativeAcknowledgement',
|
|
57
|
-
22: 'SYN',
|
|
58
|
-
23: 'ETB',
|
|
59
|
-
24: 'CAN',
|
|
60
|
-
25: 'EM',
|
|
61
|
-
26: 'SUB',
|
|
62
|
-
27: 'ESC',
|
|
63
|
-
28: 'FS',
|
|
64
|
-
29: 'GS',
|
|
65
|
-
30: 'RS',
|
|
66
|
-
31: 'US',
|
|
67
|
-
127: 'DEL',
|
|
58
|
+
22: 'SYN', # 'SynchronousIdle',
|
|
59
|
+
23: 'ETB', # 'EndTransmitBlock',
|
|
60
|
+
24: 'CAN', # 'Cancel',
|
|
61
|
+
25: 'EM', # 'EndMedium',
|
|
62
|
+
26: 'SUB', # 'Substitute',
|
|
63
|
+
27: 'ESC', # 'Escape',
|
|
64
|
+
28: 'FS', # 'FileSeparator',
|
|
65
|
+
29: 'GS', # 'GroupSeparator',
|
|
66
|
+
30: 'RS', # 'RecordSeparator',
|
|
67
|
+
31: 'US', # 'UnitSeparator',
|
|
68
|
+
127: 'DEL', # Delete
|
|
68
69
|
}
|
|
69
70
|
|
|
70
71
|
|
|
@@ -116,12 +117,12 @@ UNPRINTABLE_UTF_8.update({
|
|
|
116
117
|
UNPRINTABLE_WIN_1252 = UNPRINTABLE_ASCII.copy()
|
|
117
118
|
|
|
118
119
|
UNPRINTABLE_WIN_1252.update({
|
|
119
|
-
129: 'HOP',
|
|
120
|
-
141: 'RLF',
|
|
121
|
-
143: 'SS3',
|
|
122
|
-
144: 'DCS',
|
|
123
|
-
147: 'STS',
|
|
124
|
-
160: 'NBSP',
|
|
120
|
+
129: 'HOP', # High Octet Preset
|
|
121
|
+
141: 'RLF', # Reverse Line Feed
|
|
122
|
+
143: 'SS3', # Single shift 3
|
|
123
|
+
144: 'DCS', # Device Control String
|
|
124
|
+
147: 'STS', # Set transmit state
|
|
125
|
+
160: 'NBSP', # Non-breaking space
|
|
125
126
|
})
|
|
126
127
|
|
|
127
128
|
|
|
@@ -146,7 +147,6 @@ ENCODINGS_TO_ATTEMPT = {
|
|
|
146
147
|
UTF_32: None, # UTF-16 and 32 are handled differently
|
|
147
148
|
ISO_8859_1: UNPRINTABLE_ISO_8859_1,
|
|
148
149
|
WINDOWS_1252: UNPRINTABLE_WIN_1252,
|
|
149
|
-
#'utf-7':
|
|
150
150
|
}
|
|
151
151
|
|
|
152
152
|
SINGLE_BYTE_ENCODINGS = [
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Help with chardet library.
|
|
3
3
|
"""
|
|
4
4
|
from typing import Any, Optional
|
|
5
5
|
|
|
@@ -7,27 +7,33 @@ from rich.text import Text
|
|
|
7
7
|
|
|
8
8
|
from yaralyzer.encoding_detection.character_encodings import ENCODING
|
|
9
9
|
from yaralyzer.helpers.rich_text_helper import (DIM_COUNTRY_THRESHOLD, meter_style,
|
|
10
|
-
|
|
10
|
+
prefix_with_style)
|
|
11
11
|
|
|
12
12
|
CONFIDENCE = 'confidence'
|
|
13
13
|
LANGUAGE = 'language'
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class EncodingAssessment:
|
|
17
|
+
"""Class to smooth some of the rough edges around the dicts returned by chardet.detect_all()"""
|
|
18
|
+
|
|
17
19
|
def __init__(self, assessment: dict) -> None:
|
|
20
|
+
"""
|
|
21
|
+
Args:
|
|
22
|
+
assessment (dict): The dict returned by chardet.detect_all().
|
|
23
|
+
"""
|
|
18
24
|
self.assessment = assessment
|
|
19
25
|
self.encoding = assessment[ENCODING].lower()
|
|
20
26
|
|
|
21
27
|
# Shift confidence from 0-1.0 scale to 0-100.0 scale
|
|
22
28
|
self.confidence = 100.0 * (self._get_dict_empty_value_as_None(CONFIDENCE) or 0.0)
|
|
23
|
-
self.confidence_text =
|
|
29
|
+
self.confidence_text = prefix_with_style(f"{round(self.confidence, 1)}%", style=meter_style(self.confidence))
|
|
24
30
|
|
|
25
31
|
# Add detected language info and label if any language was detected
|
|
26
32
|
self.language = self._get_dict_empty_value_as_None(LANGUAGE)
|
|
27
33
|
self.set_encoding_label(self.language.title() if self.language else None)
|
|
28
34
|
|
|
29
35
|
@classmethod
|
|
30
|
-
def dummy_encoding_assessment(cls, encoding) -> 'EncodingAssessment':
|
|
36
|
+
def dummy_encoding_assessment(cls, encoding: str) -> 'EncodingAssessment':
|
|
31
37
|
"""Generate an empty EncodingAssessment to use as a dummy when chardet gives us nothing."""
|
|
32
38
|
assessment = cls({ENCODING: encoding, CONFIDENCE: 0.0})
|
|
33
39
|
assessment.confidence_text = Text('none', 'no_attempt')
|
|
@@ -1,7 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Manager class to ease dealing with the chardet encoding detection library 'chardet'.
|
|
3
|
-
Each instance of this class manages a chardet.detect_all() scan on a single set of bytes.
|
|
4
|
-
"""
|
|
1
|
+
"""EncodingDetector class for managing chardet encoding detection."""
|
|
5
2
|
from operator import attrgetter
|
|
6
3
|
from typing import List
|
|
7
4
|
|
|
@@ -9,7 +6,6 @@ import chardet
|
|
|
9
6
|
from rich import box
|
|
10
7
|
from rich.padding import Padding
|
|
11
8
|
from rich.table import Table
|
|
12
|
-
from rich.text import Text
|
|
13
9
|
|
|
14
10
|
from yaralyzer.config import YaralyzerConfig
|
|
15
11
|
from yaralyzer.encoding_detection.encoding_assessment import ENCODING, EncodingAssessment
|
|
@@ -19,10 +15,15 @@ CONFIDENCE_SCORE_RANGE = range(0, 101)
|
|
|
19
15
|
|
|
20
16
|
|
|
21
17
|
class EncodingDetector:
|
|
22
|
-
|
|
18
|
+
"""
|
|
19
|
+
Manager class to ease dealing with the chardet encoding detection library 'chardet'.
|
|
20
|
+
Each instance of this class manages a chardet.detect_all() scan on a single set of bytes.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
# Default value for encodings w/confidences below this will not be displayed in the decoded table
|
|
23
24
|
force_display_threshold = 20.0
|
|
24
25
|
|
|
25
|
-
#
|
|
26
|
+
# Default value for what chardet.detect() confidence % should we force a decode with an obscure encoding.
|
|
26
27
|
force_decode_threshold = 50.0
|
|
27
28
|
|
|
28
29
|
def __init__(self, _bytes: bytes) -> None:
|
|
@@ -54,21 +55,23 @@ class EncodingDetector:
|
|
|
54
55
|
self.force_display_assessments = self.assessments_above_confidence(type(self).force_display_threshold)
|
|
55
56
|
|
|
56
57
|
def get_encoding_assessment(self, encoding: str) -> EncodingAssessment:
|
|
57
|
-
"""If chardet produced one, return it, otherwise return a dummy node with confidence of 0"""
|
|
58
|
+
"""If chardet produced one, return it, otherwise return a dummy node with confidence of 0."""
|
|
58
59
|
assessment = next((r for r in self.unique_assessments if r.encoding == encoding), None)
|
|
59
60
|
return assessment or EncodingAssessment.dummy_encoding_assessment(encoding)
|
|
60
61
|
|
|
61
62
|
def has_enough_bytes(self) -> bool:
|
|
63
|
+
"""Return true if we have enough bytes to run chardet.detect()."""
|
|
62
64
|
return self.bytes_len >= YaralyzerConfig.args.min_chardet_bytes
|
|
63
65
|
|
|
64
66
|
def assessments_above_confidence(self, cutoff: float) -> List[EncodingAssessment]:
|
|
67
|
+
"""Return the assessments above the given confidence cutoff."""
|
|
65
68
|
return [a for a in self.unique_assessments if a.confidence >= cutoff]
|
|
66
69
|
|
|
67
70
|
def __rich__(self) -> Padding:
|
|
68
71
|
return Padding(self.table, (0, 0, 0, 0))
|
|
69
72
|
|
|
70
73
|
def _uniquify_results_and_build_table(self) -> None:
|
|
71
|
-
"""Keep the highest result per encoding, ignoring the language chardet has indicated"""
|
|
74
|
+
"""Keep the highest result per encoding, ignoring the language chardet has indicated."""
|
|
72
75
|
already_seen_encodings = {}
|
|
73
76
|
|
|
74
77
|
for i, result in enumerate(self.assessments):
|
|
@@ -83,11 +86,12 @@ class EncodingDetector:
|
|
|
83
86
|
self.unique_assessments.append(result)
|
|
84
87
|
already_seen_encodings[result.encoding] = result
|
|
85
88
|
else:
|
|
86
|
-
log.debug(f"Skipping chardet result {result}
|
|
89
|
+
log.debug(f"Skipping chardet result {result} (already saw {already_seen_encodings[result.encoding]})")
|
|
87
90
|
|
|
88
91
|
self.unique_assessments.sort(key=attrgetter('confidence'), reverse=True)
|
|
89
92
|
|
|
90
93
|
def _set_empty_results(self) -> None:
|
|
94
|
+
"""Set empty results for when chardet can't help us."""
|
|
91
95
|
self.assessments = []
|
|
92
96
|
self.unique_assessments = []
|
|
93
97
|
self.raw_chardet_assessments = []
|
|
@@ -96,7 +100,7 @@ class EncodingDetector:
|
|
|
96
100
|
|
|
97
101
|
|
|
98
102
|
def _empty_chardet_results_table():
|
|
99
|
-
"""Returns a fresh table"""
|
|
103
|
+
"""Returns a fresh table."""
|
|
100
104
|
table = Table(
|
|
101
105
|
'Rank', 'Encoding', 'Confidence',
|
|
102
106
|
title='chardet.detect results',
|
|
@@ -105,8 +109,8 @@ def _empty_chardet_results_table():
|
|
|
105
109
|
style='dim',
|
|
106
110
|
box=box.SIMPLE,
|
|
107
111
|
show_edge=False,
|
|
108
|
-
collapse_padding=True
|
|
112
|
+
collapse_padding=True
|
|
113
|
+
)
|
|
109
114
|
|
|
110
115
|
table.columns[0].justify = 'right'
|
|
111
|
-
table.columns # TODO: ???
|
|
112
116
|
return table
|
|
@@ -25,15 +25,35 @@ HEX_CHARS_PER_LINE = HEX_CHARS_PER_GROUP * HEX_GROUPS_PER_LINE
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def get_bytes_before_and_after_match(_bytes: bytes, match: re.Match, num_before=None, num_after=None) -> bytes:
|
|
28
|
-
"""
|
|
29
|
-
Get
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
r"""
|
|
29
|
+
Get bytes before and after a regex match within a byte sequence.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
_bytes (bytes): The full byte sequence.
|
|
33
|
+
match (re.Match): The regex match object.
|
|
34
|
+
num_before (int, optional): Number of bytes before the match to include. Defaults to config.
|
|
35
|
+
num_after (int, optional): Number of bytes after the match to include. Defaults to either config or num_before value.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
bytes: The surrounding bytes including the match.
|
|
32
39
|
"""
|
|
33
40
|
return get_bytes_surrounding_range(_bytes, match.start(), match.end(), num_before, num_after)
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num_before=None, num_after=None) -> bytes:
|
|
44
|
+
r"""
|
|
45
|
+
Get bytes surrounding a specified range in a byte sequence.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
_bytes (bytes): The full byte sequence.
|
|
49
|
+
start_idx (int): Start index of the range.
|
|
50
|
+
end_idx (int): End index of the range.
|
|
51
|
+
num_before (int, optional): Number of bytes before the range. Defaults to config.
|
|
52
|
+
num_after (int, optional): Number of bytes after the range. Defaults to config.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
bytes: The surrounding bytes including the range.
|
|
56
|
+
"""
|
|
37
57
|
num_after = num_after or num_before or YaralyzerConfig.args.surrounding_bytes
|
|
38
58
|
num_before = num_before or YaralyzerConfig.args.surrounding_bytes
|
|
39
59
|
start_idx = max(start_idx - num_before, 0)
|
|
@@ -42,7 +62,16 @@ def get_bytes_surrounding_range(_bytes: bytes, start_idx: int, end_idx: int, num
|
|
|
42
62
|
|
|
43
63
|
|
|
44
64
|
def clean_byte_string(bytes_array: bytes) -> str:
|
|
45
|
-
"""
|
|
65
|
+
r"""
|
|
66
|
+
Return a clean string representation of bytes, without Python's b'' or b"" wrappers.
|
|
67
|
+
e.g. '\x80\nx44' instead of "b'\x80\nx44'".
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
bytes_array (bytes): The bytes to convert.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
str: Clean string representation of the bytes.
|
|
74
|
+
"""
|
|
46
75
|
byte_printer = Console(file=StringIO())
|
|
47
76
|
byte_printer.out(bytes_array, end='')
|
|
48
77
|
bytestr = byte_printer.file.getvalue()
|
|
@@ -58,7 +87,16 @@ def clean_byte_string(bytes_array: bytes) -> str:
|
|
|
58
87
|
|
|
59
88
|
|
|
60
89
|
def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
61
|
-
"""
|
|
90
|
+
r"""
|
|
91
|
+
Return a rich Text object of raw bytes, highlighting the matched bytes.
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
_bytes (bytes): The full byte sequence.
|
|
95
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
Text: Rich Text object with highlighted match.
|
|
99
|
+
"""
|
|
62
100
|
surrounding_bytes_str = clean_byte_string(_bytes)
|
|
63
101
|
highlighted_bytes_str = clean_byte_string(bytes_match.bytes)
|
|
64
102
|
highlighted_bytes_str_length = len(highlighted_bytes_str)
|
|
@@ -72,6 +110,16 @@ def rich_text_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
72
110
|
|
|
73
111
|
|
|
74
112
|
def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
113
|
+
r"""
|
|
114
|
+
Return a hexadecimal view of raw bytes, highlighting the matched bytes.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
_bytes (bytes): The full byte sequence.
|
|
118
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
119
|
+
|
|
120
|
+
Returns:
|
|
121
|
+
Text: Rich Text object with highlighted match in hex view.
|
|
122
|
+
"""
|
|
75
123
|
hex_str = hex_text(_bytes)
|
|
76
124
|
highlight_start_idx = bytes_match.highlight_start_idx * 3
|
|
77
125
|
highlight_end_idx = bytes_match.highlight_end_idx * 3
|
|
@@ -81,6 +129,16 @@ def hex_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
81
129
|
|
|
82
130
|
|
|
83
131
|
def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
132
|
+
r"""
|
|
133
|
+
Return an ASCII view of raw bytes, highlighting the matched bytes.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
_bytes (bytes): The full byte sequence.
|
|
137
|
+
bytes_match (BytesMatch): The BytesMatch object indicating which bytes to highlight.
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
Text: Rich Text object with highlighted match in ASCII view.
|
|
141
|
+
"""
|
|
84
142
|
txt = Text('', style=BYTES)
|
|
85
143
|
|
|
86
144
|
for i, b in enumerate(_bytes):
|
|
@@ -113,23 +171,54 @@ def ascii_view_of_raw_bytes(_bytes: bytes, bytes_match: BytesMatch) -> Text:
|
|
|
113
171
|
|
|
114
172
|
|
|
115
173
|
def hex_text(_bytes: bytes) -> Text:
|
|
174
|
+
r"""
|
|
175
|
+
Return a rich Text object of the hex string for the given bytes.
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
_bytes (bytes): The bytes to convert.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
Text: Rich Text object of the hex string.
|
|
182
|
+
"""
|
|
116
183
|
return Text(hex_string(_bytes), style=GREY)
|
|
117
184
|
|
|
118
185
|
|
|
119
186
|
def hex_string(_bytes: bytes) -> str:
|
|
187
|
+
r"""
|
|
188
|
+
Return a hex string representation of the given bytes.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
_bytes (bytes): The bytes to convert.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
str: Hex string representation of the bytes.
|
|
195
|
+
"""
|
|
120
196
|
return ' '.join([hex(b).removeprefix('0x').rjust(2, '0') for i, b in enumerate(_bytes)])
|
|
121
197
|
|
|
122
198
|
|
|
123
199
|
def print_bytes(bytes_array: bytes, style=None) -> None:
|
|
124
|
-
"""
|
|
200
|
+
r"""
|
|
201
|
+
Print a string representation of bytes to the console.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
bytes_array (bytes): The bytes to print.
|
|
205
|
+
style (str, optional): Style to use for printing. Defaults to 'bytes'.
|
|
206
|
+
"""
|
|
125
207
|
for line in bytes_array.split(NEWLINE_BYTE):
|
|
126
208
|
console.print(escape(clean_byte_string(line)), style=style or 'bytes')
|
|
127
209
|
|
|
128
210
|
|
|
129
211
|
def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
|
|
130
|
-
"""
|
|
131
|
-
Truncate bytes to
|
|
132
|
-
For utf-16 this means
|
|
212
|
+
r"""
|
|
213
|
+
Truncate bytes to a multiple of the character width for the given encoding.
|
|
214
|
+
For example, for utf-16 this means truncating to a multiple of 2, for utf-32 to a multiple of 4.
|
|
215
|
+
|
|
216
|
+
Args:
|
|
217
|
+
_bytes (bytes): The bytes to truncate.
|
|
218
|
+
encoding (str): The encoding to consider.
|
|
219
|
+
|
|
220
|
+
Returns:
|
|
221
|
+
bytes: Truncated bytes.
|
|
133
222
|
"""
|
|
134
223
|
char_width = encoding_width(encoding)
|
|
135
224
|
num_bytes = len(_bytes)
|
|
@@ -142,11 +231,19 @@ def truncate_for_encoding(_bytes: bytes, encoding: str) -> bytes:
|
|
|
142
231
|
|
|
143
232
|
|
|
144
233
|
def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: str, highlighted_bytes: BytesMatch):
|
|
145
|
-
"""
|
|
146
|
-
Find the position of
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
234
|
+
r"""
|
|
235
|
+
Find the position of the highlighted bytes string within the surrounding bytes string.
|
|
236
|
+
|
|
237
|
+
Both arguments are string representations of binary data. This is needed because the string
|
|
238
|
+
representation of bytes can be longer than the actual bytes (e.g., '\\xcc' is 4 chars for 1 byte).
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
surrounding_bytes_str (str): String representation of the full byte sequence.
|
|
242
|
+
highlighted_bytes_str (str): String representation of the matched bytes.
|
|
243
|
+
highlighted_bytes (BytesMatch): The BytesMatch object for context.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
int: The index in the surrounding string where the highlighted bytes start, or -1 if not found.
|
|
150
247
|
"""
|
|
151
248
|
# Start a few chars in to avoid errors: sometimes we're searching for 1 or 2 bytes and there's a false positive
|
|
152
249
|
# in the extra bytes. This isn't perfect - it's starting us at the first index into the *bytes* that's safe to
|
|
@@ -155,7 +252,7 @@ def _find_str_rep_of_bytes(surrounding_bytes_str: str, highlighted_bytes_str: st
|
|
|
155
252
|
|
|
156
253
|
# TODO: Somehow \' and ' don't always come out the same :(
|
|
157
254
|
if highlight_idx == -1:
|
|
158
|
-
log.info(f"Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
|
|
255
|
+
log.info(f"Failed to find highlighted_bytes in first pass so deleting single quotes and retrying. " +
|
|
159
256
|
"Highlighting may be off by a few chars,")
|
|
160
257
|
|
|
161
258
|
surrounding_bytes_str = surrounding_bytes_str.replace("\\'", "'")
|
yaralyzer/helpers/dict_helper.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Help with dicts.
|
|
3
3
|
"""
|
|
4
|
-
from numbers import Number
|
|
5
4
|
|
|
6
5
|
|
|
7
6
|
def get_dict_key_by_value(_dict: dict, value):
|
|
8
|
-
"""Inverse of the usual dict operation"""
|
|
7
|
+
"""Inverse of the usual dict operation."""
|
|
9
8
|
return list(_dict.keys())[list(_dict.values()).index(value)]
|
yaralyzer/helpers/file_helper.py
CHANGED
|
@@ -7,12 +7,12 @@ from typing import List, Optional
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def timestamp_for_filename() -> str:
|
|
10
|
-
"""Returns a string showing current time in a file name friendly format"""
|
|
10
|
+
"""Returns a string showing current time in a file name friendly format."""
|
|
11
11
|
return datetime.now().strftime("%Y-%m-%dT%H.%M.%S")
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def files_in_dir(dir: str, with_extname: Optional[str] = None) -> List[str]:
|
|
15
|
-
"""paths for non dot files, optionally ending in 'with_extname'"""
|
|
15
|
+
"""paths for non dot files, optionally ending in 'with_extname'."""
|
|
16
16
|
files = [path.join(dir, path.basename(file)) for file in listdir(dir) if not file.startswith('.')]
|
|
17
17
|
files = [file for file in files if not path.isdir(file)]
|
|
18
18
|
|
|
@@ -27,7 +27,7 @@ def files_with_extname(files: List[str], extname: str) -> List[str]:
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def load_word_list(file_path):
|
|
30
|
-
"""For very simple files (1 col CSVs, if you
|
|
30
|
+
"""For very simple files (1 col CSVs, if you will)."""
|
|
31
31
|
with open(file_path, 'r') as f:
|
|
32
32
|
return [line.rstrip().lstrip() for line in f.readlines()]
|
|
33
33
|
|
yaralyzer/helpers/list_helper.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Methods to handle turning various objects into Rich text/table/etc representations
|
|
3
|
+
|
|
3
4
|
Rich colors: https://rich.readthedocs.io/en/stable/appendix/colors.html
|
|
4
5
|
TODO: interesting colors # row_styles[0] = 'reverse bold on color(144)' <-
|
|
5
6
|
"""
|
|
@@ -40,13 +41,13 @@ def na_txt(style: Union[str, Style] = 'white'):
|
|
|
40
41
|
return Text('N/A', style=style)
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
def
|
|
44
|
-
"""Sometimes you need a Text() object to start plain lest the underline or whatever last forever"""
|
|
44
|
+
def prefix_with_style(_str: str, style: str, root_style=None) -> Text:
|
|
45
|
+
"""Sometimes you need a Text() object to start plain lest the underline or whatever last forever."""
|
|
45
46
|
return Text('', style=root_style or 'white') + Text(_str, style)
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
def meter_style(meter_pct):
|
|
49
|
-
"""For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer"""
|
|
50
|
+
"""For coloring numbers between 0 and 100 (AKA pcts). Closer to 100 means greener, closer to 0.0 means bluer."""
|
|
50
51
|
if meter_pct > 100 or meter_pct < 0:
|
|
51
52
|
log.warning(f"Invalid meter_pct: {meter_pct}")
|
|
52
53
|
|
|
@@ -61,7 +62,7 @@ def meter_style(meter_pct):
|
|
|
61
62
|
return style
|
|
62
63
|
|
|
63
64
|
|
|
64
|
-
def unprintable_byte_to_text(code: str, style='') -> Text:
|
|
65
|
+
def unprintable_byte_to_text(code: str, style: str = '') -> Text:
|
|
65
66
|
"""Used with ASCII escape codes and the like, gives colored results like '[NBSP]'."""
|
|
66
67
|
style = BYTES_HIGHLIGHT if style == BYTES_BRIGHTEST else style
|
|
67
68
|
txt = Text('[', style=style)
|
|
@@ -70,7 +71,7 @@ def unprintable_byte_to_text(code: str, style='') -> Text:
|
|
|
70
71
|
return txt
|
|
71
72
|
|
|
72
73
|
|
|
73
|
-
def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None]=None):
|
|
74
|
+
def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None] = None):
|
|
74
75
|
"""Apply 'dim' style if 'is_dim'. 'style' overrides for Text and applies for strings."""
|
|
75
76
|
txt = txt.copy() if isinstance(txt, Text) else Text(txt, style=style or '')
|
|
76
77
|
|
|
@@ -81,7 +82,7 @@ def dim_if(txt: Union[str, Text], is_dim: bool, style: Union[str, None]=None):
|
|
|
81
82
|
|
|
82
83
|
|
|
83
84
|
def reverse_color(style: Style) -> Style:
|
|
84
|
-
"""Reverses the color for a given style"""
|
|
85
|
+
"""Reverses the color for a given style."""
|
|
85
86
|
return Style(color=style.bgcolor, bgcolor=style.color, underline=style.underline, bold=style.bold)
|
|
86
87
|
|
|
87
88
|
|
|
@@ -95,25 +96,26 @@ def show_color_theme(styles: dict) -> None:
|
|
|
95
96
|
console.print(Panel('The Yaralyzer Color Theme', style='reverse'))
|
|
96
97
|
|
|
97
98
|
colors = [
|
|
98
|
-
|
|
99
|
+
prefix_with_style(name[:MAX_THEME_COL_SIZE], style=str(style)).append(' ')
|
|
99
100
|
for name, style in styles.items()
|
|
100
101
|
if name not in ['reset', 'repr_url']
|
|
101
102
|
]
|
|
102
103
|
|
|
103
|
-
console.print(Columns(colors, column_first=True, padding=(0,5), equal=True))
|
|
104
|
+
console.print(Columns(colors, column_first=True, padding=(0, 5), equal=True))
|
|
104
105
|
|
|
105
106
|
|
|
106
107
|
def size_text(num_bytes: int) -> Text:
|
|
107
|
-
"""Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)"""
|
|
108
|
-
kb_txt =
|
|
108
|
+
"""Convert a number of bytes into (e.g.) 54,213 bytes (52 KB)."""
|
|
109
|
+
kb_txt = prefix_with_style("{:,.1f}".format(num_bytes / 1024), style='bright_cyan', root_style='white')
|
|
109
110
|
kb_txt.append(' kb ')
|
|
110
111
|
bytes_txt = Text('(', 'white') + size_in_bytes_text(num_bytes) + Text(')')
|
|
111
112
|
return kb_txt + bytes_txt
|
|
112
113
|
|
|
113
114
|
|
|
114
115
|
def size_in_bytes_text(num_bytes: int) -> Text:
|
|
115
|
-
return
|
|
116
|
+
return Text(f"{num_bytes:,d}", 'number').append(' bytes', style='white')
|
|
116
117
|
|
|
117
118
|
|
|
118
119
|
def newline_join(texts: List[Text]) -> Text:
|
|
120
|
+
"""Join a list of Text objects with newlines between them."""
|
|
119
121
|
return Text("\n").join(texts)
|
|
@@ -17,7 +17,7 @@ def line_count(_string: str) -> int:
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def hex_to_string(_string: str) -> str:
|
|
20
|
-
"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
|
|
20
|
+
r"""String '0D 0A 25 25 45 4F 46 0D 0A' becomes '\r\n%%EOF\r\n'"""
|
|
21
21
|
return bytearray.fromhex(_string.replace(' ', '')).decode()
|
|
22
22
|
|
|
23
23
|
|
yaralyzer/output/file_export.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
"""Functions to export Yaralyzer results to various file formats."""
|
|
1
2
|
import json
|
|
2
3
|
import time
|
|
3
4
|
from os import path
|
|
@@ -83,7 +84,7 @@ def invoke_rich_export(export_method, output_file_basepath) -> str:
|
|
|
83
84
|
kwargs.update({'clear': False})
|
|
84
85
|
|
|
85
86
|
if 'svg' in method_name:
|
|
86
|
-
kwargs.update({'title': path.basename(output_file_path)
|
|
87
|
+
kwargs.update({'title': path.basename(output_file_path)})
|
|
87
88
|
|
|
88
89
|
# Invoke it
|
|
89
90
|
log_and_print(f"Invoking Rich.console.{method_name}('{output_file_path}') with kwargs: '{kwargs}'...")
|