txt2stix 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2stix/__init__.py +33 -0
- txt2stix/ai_extractor/__init__.py +15 -0
- txt2stix/ai_extractor/anthropic.py +12 -0
- txt2stix/ai_extractor/base.py +87 -0
- txt2stix/ai_extractor/deepseek.py +19 -0
- txt2stix/ai_extractor/gemini.py +18 -0
- txt2stix/ai_extractor/openai.py +15 -0
- txt2stix/ai_extractor/openrouter.py +20 -0
- txt2stix/ai_extractor/prompts.py +164 -0
- txt2stix/ai_extractor/utils.py +85 -0
- txt2stix/attack_flow.py +101 -0
- txt2stix/bundler.py +428 -0
- txt2stix/common.py +23 -0
- txt2stix/extractions.py +59 -0
- txt2stix/includes/__init__.py +0 -0
- txt2stix/includes/extractions/ai/config.yaml +1023 -0
- txt2stix/includes/extractions/lookup/config.yaml +393 -0
- txt2stix/includes/extractions/pattern/config.yaml +609 -0
- txt2stix/includes/helpers/mimetype_filename_extension_list.csv +936 -0
- txt2stix/includes/helpers/stix_relationship_types.txt +41 -0
- txt2stix/includes/helpers/tlds.txt +1446 -0
- txt2stix/includes/helpers/windows_registry_key_prefix.txt +12 -0
- txt2stix/includes/lookups/_README.md +11 -0
- txt2stix/includes/lookups/_generate_lookups.py +247 -0
- txt2stix/includes/lookups/attack_pattern.txt +1 -0
- txt2stix/includes/lookups/campaign.txt +1 -0
- txt2stix/includes/lookups/country_iso3166_alpha2.txt +249 -0
- txt2stix/includes/lookups/course_of_action.txt +1 -0
- txt2stix/includes/lookups/disarm_id_v1_5.txt +345 -0
- txt2stix/includes/lookups/disarm_name_v1_5.txt +347 -0
- txt2stix/includes/lookups/extensions.txt +78 -0
- txt2stix/includes/lookups/identity.txt +1 -0
- txt2stix/includes/lookups/infrastructure.txt +1 -0
- txt2stix/includes/lookups/intrusion_set.txt +1 -0
- txt2stix/includes/lookups/malware.txt +2 -0
- txt2stix/includes/lookups/mitre_atlas_id_v4_5_2.txt +116 -0
- txt2stix/includes/lookups/mitre_atlas_name_v4_5_2.txt +117 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +1502 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +1656 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +1765 -0
- txt2stix/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +141 -0
- txt2stix/includes/lookups/mitre_attack_ics_id_v16_0.txt +254 -0
- txt2stix/includes/lookups/mitre_attack_ics_name_v16_0.txt +293 -0
- txt2stix/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +159 -0
- txt2stix/includes/lookups/mitre_attack_mobile_id_v16_0.txt +277 -0
- txt2stix/includes/lookups/mitre_attack_mobile_name_v16_0.txt +296 -0
- txt2stix/includes/lookups/mitre_capec_id_v3_9.txt +559 -0
- txt2stix/includes/lookups/mitre_capec_name_v3_9.txt +560 -0
- txt2stix/includes/lookups/mitre_cwe_id_v4_15.txt +939 -0
- txt2stix/includes/lookups/mitre_cwe_name_v4_15.txt +939 -0
- txt2stix/includes/lookups/threat_actor.txt +1 -0
- txt2stix/includes/lookups/tld.txt +1422 -0
- txt2stix/includes/lookups/tool.txt +1 -0
- txt2stix/includes/tests/test_cases.yaml +695 -0
- txt2stix/indicator.py +860 -0
- txt2stix/lookups.py +68 -0
- txt2stix/pattern/__init__.py +13 -0
- txt2stix/pattern/extractors/__init__.py +0 -0
- txt2stix/pattern/extractors/base_extractor.py +167 -0
- txt2stix/pattern/extractors/card/README.md +34 -0
- txt2stix/pattern/extractors/card/__init__.py +15 -0
- txt2stix/pattern/extractors/card/amex_card_extractor.py +52 -0
- txt2stix/pattern/extractors/card/diners_card_extractor.py +47 -0
- txt2stix/pattern/extractors/card/discover_card_extractor.py +48 -0
- txt2stix/pattern/extractors/card/jcb_card_extractor.py +43 -0
- txt2stix/pattern/extractors/card/master_card_extractor.py +63 -0
- txt2stix/pattern/extractors/card/union_card_extractor.py +38 -0
- txt2stix/pattern/extractors/card/visa_card_extractor.py +46 -0
- txt2stix/pattern/extractors/crypto/__init__.py +3 -0
- txt2stix/pattern/extractors/crypto/btc_extractor.py +38 -0
- txt2stix/pattern/extractors/directory/__init__.py +10 -0
- txt2stix/pattern/extractors/directory/unix_directory_extractor.py +40 -0
- txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +47 -0
- txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/domain/__init__.py +8 -0
- txt2stix/pattern/extractors/domain/domain_extractor.py +39 -0
- txt2stix/pattern/extractors/domain/hostname_extractor.py +36 -0
- txt2stix/pattern/extractors/domain/sub_domain_extractor.py +49 -0
- txt2stix/pattern/extractors/hashes/__init__.py +16 -0
- txt2stix/pattern/extractors/hashes/md5_extractor.py +16 -0
- txt2stix/pattern/extractors/hashes/sha1_extractor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha224_extractor.py +18 -0
- txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +13 -0
- txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +15 -0
- txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +16 -0
- txt2stix/pattern/extractors/helper.py +64 -0
- txt2stix/pattern/extractors/ip/__init__.py +14 -0
- txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +49 -0
- txt2stix/pattern/extractors/ip/ipv4_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +42 -0
- txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv6_extractor.py +16 -0
- txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +46 -0
- txt2stix/pattern/extractors/others/__init__.py +22 -0
- txt2stix/pattern/extractors/others/asn_extractor.py +14 -0
- txt2stix/pattern/extractors/others/cpe_extractor.py +29 -0
- txt2stix/pattern/extractors/others/cve_extractor.py +14 -0
- txt2stix/pattern/extractors/others/email_extractor.py +21 -0
- txt2stix/pattern/extractors/others/filename_extractor.py +17 -0
- txt2stix/pattern/extractors/others/iban_extractor.py +15 -0
- txt2stix/pattern/extractors/others/mac_address_extractor.py +13 -0
- txt2stix/pattern/extractors/others/phonenumber_extractor.py +41 -0
- txt2stix/pattern/extractors/others/user_agent_extractor.py +20 -0
- txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +18 -0
- txt2stix/pattern/extractors/url/__init__.py +7 -0
- txt2stix/pattern/extractors/url/url_extractor.py +22 -0
- txt2stix/pattern/extractors/url/url_file_extractor.py +21 -0
- txt2stix/pattern/extractors/url/url_path_extractor.py +74 -0
- txt2stix/retriever.py +126 -0
- txt2stix/stix.py +1 -0
- txt2stix/txt2stix.py +336 -0
- txt2stix/utils.py +86 -0
- txt2stix-0.0.4.dist-info/METADATA +190 -0
- txt2stix-0.0.4.dist-info/RECORD +119 -0
- txt2stix-0.0.4.dist-info/WHEEL +4 -0
- txt2stix-0.0.4.dist-info/entry_points.txt +2 -0
- txt2stix-0.0.4.dist-info/licenses/LICENSE +202 -0
txt2stix/indicator.py
ADDED
@@ -0,0 +1,860 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from stix2.parsing import dict_to_stix2
|
5
|
+
from stix2 import HashConstant
|
6
|
+
from stix2.v21.vocab import HASHING_ALGORITHM
|
7
|
+
from stix2.patterns import _HASH_REGEX as HASHING_ALGORITHM_2
|
8
|
+
from ipaddress import ip_address
|
9
|
+
from pathlib import PurePosixPath, PureWindowsPath
|
10
|
+
from phonenumbers import geocoder
|
11
|
+
import logging
|
12
|
+
from stix2extensions.tools import creditcard2stix, crypto2stix
|
13
|
+
from typing import TYPE_CHECKING
|
14
|
+
|
15
|
+
import validators
|
16
|
+
|
17
|
+
from txt2stix.pattern.extractors.others.phonenumber_extractor import PhoneNumberExtractor
|
18
|
+
from txt2stix.utils import validate_file_mimetype, validate_reg_key
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
from .bundler import txt2stixBundler
|
22
|
+
|
23
|
+
# from schwifty import IBAN
|
24
|
+
|
25
|
+
from .common import MinorException
|
26
|
+
|
27
|
+
from .retriever import retrieve_stix_objects
|
28
|
+
|
29
|
+
# Module-level logger, registered under the fixed name "txt2stix.indicator".
logger = logging.getLogger("txt2stix.indicator")
|
30
|
+
|
31
|
+
|
32
|
+
class BadDataException(MinorException):
    """Raised in this module when an extracted value fails validation or lookup."""
|
34
|
+
|
35
|
+
|
36
|
+
def validate_email(email_addr):
    """Check that ``email_addr`` is a well-formed address on a valid domain.

    The text after the last ``@`` is validated as a domain (taking the TLD
    into account) and the whole string is validated as an e-mail address;
    both checks are delegated to the ``validators`` package.

    Args:
        email_addr: Candidate e-mail address string.

    Returns:
        The value of ``validators.domain(...) and validators.email(...)``:
        ``True`` when both checks pass, otherwise whatever the first failing
        validator returned. ``False`` when the value contains no ``@``.
    """
    # Without an "@" there is no domain part to check; report the value as
    # invalid instead of letting tuple unpacking raise ValueError.
    _, separator, domain_part = email_addr.rpartition("@")
    if not separator:
        return False
    return validators.domain(domain_part, consider_tld=True) and validators.email(
        email_addr
    )
|
41
|
+
|
42
|
+
|
43
|
+
def find_hash_type(value, name):
    """Guess the hashing-algorithm label for an extracted hash.

    The extractor ``name`` is checked first: the first entry of
    ``HASHING_ALGORITHM`` (plus ``"SHA-384"``) that occurs in it,
    case-insensitively, is returned. Otherwise each algorithm listed in
    ``HASHING_ALGORITHM_2`` is tried by constructing a ``HashConstant`` from
    ``value``; the first algorithm that is accepted is returned.

    Args:
        value: The hash string.
        name: Extractor name that may mention the algorithm.

    Returns:
        The matching algorithm label, or ``None`` when nothing matches.
    """
    upper_name = name.upper()
    for alg in HASHING_ALGORITHM + ["SHA-384"]:
        if alg.upper() in upper_name:
            return alg
    for _, alg in HASHING_ALGORITHM_2.values():
        try:
            HashConstant(value, alg)
        except Exception:
            # HashConstant rejected the value for this algorithm; try the
            # next one. Only Exception is caught so that KeyboardInterrupt
            # and SystemExit still propagate.
            continue
        return alg
    return None
|
54
|
+
|
55
|
+
|
56
|
+
class ParseObservableError(Exception):
    """Exception type named for observable-parsing failures."""
|
58
|
+
|
59
|
+
|
60
|
+
def parse_path(pathstr):
    """Wrap ``pathstr`` in a pure path object of the matching flavour.

    The string is first read as a Windows path. If rendering that path in
    POSIX form gives back exactly the input string, a ``PurePosixPath`` is
    returned; otherwise the ``PureWindowsPath`` is returned.

    Args:
        pathstr: File or directory path as text.

    Returns:
        A ``PurePosixPath`` or ``PureWindowsPath`` built from ``pathstr``.
    """
    windows_view = PureWindowsPath(pathstr)
    if windows_view.as_posix() != pathstr:
        return windows_view
    return PurePosixPath(pathstr)
|
65
|
+
|
66
|
+
|
67
|
+
def split_ip_port(ip_port: str):
    """Split an ``address:port`` string into its address and port.

    The split happens at the last ``:``. Any ``[`` / ``]`` characters in the
    address part are dropped (the IPv6 enclosure) before it is parsed with
    ``ipaddress.ip_address``.

    Args:
        ip_port: Text such as ``"1.2.3.4:80"`` or ``"[::1]:443"``.

    Returns:
        A ``(address, port)`` tuple: the address in exploded form and the
        port as an ``int``.

    Raises:
        ValueError: If the address part is not a valid IP address or the
            port part is not an integer.
    """
    host, _, port_text = ip_port.rpartition(":")
    # Remove the [] enclosure if it is IPv6.
    host = host.translate(str.maketrans("", "", "[]"))
    address = ip_address(host)
    return address.exploded, int(port_text)
|
73
|
+
|
74
|
+
def get_country_code(number: str) -> str:
    """Return the first region code for the phone number's country code.

    The number is parsed with ``PhoneNumberExtractor.parse_phone_number``
    and the parsed country calling code is mapped to region codes via
    ``geocoder.region_codes_for_country_code``.

    Args:
        number: Phone number as text.

    Returns:
        The first region code reported for the number's country code.

    Raises:
        BadDataException: If the parser returns a falsy result.
    """
    parsed = PhoneNumberExtractor.parse_phone_number(number)
    if not parsed:
        raise BadDataException('bad phone number')
    region_codes = geocoder.region_codes_for_country_code(parsed.country_code)
    return region_codes[0]
|
80
|
+
|
81
|
+
|
82
|
+
def get_iban_details(number) -> tuple[str, None]:
    """Return the country prefix of an IBAN and a bank-code placeholder.

    Args:
        number: IBAN text; only its first two characters are read.

    Returns:
        A ``(country_code, bank_code)`` tuple. ``country_code`` is the first
        two characters of ``number``; ``bank_code`` is always ``None``
        because no bank-code parsing is implemented here.
    """
    country_code = number[:2]
    return country_code, None
|
84
|
+
|
85
|
+
|
86
|
+
def build_observables(
    bundler: txt2stixBundler, stix_mapping, indicator, extracted_value, extractor
):
    """Build the STIX objects for one extraction, normalising failures.

    Thin wrapper around ``_build_observables`` that makes every ordinary
    failure surface as ``BadDataException``.

    Args:
        bundler: Bundler used by ``_build_observables`` to create relationships.
        stix_mapping: Name of the STIX mapping that selects what is built.
        indicator: Indicator dict that is filled in for the extraction.
        extracted_value: The value that was extracted from the text.
        extractor: The extractor that produced ``extracted_value``.

    Returns:
        Whatever ``_build_observables`` returns for the same arguments.

    Raises:
        BadDataException: Re-raised unchanged when ``_build_observables``
            raises it, or raised with the original error as ``__cause__``
            for any other ``Exception``.
    """
    try:
        return _build_observables(
            bundler, stix_mapping, indicator, extracted_value, extractor
        )
    except BadDataException:
        raise
    except Exception as e:
        # Only Exception is wrapped: KeyboardInterrupt, SystemExit and
        # GeneratorExit must propagate instead of being reported as bad data.
        raise BadDataException("unknown data error") from e
|
97
|
+
|
98
|
+
|
99
|
+
def _build_observables(
|
100
|
+
bundler: txt2stixBundler, stix_mapping, indicator, extracted_value, extractor
|
101
|
+
):
|
102
|
+
retrieved_objects = retrieve_stix_objects(stix_mapping, extracted_value)
|
103
|
+
if retrieved_objects:
|
104
|
+
return retrieved_objects, [sdo["id"] for sdo in retrieved_objects]
|
105
|
+
if retrieved_objects == []:
|
106
|
+
logger.warning(
|
107
|
+
f"could not find `{stix_mapping}` with id=`{extracted_value}` in remote"
|
108
|
+
)
|
109
|
+
raise BadDataException(
|
110
|
+
f"could not find `{stix_mapping}` with id=`{extracted_value}` in remote"
|
111
|
+
)
|
112
|
+
|
113
|
+
stix_objects = [indicator]
|
114
|
+
|
115
|
+
if stix_mapping == "ipv4-addr":
|
116
|
+
indicator["name"] = f"ipv4: {extracted_value}"
|
117
|
+
indicator["pattern"] = f"[ ipv4-addr:value = { repr(extracted_value) } ]"
|
118
|
+
|
119
|
+
stix_objects.append(
|
120
|
+
dict_to_stix2(
|
121
|
+
{"type": "ipv4-addr", "spec_version": "2.1", "value": extracted_value}
|
122
|
+
)
|
123
|
+
)
|
124
|
+
|
125
|
+
stix_objects.append(
|
126
|
+
bundler.new_relationship(
|
127
|
+
stix_objects[1].id,
|
128
|
+
indicator["id"],
|
129
|
+
"detected-using",
|
130
|
+
description=f"{stix_objects[1]['value']} can be detected in the STIX pattern {indicator['name']}",
|
131
|
+
external_references=indicator["external_references"],
|
132
|
+
)
|
133
|
+
)
|
134
|
+
|
135
|
+
if stix_mapping == "ipv4-addr-port":
|
136
|
+
extracted_value, port = split_ip_port(extracted_value)
|
137
|
+
indicator["name"] = f"ipv4: {extracted_value}"
|
138
|
+
indicator["pattern"] = f"[ ipv4-addr:value = { repr(extracted_value) } ]"
|
139
|
+
|
140
|
+
stix_objects.append(
|
141
|
+
dict_to_stix2(
|
142
|
+
{"type": "ipv4-addr", "spec_version": "2.1", "value": extracted_value}
|
143
|
+
)
|
144
|
+
)
|
145
|
+
id = stix_objects[-1].id
|
146
|
+
stix_objects.append(
|
147
|
+
bundler.new_relationship(
|
148
|
+
stix_objects[1].id,
|
149
|
+
indicator["id"],
|
150
|
+
"detected-using",
|
151
|
+
description=f"{stix_objects[1]['value']} can be detected in the STIX pattern {indicator['name']}",
|
152
|
+
)
|
153
|
+
)
|
154
|
+
|
155
|
+
stix_objects.append(
|
156
|
+
dict_to_stix2(
|
157
|
+
{
|
158
|
+
"type": "network-traffic",
|
159
|
+
"spec_version": "2.1",
|
160
|
+
"dst_ref": id,
|
161
|
+
"dst_port": port,
|
162
|
+
"protocols": ["ipv4"],
|
163
|
+
}
|
164
|
+
)
|
165
|
+
)
|
166
|
+
|
167
|
+
if stix_mapping == "ipv6-addr":
|
168
|
+
indicator["name"] = f"ipv6: {extracted_value}"
|
169
|
+
indicator["pattern"] = f"[ ipv6-addr:value = { repr(extracted_value) } ]"
|
170
|
+
|
171
|
+
stix_objects.append(
|
172
|
+
dict_to_stix2(
|
173
|
+
{"type": "ipv6-addr", "spec_version": "2.1", "value": extracted_value}
|
174
|
+
)
|
175
|
+
)
|
176
|
+
stix_objects.append(
|
177
|
+
bundler.new_relationship(
|
178
|
+
stix_objects[1].id,
|
179
|
+
indicator["id"],
|
180
|
+
"detected-using",
|
181
|
+
description=f"{stix_objects[1]['value']} can be detected in the STIX pattern {indicator['name']}",
|
182
|
+
external_references=indicator["external_references"],
|
183
|
+
)
|
184
|
+
)
|
185
|
+
|
186
|
+
if stix_mapping == "ipv6-addr-port":
|
187
|
+
extracted_value, port = split_ip_port(extracted_value)
|
188
|
+
indicator["name"] = f"ipv6: {extracted_value}"
|
189
|
+
indicator["pattern"] = f"[ ipv6-addr:value = { repr(extracted_value) } ]"
|
190
|
+
|
191
|
+
stix_objects.append(
|
192
|
+
dict_to_stix2(
|
193
|
+
{"type": "ipv6-addr", "spec_version": "2.1", "value": extracted_value}
|
194
|
+
)
|
195
|
+
)
|
196
|
+
id = stix_objects[-1].id
|
197
|
+
stix_objects.append(
|
198
|
+
bundler.new_relationship(
|
199
|
+
stix_objects[1].id,
|
200
|
+
indicator["id"],
|
201
|
+
"detected-using",
|
202
|
+
description=f"{stix_objects[1]['value']} can be detected in the STIX pattern {indicator['name']}",
|
203
|
+
external_references=indicator["external_references"],
|
204
|
+
)
|
205
|
+
)
|
206
|
+
stix_objects.append(
|
207
|
+
dict_to_stix2(
|
208
|
+
{
|
209
|
+
"type": "network-traffic",
|
210
|
+
"spec_version": "2.1",
|
211
|
+
"dst_ref": id,
|
212
|
+
"dst_port": port,
|
213
|
+
"protocols": ["ipv6"],
|
214
|
+
}
|
215
|
+
)
|
216
|
+
)
|
217
|
+
|
218
|
+
if stix_mapping == "domain-name":
|
219
|
+
q = validators.hostname(
|
220
|
+
extracted_value,
|
221
|
+
may_have_port=False,
|
222
|
+
skip_ipv6_addr=True,
|
223
|
+
skip_ipv4_addr=True,
|
224
|
+
)
|
225
|
+
if q != True:
|
226
|
+
r = validators.domain(extracted_value, consider_tld=True)
|
227
|
+
if r != True:
|
228
|
+
raise BadDataException("invalid domain or hostname") from r
|
229
|
+
indicator["name"] = f"Domain: {extracted_value}"
|
230
|
+
indicator["pattern"] = f"[ domain-name:value = { repr(extracted_value) } ]"
|
231
|
+
|
232
|
+
stix_objects.append(
|
233
|
+
dict_to_stix2(
|
234
|
+
{"type": "domain-name", "spec_version": "2.1", "value": extracted_value}
|
235
|
+
)
|
236
|
+
)
|
237
|
+
stix_objects.append(
|
238
|
+
bundler.new_relationship(
|
239
|
+
stix_objects[1].id,
|
240
|
+
indicator["id"],
|
241
|
+
"detected-using",
|
242
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
243
|
+
external_references=indicator["external_references"],
|
244
|
+
)
|
245
|
+
)
|
246
|
+
|
247
|
+
if stix_mapping == "url":
|
248
|
+
if (q := validators.url(extracted_value, simple_host=True)) and q != True:
|
249
|
+
raise BadDataException("invalid url") from q
|
250
|
+
# assert validators.url(extracted_value) == True
|
251
|
+
indicator["name"] = f"URL: {extracted_value}"
|
252
|
+
indicator["pattern"] = f"[ url:value = { repr(extracted_value) } ]"
|
253
|
+
|
254
|
+
stix_objects.append(
|
255
|
+
dict_to_stix2(
|
256
|
+
{"type": "url", "spec_version": "2.1", "value": extracted_value}
|
257
|
+
)
|
258
|
+
)
|
259
|
+
stix_objects.append(
|
260
|
+
bundler.new_relationship(
|
261
|
+
stix_objects[1].id,
|
262
|
+
indicator["id"],
|
263
|
+
"detected-using",
|
264
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
265
|
+
external_references=indicator["external_references"],
|
266
|
+
)
|
267
|
+
)
|
268
|
+
|
269
|
+
mimetype = validate_file_mimetype(extracted_value)
|
270
|
+
if stix_mapping in ["file", "directory-file"]:
|
271
|
+
if not mimetype:
|
272
|
+
raise BadDataException(f"invalid file extension in `{extracted_value}`")
|
273
|
+
file = dict_to_stix2(
|
274
|
+
{
|
275
|
+
"type": "file",
|
276
|
+
"spec_version": "2.1",
|
277
|
+
"name": extracted_value,
|
278
|
+
"mime_type": mimetype,
|
279
|
+
}
|
280
|
+
)
|
281
|
+
|
282
|
+
if stix_mapping == "file":
|
283
|
+
indicator["name"] = f"File name: {extracted_value}"
|
284
|
+
indicator["pattern"] = f"[ file:name = { repr(extracted_value) } ]"
|
285
|
+
|
286
|
+
stix_objects.append(file)
|
287
|
+
stix_objects.append(
|
288
|
+
bundler.new_relationship(
|
289
|
+
stix_objects[1].id,
|
290
|
+
indicator["id"],
|
291
|
+
"detected-using",
|
292
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
293
|
+
external_references=indicator["external_references"],
|
294
|
+
)
|
295
|
+
)
|
296
|
+
|
297
|
+
if stix_mapping == "directory":
|
298
|
+
indicator["name"] = f"Directory: {extracted_value}"
|
299
|
+
indicator["pattern"] = f"[ directory:path = { repr(extracted_value) } ]"
|
300
|
+
|
301
|
+
stix_objects.append(
|
302
|
+
dict_to_stix2(
|
303
|
+
{"type": "directory", "spec_version": "2.1", "path": extracted_value}
|
304
|
+
)
|
305
|
+
)
|
306
|
+
stix_objects.append(
|
307
|
+
bundler.new_relationship(
|
308
|
+
stix_objects[1].id,
|
309
|
+
indicator["id"],
|
310
|
+
"detected-using",
|
311
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
312
|
+
external_references=indicator["external_references"],
|
313
|
+
)
|
314
|
+
)
|
315
|
+
|
316
|
+
if stix_mapping == "directory-file":
|
317
|
+
path = parse_path(extracted_value)
|
318
|
+
extracted_value = str(path.parent)
|
319
|
+
indicator["name"] = f"Directory: {extracted_value}"
|
320
|
+
indicator["pattern"] = f"[ directory:path = { repr(extracted_value) } ]"
|
321
|
+
|
322
|
+
dir_obj = dict_to_stix2(
|
323
|
+
{"type": "directory", "spec_version": "2.1", "path": extracted_value}
|
324
|
+
)
|
325
|
+
stix_objects.append(dir_obj)
|
326
|
+
dir = stix_objects[-1]
|
327
|
+
stix_objects.append(
|
328
|
+
bundler.new_relationship(
|
329
|
+
stix_objects[1].id,
|
330
|
+
indicator["id"],
|
331
|
+
"detected-using",
|
332
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
333
|
+
external_references=indicator["external_references"],
|
334
|
+
)
|
335
|
+
)
|
336
|
+
|
337
|
+
stix_objects.append(file)
|
338
|
+
stix_objects.append(
|
339
|
+
bundler.new_relationship(
|
340
|
+
file.id,
|
341
|
+
dir.id,
|
342
|
+
"directory",
|
343
|
+
description=f"{extracted_value} directory {indicator['name']}",
|
344
|
+
external_references=indicator["external_references"],
|
345
|
+
)
|
346
|
+
)
|
347
|
+
return stix_objects, [dir_obj.id]
|
348
|
+
|
349
|
+
if stix_mapping == "file-hash":
|
350
|
+
file_hash_type = (
|
351
|
+
find_hash_type(extracted_value, extractor.name) or extractor.slug
|
352
|
+
)
|
353
|
+
# this needs to be updated, maybe put hash_type in notes?
|
354
|
+
indicator["name"] = f"{file_hash_type}: {extracted_value}"
|
355
|
+
indicator["pattern"] = (
|
356
|
+
f"[ file:hashes.'{file_hash_type}' = { repr(extracted_value) } ]"
|
357
|
+
)
|
358
|
+
stix_objects[0] = dict_to_stix2(indicator, allow_custom=True)
|
359
|
+
|
360
|
+
stix_objects.append(
|
361
|
+
dict_to_stix2(
|
362
|
+
{
|
363
|
+
"type": "file",
|
364
|
+
"spec_version": "2.1",
|
365
|
+
"hashes": {file_hash_type: extracted_value},
|
366
|
+
},
|
367
|
+
allow_custom=True,
|
368
|
+
)
|
369
|
+
)
|
370
|
+
stix_objects.append(
|
371
|
+
bundler.new_relationship(
|
372
|
+
stix_objects[1].id,
|
373
|
+
indicator["id"],
|
374
|
+
"detected-using",
|
375
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
376
|
+
external_references=indicator["external_references"],
|
377
|
+
)
|
378
|
+
)
|
379
|
+
|
380
|
+
if stix_mapping == "email-addr":
|
381
|
+
q = validate_email(extracted_value)
|
382
|
+
if q != True:
|
383
|
+
raise BadDataException("invalid email") from q
|
384
|
+
indicator["name"] = f"Email Address: {extracted_value}"
|
385
|
+
indicator["pattern"] = f"[ email-addr:value = { repr(extracted_value) } ]"
|
386
|
+
|
387
|
+
stix_objects.append(
|
388
|
+
dict_to_stix2(
|
389
|
+
{"type": "email-addr", "spec_version": "2.1", "value": extracted_value}
|
390
|
+
)
|
391
|
+
)
|
392
|
+
stix_objects.append(
|
393
|
+
bundler.new_relationship(
|
394
|
+
stix_objects[1].id,
|
395
|
+
indicator["id"],
|
396
|
+
"detected-using",
|
397
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
398
|
+
external_references=indicator["external_references"],
|
399
|
+
)
|
400
|
+
)
|
401
|
+
|
402
|
+
if stix_mapping == "mac-addr":
|
403
|
+
q = validators.mac_address(extracted_value)
|
404
|
+
if q != True:
|
405
|
+
raise BadDataException("invalid email") from q
|
406
|
+
indicator["name"] = f"MAC Address: {extracted_value}"
|
407
|
+
indicator["pattern"] = f"[ mac-addr:value = { repr(extracted_value) } ]"
|
408
|
+
|
409
|
+
stix_objects.append(
|
410
|
+
dict_to_stix2(
|
411
|
+
{"type": "mac-addr", "spec_version": "2.1", "value": extracted_value}
|
412
|
+
)
|
413
|
+
)
|
414
|
+
stix_objects.append(
|
415
|
+
bundler.new_relationship(
|
416
|
+
stix_objects[1].id,
|
417
|
+
indicator["id"],
|
418
|
+
"detected-using",
|
419
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
420
|
+
external_references=indicator["external_references"],
|
421
|
+
)
|
422
|
+
)
|
423
|
+
|
424
|
+
if stix_mapping == "windows-registry-key":
|
425
|
+
if not validate_reg_key(extracted_value):
|
426
|
+
raise BadDataException("Invalid registry key")
|
427
|
+
indicator["name"] = f"Windows Registry Key: {extracted_value}"
|
428
|
+
indicator["pattern"] = (
|
429
|
+
f"[ windows-registry-key:key = { repr(extracted_value) } ]"
|
430
|
+
)
|
431
|
+
|
432
|
+
stix_objects.append(
|
433
|
+
dict_to_stix2(
|
434
|
+
{
|
435
|
+
"type": "windows-registry-key",
|
436
|
+
"spec_version": "2.1",
|
437
|
+
"key": extracted_value,
|
438
|
+
}
|
439
|
+
)
|
440
|
+
)
|
441
|
+
stix_objects.append(
|
442
|
+
bundler.new_relationship(
|
443
|
+
stix_objects[1].id,
|
444
|
+
indicator["id"],
|
445
|
+
"detected-using",
|
446
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
447
|
+
external_references=indicator["external_references"],
|
448
|
+
)
|
449
|
+
)
|
450
|
+
|
451
|
+
if stix_mapping == "user-agent":
|
452
|
+
indicator["name"] = f"User Agent: {extracted_value}"
|
453
|
+
indicator["pattern"] = f"[ user-agent:string = { repr(extracted_value) } ]"
|
454
|
+
|
455
|
+
stix_objects.append(
|
456
|
+
dict_to_stix2(
|
457
|
+
{"type": "user-agent", "spec_version": "2.1", "string": extracted_value}
|
458
|
+
)
|
459
|
+
)
|
460
|
+
stix_objects.append(
|
461
|
+
bundler.new_relationship(
|
462
|
+
stix_objects[1].id,
|
463
|
+
indicator["id"],
|
464
|
+
"detected-using",
|
465
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
466
|
+
external_references=indicator["external_references"],
|
467
|
+
)
|
468
|
+
)
|
469
|
+
|
470
|
+
if stix_mapping == "autonomous-system":
|
471
|
+
match = re.search(r"\d+", extracted_value)
|
472
|
+
if not match:
|
473
|
+
raise BadDataException(
|
474
|
+
f"AS Number must contain a number, got `{extracted_value}`"
|
475
|
+
)
|
476
|
+
extracted_value = int(match.group(0))
|
477
|
+
assert extracted_value >= 1 and extracted_value <= 65535, "AS Number must be between 1 and 65535"
|
478
|
+
indicator["name"] = f"AS{extracted_value}"
|
479
|
+
indicator["pattern"] = (
|
480
|
+
f"[ autonomous-system:number = { repr(extracted_value) } ]"
|
481
|
+
)
|
482
|
+
|
483
|
+
stix_objects.append(
|
484
|
+
dict_to_stix2(
|
485
|
+
{
|
486
|
+
"type": "autonomous-system",
|
487
|
+
"spec_version": "2.1",
|
488
|
+
"number": extracted_value,
|
489
|
+
}
|
490
|
+
)
|
491
|
+
)
|
492
|
+
stix_objects.append(
|
493
|
+
bundler.new_relationship(
|
494
|
+
stix_objects[1].id,
|
495
|
+
indicator["id"],
|
496
|
+
"detected-using",
|
497
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
498
|
+
external_references=indicator["external_references"],
|
499
|
+
)
|
500
|
+
)
|
501
|
+
|
502
|
+
if stix_mapping == "cryptocurrency-wallet":
|
503
|
+
# ASSUMPTION: always BTC
|
504
|
+
# TODO: parse crypto types
|
505
|
+
|
506
|
+
currency_symbol = "BTC"
|
507
|
+
btc2stix = crypto2stix.BTC2Stix()
|
508
|
+
indicator["name"] = f"{currency_symbol} Wallet: {extracted_value}"
|
509
|
+
indicator["pattern"] = (
|
510
|
+
f"[ cryptocurrency-wallet:address = { repr(extracted_value) } ]"
|
511
|
+
)
|
512
|
+
wallet_obj, *other_objects = btc2stix.process_wallet(
|
513
|
+
extracted_value, wallet_only=True, transactions_only=False
|
514
|
+
)
|
515
|
+
|
516
|
+
stix_objects.append(wallet_obj)
|
517
|
+
stix_objects.extend(other_objects)
|
518
|
+
stix_objects.append(
|
519
|
+
bundler.new_relationship(
|
520
|
+
wallet_obj.id,
|
521
|
+
indicator["id"],
|
522
|
+
"detected-using",
|
523
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
524
|
+
external_references=indicator["external_references"],
|
525
|
+
)
|
526
|
+
)
|
527
|
+
return stix_objects, [wallet_obj.id]
|
528
|
+
|
529
|
+
if stix_mapping == "cryptocurrency-transaction":
|
530
|
+
# ASSUMPTION: always BTC
|
531
|
+
# TODO: do something about this
|
532
|
+
currency_symbol = "BTC"
|
533
|
+
btc2stix = crypto2stix.BTC2Stix()
|
534
|
+
txn_object, *other_objects = btc2stix.process_transaction(extracted_value)
|
535
|
+
indicator["name"] = f"{currency_symbol} Transaction: {extracted_value}"
|
536
|
+
indicator["pattern"] = (
|
537
|
+
f"[ cryptocurrency-transaction:hash = { repr(extracted_value) } ]"
|
538
|
+
)
|
539
|
+
|
540
|
+
stix_objects.append(txn_object)
|
541
|
+
stix_objects.extend(other_objects)
|
542
|
+
stix_objects.append(
|
543
|
+
bundler.new_relationship(
|
544
|
+
txn_object.id,
|
545
|
+
indicator["id"],
|
546
|
+
"detected-using",
|
547
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
548
|
+
external_references=indicator["external_references"],
|
549
|
+
)
|
550
|
+
)
|
551
|
+
|
552
|
+
return stix_objects, [txn_object.id]
|
553
|
+
|
554
|
+
if stix_mapping == "cryptocurrency-wallet-with-transaction":
|
555
|
+
# ASSUMPTION: always BTC
|
556
|
+
# TODO: parse crypto types
|
557
|
+
|
558
|
+
currency_symbol = "BTC"
|
559
|
+
btc2stix = crypto2stix.BTC2Stix()
|
560
|
+
indicator["name"] = f"{currency_symbol} Wallet: {extracted_value}"
|
561
|
+
indicator["pattern"] = (
|
562
|
+
f"[ cryptocurrency-wallet:address = { repr(extracted_value) } ]"
|
563
|
+
)
|
564
|
+
wallet_obj, *other_objects = btc2stix.process_wallet(
|
565
|
+
extracted_value, wallet_only=False, transactions_only=True
|
566
|
+
)
|
567
|
+
|
568
|
+
stix_objects.append(wallet_obj)
|
569
|
+
stix_objects.extend(other_objects)
|
570
|
+
stix_objects.append(
|
571
|
+
bundler.new_relationship(
|
572
|
+
wallet_obj.id,
|
573
|
+
indicator["id"],
|
574
|
+
"detected-using",
|
575
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
576
|
+
external_references=indicator["external_references"],
|
577
|
+
)
|
578
|
+
)
|
579
|
+
return stix_objects, [wallet_obj.id]
|
580
|
+
if stix_mapping == "bank-card":
|
581
|
+
# TODO
|
582
|
+
card_type = extractor.name
|
583
|
+
if "Bank Card" in extractor.name:
|
584
|
+
card_type = extractor.name.split("Bank Card ")[1]
|
585
|
+
|
586
|
+
extracted_value = extracted_value.replace("-", "").replace(" ", "")
|
587
|
+
indicator["id"] = bundler.indicator_id_from_value(extracted_value, stix_mapping)
|
588
|
+
card_object, *other_objects = creditcard2stix.create_objects(
|
589
|
+
{"card_number": extracted_value}, os.getenv("BIN_LIST_API_KEY", "")
|
590
|
+
)
|
591
|
+
stix_objects.append(card_object)
|
592
|
+
stix_objects.extend(other_objects)
|
593
|
+
|
594
|
+
if card_object.get("scheme"):
|
595
|
+
card_type = card_object["scheme"]
|
596
|
+
|
597
|
+
indicator["name"] = f"{card_type}: {extracted_value}"
|
598
|
+
indicator["pattern"] = f"[ bank-card:number = { repr(extracted_value) } ]"
|
599
|
+
|
600
|
+
stix_objects.append(
|
601
|
+
bundler.new_relationship(
|
602
|
+
card_object["id"],
|
603
|
+
indicator["id"],
|
604
|
+
"detected-using",
|
605
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
606
|
+
external_references=indicator["external_references"],
|
607
|
+
)
|
608
|
+
)
|
609
|
+
return stix_objects, [card_object["id"]]
|
610
|
+
|
611
|
+
if stix_mapping == "bank-account":
|
612
|
+
q = validators.iban(extracted_value)
|
613
|
+
if q != True:
|
614
|
+
raise BadDataException('invalid iban number') from q
|
615
|
+
indicator["name"] = f"Bank account: {extracted_value}"
|
616
|
+
indicator["pattern"] = (
|
617
|
+
f"[ bank-account:iban_number = { repr(extracted_value) } ]"
|
618
|
+
)
|
619
|
+
extracted_value = extracted_value.replace("-", "").replace(" ", "")
|
620
|
+
|
621
|
+
country_code, bank_code = get_iban_details(extracted_value)
|
622
|
+
|
623
|
+
stix_objects.append(
|
624
|
+
dict_to_stix2(
|
625
|
+
{
|
626
|
+
"type": "bank-account",
|
627
|
+
"spec_version": "2.1",
|
628
|
+
"iban_number": extracted_value,
|
629
|
+
"country": country_code,
|
630
|
+
}
|
631
|
+
)
|
632
|
+
)
|
633
|
+
stix_objects.append(
|
634
|
+
bundler.new_relationship(
|
635
|
+
stix_objects[1].id,
|
636
|
+
indicator["id"],
|
637
|
+
"detected-using",
|
638
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
639
|
+
external_references=indicator["external_references"],
|
640
|
+
)
|
641
|
+
)
|
642
|
+
|
643
|
+
if stix_mapping == "phone-number":
|
644
|
+
country_code = get_country_code(extracted_value)
|
645
|
+
if not country_code:
|
646
|
+
raise BadDataException('parse phone number failed')
|
647
|
+
indicator["name"] = f"Phone Number: {extracted_value}"
|
648
|
+
indicator["pattern"] = f"[ phone-number:number = { repr(extracted_value) }"
|
649
|
+
if country_code:
|
650
|
+
indicator["pattern"] += f" AND phone-number:country = '{country_code}' "
|
651
|
+
indicator["pattern"] += " ]"
|
652
|
+
|
653
|
+
stix_objects.append(
|
654
|
+
dict_to_stix2(
|
655
|
+
{
|
656
|
+
"type": "phone-number",
|
657
|
+
"spec_version": "2.1",
|
658
|
+
"number": extracted_value,
|
659
|
+
"country": country_code,
|
660
|
+
}
|
661
|
+
)
|
662
|
+
)
|
663
|
+
stix_objects.append(
|
664
|
+
bundler.new_relationship(
|
665
|
+
stix_objects[1].id,
|
666
|
+
indicator["id"],
|
667
|
+
"detected-using",
|
668
|
+
description=f"{extracted_value} can be detected in the STIX pattern {indicator['name']}",
|
669
|
+
external_references=indicator["external_references"],
|
670
|
+
)
|
671
|
+
)
|
672
|
+
|
673
|
+
if stix_mapping == "attack-pattern":
|
674
|
+
stix_objects = [
|
675
|
+
dict_to_stix2(
|
676
|
+
{
|
677
|
+
"type": "attack-pattern",
|
678
|
+
"spec_version": "2.1",
|
679
|
+
"created_by_ref": indicator["created_by_ref"],
|
680
|
+
"created": indicator["created"],
|
681
|
+
"modified": indicator["modified"],
|
682
|
+
"name": extracted_value,
|
683
|
+
"external_references": indicator["external_references"],
|
684
|
+
}
|
685
|
+
)
|
686
|
+
]
|
687
|
+
|
688
|
+
if stix_mapping == "campaign":
|
689
|
+
stix_objects = [
|
690
|
+
dict_to_stix2(
|
691
|
+
{
|
692
|
+
"type": "campaign",
|
693
|
+
"spec_version": "2.1",
|
694
|
+
"created_by_ref": indicator["created_by_ref"],
|
695
|
+
"created": indicator["created"],
|
696
|
+
"modified": indicator["modified"],
|
697
|
+
"name": extracted_value,
|
698
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
699
|
+
"external_references": indicator["external_references"],
|
700
|
+
}
|
701
|
+
)
|
702
|
+
]
|
703
|
+
|
704
|
+
if stix_mapping == "course-of-action":
|
705
|
+
stix_objects = [
|
706
|
+
dict_to_stix2(
|
707
|
+
{
|
708
|
+
"type": "course-of-action",
|
709
|
+
"spec_version": "2.1",
|
710
|
+
"created_by_ref": indicator["created_by_ref"],
|
711
|
+
"created": indicator["created"],
|
712
|
+
"modified": indicator["modified"],
|
713
|
+
"name": extracted_value,
|
714
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
715
|
+
"external_references": indicator["external_references"],
|
716
|
+
}
|
717
|
+
)
|
718
|
+
]
|
719
|
+
|
720
|
+
if stix_mapping == "infrastructure":
|
721
|
+
stix_objects = [
|
722
|
+
dict_to_stix2(
|
723
|
+
{
|
724
|
+
"type": "infrastructure",
|
725
|
+
"spec_version": "2.1",
|
726
|
+
"created_by_ref": indicator["created_by_ref"],
|
727
|
+
"created": indicator["created"],
|
728
|
+
"modified": indicator["modified"],
|
729
|
+
"name": extracted_value,
|
730
|
+
"infrastructure_types": ["unknown"],
|
731
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
732
|
+
"external_references": indicator["external_references"],
|
733
|
+
}
|
734
|
+
)
|
735
|
+
]
|
736
|
+
|
737
|
+
if stix_mapping == "intrusion-set":
|
738
|
+
stix_objects = [
|
739
|
+
dict_to_stix2(
|
740
|
+
{
|
741
|
+
"type": "intrusion-set",
|
742
|
+
"spec_version": "2.1",
|
743
|
+
"created_by_ref": indicator["created_by_ref"],
|
744
|
+
"created": indicator["created"],
|
745
|
+
"modified": indicator["modified"],
|
746
|
+
"name": extracted_value,
|
747
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
748
|
+
"external_references": indicator["external_references"],
|
749
|
+
}
|
750
|
+
)
|
751
|
+
]
|
752
|
+
|
753
|
+
if stix_mapping == "malware":
|
754
|
+
stix_objects = [
|
755
|
+
dict_to_stix2(
|
756
|
+
{
|
757
|
+
"type": "malware",
|
758
|
+
"spec_version": "2.1",
|
759
|
+
"created_by_ref": indicator["created_by_ref"],
|
760
|
+
"created": indicator["created"],
|
761
|
+
"modified": indicator["modified"],
|
762
|
+
"name": extracted_value,
|
763
|
+
"malware_types": ["unknown"],
|
764
|
+
"is_family": True,
|
765
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
766
|
+
"external_references": indicator["external_references"],
|
767
|
+
}
|
768
|
+
)
|
769
|
+
]
|
770
|
+
|
771
|
+
if stix_mapping == "threat-actor":
|
772
|
+
stix_objects = [
|
773
|
+
dict_to_stix2(
|
774
|
+
{
|
775
|
+
"type": "threat-actor",
|
776
|
+
"spec_version": "2.1",
|
777
|
+
"created_by_ref": indicator["created_by_ref"],
|
778
|
+
"created": indicator["created"],
|
779
|
+
"modified": indicator["modified"],
|
780
|
+
"name": extracted_value,
|
781
|
+
"threat_actor_types": "unknown",
|
782
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
783
|
+
"external_references": indicator["external_references"],
|
784
|
+
}
|
785
|
+
)
|
786
|
+
]
|
787
|
+
|
788
|
+
if stix_mapping == "tool":
|
789
|
+
stix_objects = [
|
790
|
+
dict_to_stix2(
|
791
|
+
{
|
792
|
+
"type": "tool",
|
793
|
+
"spec_version": "2.1",
|
794
|
+
"created_by_ref": indicator["created_by_ref"],
|
795
|
+
"created": indicator["created"],
|
796
|
+
"modified": indicator["modified"],
|
797
|
+
"name": extracted_value,
|
798
|
+
"tool_types": "unknown",
|
799
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
800
|
+
"external_references": indicator["external_references"],
|
801
|
+
}
|
802
|
+
)
|
803
|
+
]
|
804
|
+
|
805
|
+
if stix_mapping == "identity":
|
806
|
+
stix_objects = [
|
807
|
+
dict_to_stix2(
|
808
|
+
{
|
809
|
+
"type": "identity",
|
810
|
+
"spec_version": "2.1",
|
811
|
+
"created_by_ref": indicator["created_by_ref"],
|
812
|
+
"created": indicator["created"],
|
813
|
+
"modified": indicator["modified"],
|
814
|
+
"name": extracted_value,
|
815
|
+
"identity_class": "unspecified",
|
816
|
+
"object_marking_refs": indicator["object_marking_refs"],
|
817
|
+
"external_references": indicator["external_references"],
|
818
|
+
}
|
819
|
+
)
|
820
|
+
]
|
821
|
+
|
822
|
+
RELATABLE = [
|
823
|
+
"ipv4-addr",
|
824
|
+
"ipv6-addr",
|
825
|
+
"domain-name",
|
826
|
+
"url",
|
827
|
+
"file",
|
828
|
+
"directory",
|
829
|
+
"directory",
|
830
|
+
"file",
|
831
|
+
"email-addr",
|
832
|
+
"mac-addr",
|
833
|
+
"windows-registry-key",
|
834
|
+
"autonomous-system",
|
835
|
+
"user-agent",
|
836
|
+
"cryptocurrency-wallet",
|
837
|
+
"cryptocurrency-transaction",
|
838
|
+
"bank-card",
|
839
|
+
"bank-account",
|
840
|
+
"phone-number",
|
841
|
+
"attack-pattern",
|
842
|
+
"campaign",
|
843
|
+
"course-of-action",
|
844
|
+
"infrastructure",
|
845
|
+
"intrusion-set",
|
846
|
+
"malware",
|
847
|
+
"threat-actor",
|
848
|
+
"tool",
|
849
|
+
"identity",
|
850
|
+
"location",
|
851
|
+
]
|
852
|
+
relationships = []
|
853
|
+
for i, indicator in enumerate(stix_objects):
|
854
|
+
if isinstance(indicator, dict):
|
855
|
+
indicator = dict_to_stix2(indicator)
|
856
|
+
stix_objects[i] = indicator
|
857
|
+
if indicator.type in RELATABLE:
|
858
|
+
relationships.append(indicator.id)
|
859
|
+
|
860
|
+
return stix_objects, relationships
|