txt2stix 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2stix/__init__.py +33 -0
- txt2stix/ai_extractor/__init__.py +15 -0
- txt2stix/ai_extractor/anthropic.py +12 -0
- txt2stix/ai_extractor/base.py +87 -0
- txt2stix/ai_extractor/deepseek.py +19 -0
- txt2stix/ai_extractor/gemini.py +18 -0
- txt2stix/ai_extractor/openai.py +15 -0
- txt2stix/ai_extractor/openrouter.py +20 -0
- txt2stix/ai_extractor/prompts.py +164 -0
- txt2stix/ai_extractor/utils.py +85 -0
- txt2stix/attack_flow.py +101 -0
- txt2stix/bundler.py +428 -0
- txt2stix/common.py +23 -0
- txt2stix/extractions.py +59 -0
- txt2stix/includes/__init__.py +0 -0
- txt2stix/includes/extractions/ai/config.yaml +1023 -0
- txt2stix/includes/extractions/lookup/config.yaml +393 -0
- txt2stix/includes/extractions/pattern/config.yaml +609 -0
- txt2stix/includes/helpers/mimetype_filename_extension_list.csv +936 -0
- txt2stix/includes/helpers/stix_relationship_types.txt +41 -0
- txt2stix/includes/helpers/tlds.txt +1446 -0
- txt2stix/includes/helpers/windows_registry_key_prefix.txt +12 -0
- txt2stix/includes/lookups/_README.md +11 -0
- txt2stix/includes/lookups/_generate_lookups.py +247 -0
- txt2stix/includes/lookups/attack_pattern.txt +1 -0
- txt2stix/includes/lookups/campaign.txt +1 -0
- txt2stix/includes/lookups/country_iso3166_alpha2.txt +249 -0
- txt2stix/includes/lookups/course_of_action.txt +1 -0
- txt2stix/includes/lookups/disarm_id_v1_5.txt +345 -0
- txt2stix/includes/lookups/disarm_name_v1_5.txt +347 -0
- txt2stix/includes/lookups/extensions.txt +78 -0
- txt2stix/includes/lookups/identity.txt +1 -0
- txt2stix/includes/lookups/infrastructure.txt +1 -0
- txt2stix/includes/lookups/intrusion_set.txt +1 -0
- txt2stix/includes/lookups/malware.txt +2 -0
- txt2stix/includes/lookups/mitre_atlas_id_v4_5_2.txt +116 -0
- txt2stix/includes/lookups/mitre_atlas_name_v4_5_2.txt +117 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +1502 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +1656 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +1765 -0
- txt2stix/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +141 -0
- txt2stix/includes/lookups/mitre_attack_ics_id_v16_0.txt +254 -0
- txt2stix/includes/lookups/mitre_attack_ics_name_v16_0.txt +293 -0
- txt2stix/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +159 -0
- txt2stix/includes/lookups/mitre_attack_mobile_id_v16_0.txt +277 -0
- txt2stix/includes/lookups/mitre_attack_mobile_name_v16_0.txt +296 -0
- txt2stix/includes/lookups/mitre_capec_id_v3_9.txt +559 -0
- txt2stix/includes/lookups/mitre_capec_name_v3_9.txt +560 -0
- txt2stix/includes/lookups/mitre_cwe_id_v4_15.txt +939 -0
- txt2stix/includes/lookups/mitre_cwe_name_v4_15.txt +939 -0
- txt2stix/includes/lookups/threat_actor.txt +1 -0
- txt2stix/includes/lookups/tld.txt +1422 -0
- txt2stix/includes/lookups/tool.txt +1 -0
- txt2stix/includes/tests/test_cases.yaml +695 -0
- txt2stix/indicator.py +860 -0
- txt2stix/lookups.py +68 -0
- txt2stix/pattern/__init__.py +13 -0
- txt2stix/pattern/extractors/__init__.py +0 -0
- txt2stix/pattern/extractors/base_extractor.py +167 -0
- txt2stix/pattern/extractors/card/README.md +34 -0
- txt2stix/pattern/extractors/card/__init__.py +15 -0
- txt2stix/pattern/extractors/card/amex_card_extractor.py +52 -0
- txt2stix/pattern/extractors/card/diners_card_extractor.py +47 -0
- txt2stix/pattern/extractors/card/discover_card_extractor.py +48 -0
- txt2stix/pattern/extractors/card/jcb_card_extractor.py +43 -0
- txt2stix/pattern/extractors/card/master_card_extractor.py +63 -0
- txt2stix/pattern/extractors/card/union_card_extractor.py +38 -0
- txt2stix/pattern/extractors/card/visa_card_extractor.py +46 -0
- txt2stix/pattern/extractors/crypto/__init__.py +3 -0
- txt2stix/pattern/extractors/crypto/btc_extractor.py +38 -0
- txt2stix/pattern/extractors/directory/__init__.py +10 -0
- txt2stix/pattern/extractors/directory/unix_directory_extractor.py +40 -0
- txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +47 -0
- txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/domain/__init__.py +8 -0
- txt2stix/pattern/extractors/domain/domain_extractor.py +39 -0
- txt2stix/pattern/extractors/domain/hostname_extractor.py +36 -0
- txt2stix/pattern/extractors/domain/sub_domain_extractor.py +49 -0
- txt2stix/pattern/extractors/hashes/__init__.py +16 -0
- txt2stix/pattern/extractors/hashes/md5_extractor.py +16 -0
- txt2stix/pattern/extractors/hashes/sha1_extractor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha224_extractor.py +18 -0
- txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +13 -0
- txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +15 -0
- txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +16 -0
- txt2stix/pattern/extractors/helper.py +64 -0
- txt2stix/pattern/extractors/ip/__init__.py +14 -0
- txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +49 -0
- txt2stix/pattern/extractors/ip/ipv4_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +42 -0
- txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv6_extractor.py +16 -0
- txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +46 -0
- txt2stix/pattern/extractors/others/__init__.py +22 -0
- txt2stix/pattern/extractors/others/asn_extractor.py +14 -0
- txt2stix/pattern/extractors/others/cpe_extractor.py +29 -0
- txt2stix/pattern/extractors/others/cve_extractor.py +14 -0
- txt2stix/pattern/extractors/others/email_extractor.py +21 -0
- txt2stix/pattern/extractors/others/filename_extractor.py +17 -0
- txt2stix/pattern/extractors/others/iban_extractor.py +15 -0
- txt2stix/pattern/extractors/others/mac_address_extractor.py +13 -0
- txt2stix/pattern/extractors/others/phonenumber_extractor.py +41 -0
- txt2stix/pattern/extractors/others/user_agent_extractor.py +20 -0
- txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +18 -0
- txt2stix/pattern/extractors/url/__init__.py +7 -0
- txt2stix/pattern/extractors/url/url_extractor.py +22 -0
- txt2stix/pattern/extractors/url/url_file_extractor.py +21 -0
- txt2stix/pattern/extractors/url/url_path_extractor.py +74 -0
- txt2stix/retriever.py +126 -0
- txt2stix/stix.py +1 -0
- txt2stix/txt2stix.py +336 -0
- txt2stix/utils.py +86 -0
- txt2stix-0.0.4.dist-info/METADATA +190 -0
- txt2stix-0.0.4.dist-info/RECORD +119 -0
- txt2stix-0.0.4.dist-info/WHEEL +4 -0
- txt2stix-0.0.4.dist-info/entry_points.txt +2 -0
- txt2stix-0.0.4.dist-info/licenses/LICENSE +202 -0
txt2stix/bundler.py
ADDED
@@ -0,0 +1,428 @@
|
|
1
|
+
import enum
|
2
|
+
import logging
|
3
|
+
from stix2 import (
|
4
|
+
Report,
|
5
|
+
Identity,
|
6
|
+
MarkingDefinition,
|
7
|
+
Relationship,
|
8
|
+
Bundle,
|
9
|
+
)
|
10
|
+
from stix2.parsing import dict_to_stix2, parse as parse_stix
|
11
|
+
from stix2.serialization import serialize
|
12
|
+
import hashlib
|
13
|
+
from stix2 import (
|
14
|
+
v21,
|
15
|
+
)
|
16
|
+
import requests
|
17
|
+
|
18
|
+
|
19
|
+
from .common import UUID_NAMESPACE, MinorException
|
20
|
+
from datetime import UTC, datetime as dt
|
21
|
+
import uuid
|
22
|
+
import json
|
23
|
+
from .indicator import build_observables
|
24
|
+
|
25
|
+
|
26
|
+
logger = logging.getLogger("txt2stix.stix")
|
27
|
+
|
28
|
+
|
29
|
+
# Extension definition id shared by every TLP v2 marking below.
_TLP_EXTENSION_ID = "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d"


def _tlp_marking(marking_id, definition_type, tlp_2_0):
    """Build one TLP marking-definition; only id, type and level differ."""
    return MarkingDefinition(
        spec_version="2.1",
        id=marking_id,
        created="2022-10-01T00:00:00.000Z",
        definition_type=definition_type,
        extensions={
            _TLP_EXTENSION_ID: {
                "extension_type": "property-extension",
                "tlp_2_0": tlp_2_0,
            }
        },
    )


class TLP_LEVEL(enum.Enum):
    """Enumeration of the TLP marking-definitions a report can carry.

    Each member's ``value`` is a ``MarkingDefinition`` object; ``name`` is
    overridden to return the lower-cased member name.
    """

    CLEAR = _tlp_marking(
        "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
        "TLP:CLEAR",
        "clear",
    )
    GREEN = _tlp_marking(
        "marking-definition--bab4a63c-aed9-4cf5-a766-dfca5abac2bb",
        "TLP:GREEN",
        "green",
    )
    AMBER = _tlp_marking(
        "marking-definition--55d920b0-5e8b-4f79-9ee9-91f868d9b421",
        "TLP:AMBER",
        "amber",
    )
    AMBER_STRICT = _tlp_marking(
        "marking-definition--939a9414-2ddd-4d32-a0cd-375ea402b003",
        "TLP:AMBER+STRICT",
        "amber+strict",
    )
    RED = _tlp_marking(
        "marking-definition--e828b379-4e03-4974-9ac4-e53a884c97c1",
        "TLP:RED",
        "red",
    )

    @classmethod
    def levels(cls):
        """Return a mapping of lower-case level key -> enum member."""
        return {
            "clear": cls.CLEAR,
            "green": cls.GREEN,
            "amber": cls.AMBER,
            "amber_strict": cls.AMBER_STRICT,
            "red": cls.RED,
        }

    @classmethod
    def values(cls):
        """Return the marking-definition objects of all levels, CLEAR to RED."""
        ordered = (cls.CLEAR, cls.GREEN, cls.AMBER, cls.AMBER_STRICT, cls.RED)
        return [member.value for member in ordered]

    @classmethod
    def get(cls, level):
        """Resolve ``level`` to a member.

        A member is returned unchanged; anything else is looked up as a key
        of ``levels()`` (raises ``KeyError`` when the key is unknown).
        """
        return level if isinstance(level, cls) else cls.levels()[level]

    @property
    def name(self):
        """Lower-cased member name, e.g. ``amber_strict``."""
        return super().name.lower()
|
120
|
+
|
121
|
+
|
122
|
+
class txt2stixBundler:
    """Collect STIX objects produced from one text report into a single bundle.

    On construction a ``Report`` object is created and placed in a ``Bundle``
    together with the selected TLP marking, the default txt2stix marking and
    the authoring identity.  Extractions are then turned into indicators /
    observables (``process_observables``) and linked by relationships
    (``process_relationships``); ``to_json`` serializes the bundle.
    """

    # Custom STIX types that need an extension-definition object.  The value
    # is a process-wide cache of the downloaded definition (None = not yet
    # fetched); it is deliberately shared by all instances.
    EXTENSION_MAPPING = {
        "user-agent": None,
        "cryptocurrency-wallet": None,
        "cryptocurrency-transaction": None,
        "bank-card": None,
        "bank-account": None,
        "phone-number": None,
        "weakness": None,
    }
    EXTENSION_DEFINITION_BASE_URL = "https://raw.githubusercontent.com/muchdogesec/stix2extensions/main/extension-definitions"
    # Class-level defaults are kept for backward compatibility only; every
    # mutable one is replaced by a fresh per-instance object in __init__.
    report = None
    identity = None
    object_marking_refs = []
    uuid = None
    id_map = dict()
    id_value_map = dict()
    _flow_objects = []
    # this identity is https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/identity/txt2stix.json
    default_identity = Identity(
        type="identity",
        spec_version="2.1",
        id="identity--f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5",
        created_by_ref="identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        created="2020-01-01T00:00:00.000Z",
        modified="2020-01-01T00:00:00.000Z",
        name="txt2stix",
        description="https://github.com/muchdogsec/txt2stix",
        identity_class="system",
        sectors=["technology"],
        contact_information="https://www.dogesec.com/contact/",
        object_marking_refs=[
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
        ],
    )
    # this marking-definition is https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/marking-definition/txt2stix.json
    default_marking = MarkingDefinition(
        type="marking-definition",
        spec_version="2.1",
        id="marking-definition--f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5",
        created_by_ref="identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        created="2020-01-01T00:00:00.000Z",
        definition_type="statement",
        definition={
            "statement": "This object was created using: https://github.com/muchdogesec/txt2stix"
        },
        object_marking_refs=[
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
        ],
    )

    def __init__(
        self,
        name,
        identity,
        tlp_level,
        description,
        confidence,
        extractors,
        labels,
        report_id=None,
        created=None,
        external_references=None,
        modified=None,
    ) -> None:
        """Create the report object and the bundle that will hold everything.

        Args:
            name: report name.
            identity: authoring Identity; falls back to ``default_identity``.
            tlp_level: a ``TLP_LEVEL`` member or a key of ``TLP_LEVEL.levels()``.
            description: report body; its MD5 is stored as an external reference.
            confidence: passed through to the Report.
            extractors: mapping of extraction type -> extractor config.
            labels: optional iterable of report labels (not modified).
            report_id: UUID to use for the report; derived with uuid5 from
                identity id, creation time and name when omitted.
            created: creation timestamp; defaults to the current UTC time.
            external_references: extra external references for the report.
            modified: modification timestamp; defaults to ``created``.
        """
        # Fresh per-instance state: the class-level dicts/lists would
        # otherwise be shared (and mutated) across every bundler instance.
        self.object_marking_refs = []
        self.id_map = {}
        self.id_value_map = {}
        self._flow_objects = []

        self.observables_processed = 0
        self.created = created or dt.now(tz=UTC)
        self.all_extractors = extractors
        self.identity = identity or self.default_identity
        self.tlp_level = TLP_LEVEL.get(tlp_level)
        if report_id:
            self.uuid = report_id
        else:
            self.uuid = str(
                uuid.uuid5(UUID_NAMESPACE, f"{self.identity.id}+{self.created}+{name}")
            )
        external_references = list(external_references or [])
        # Build a new list so the caller's ``labels`` is never mutated; the
        # placeholder is popped from the report again right after creation.
        labels = [*(labels or []), 'placeholder_label']

        self.job_id = f"report--{self.uuid}"
        self.report_md5 = hashlib.md5(description.encode()).hexdigest()
        self.report = Report(
            created_by_ref=self.identity.id,
            name=name,
            id=self.job_id,
            description=description,
            object_refs=[
                f"note--{self.uuid}"
            ],  # won't allow creation with empty object_refs
            created=self.created,
            modified=modified or self.created,
            object_marking_refs=[self.tlp_level.value.id],
            labels=labels,
            published=self.created,
            external_references=[
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                },
                {
                    "source_name": "txt2stix Report MD5",
                    "description": self.report_md5,
                },
            ] + external_references,
            confidence=confidence,
        )
        self.report.object_refs.clear()  # clear object refs
        self.report.labels.pop(-1)  # remove txt2stix placeholder
        self.added_objects = set()
        self.set_defaults()

    def set_defaults(self):
        """Create the bundle and seed it with markings, identity and report."""
        # self.value.extend(TLP_LEVEL.values()) # adds all tlp levels
        self.bundle = Bundle(objects=[self.tlp_level.value], id=f"bundle--{self.uuid}")

        self.bundle.objects.extend([self.default_marking, self.identity, self.report])
        # add default STIX 2.1 marking definition for txt2stix
        self.report.object_marking_refs.append(self.default_marking.id)

    def add_extension(self, object):
        """Ensure the extension-definition for a custom-typed object is bundled.

        Objects whose ``type`` is not a key of ``EXTENSION_MAPPING`` are
        ignored.  The definition is downloaded once per process and cached on
        the class, but it is added to *this* bundle on every call (``add_ref``
        de-duplicates), so a cached definition is not lost for later bundlers.

        Raises:
            Exception: the definition must be fetched and the object is
                neither an observable nor a domain object.
        """
        _type = object["type"]
        if _type not in self.EXTENSION_MAPPING:
            return
        extension = self.EXTENSION_MAPPING[_type]
        if extension is None:
            if isinstance(object, v21._Observable):
                url = self.EXTENSION_DEFINITION_BASE_URL + f"/scos/{_type}.json"
            elif isinstance(object, v21._DomainObject):
                url = self.EXTENSION_DEFINITION_BASE_URL + f"/sdos/{_type}.json"
            else:
                raise Exception(
                    f"Unknown custom object object.type = {_type}, {type(object)=}"
                )
            logger.info(f'getting extension definition for "{_type}" from `{url}`')
            extension = self.load_stix_object_from_url(url)
            self.EXTENSION_MAPPING[_type] = extension
        self.add_ref(extension, is_report_object=False)

    @staticmethod
    def load_stix_object_from_url(url, timeout=30):
        """Download a JSON document and parse it into a STIX object.

        Args:
            url: location of the JSON document.
            timeout: seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-success HTTP status.
        """
        resp = requests.get(url, timeout=timeout)
        # Fail with a clear HTTP error instead of trying to parse an error page.
        resp.raise_for_status()
        return dict_to_stix2(resp.json())

    def add_ref(self, sdo, is_report_object=True):
        """Add ``sdo`` to the bundle once and remember a display value for it.

        Args:
            sdo: STIX object (mapping-like, must have ``id`` and ``type``).
            is_report_object: when true the id is also appended to the
                report's ``object_refs``.
        """
        self.add_extension(sdo)
        sdo_id = sdo["id"]
        if sdo_id not in self.added_objects:
            self.added_objects.add(sdo_id)
            if is_report_object:
                self.report.object_refs.append(sdo_id)
            self.bundle.objects.append(sdo)

        # First non-empty descriptive property wins; otherwise fall back to
        # the first external reference's id, then to a fixed marker.
        sdo_value = ""
        for key in ['name', 'value', 'path', 'key', 'string', 'number', 'iban_number', 'address', 'hashes']:
            if v := sdo.get(key):
                sdo_value = v
                break
        else:
            if refs := sdo.get('external_references', []):
                sdo_value = refs[0]['external_id']
            else:
                sdo_value = "{NOTEXTRACTED}"

        self.id_value_map[sdo_id] = sdo_value

    def add_indicator(self, extracted_dict, add_standard_relationship):
        """Build and add the STIX objects for one extraction.

        Args:
            extracted_dict: extraction with ``type``, ``value``, ``id`` and
                optionally ``indexes``.
            add_standard_relationship: accepted for interface compatibility;
                not used by this method.

        Raises:
            MinorException: ``build_observables`` produced no objects.
        """
        extractor = self.all_extractors[extracted_dict["type"]]
        stix_mapping = extractor.stix_mapping
        extracted_value = extracted_dict["value"]
        extracted_id = extracted_dict["id"]

        indicator = self.new_indicator(extractor, stix_mapping, extracted_value)
        # set id so it doesn't need to be created in build_observables
        if extracted_dict.get("indexes"):
            indicator["external_references"].append(
                dict(
                    source_name="indexes",
                    description=json.dumps(extracted_dict["indexes"]),
                )
            )
        objects, related_refs = build_observables(
            self, stix_mapping, indicator, extracted_dict['value'], extractor
        )
        if not objects:
            raise MinorException(
                f"build observable returns {objects} from extraction: {extracted_dict}"
            )
        # Remembered so AI relationships can resolve extraction ids later.
        self.id_map[extracted_id] = related_refs

        for sdo in objects:
            sdo = parse_stix(sdo, allow_custom=True)
            self.add_ref(sdo)

    def new_indicator(self, extractor, stix_mapping, extracted_value):
        """Return a plain-dict indicator skeleton for ``extracted_value``."""
        indicator = {
            "type": "indicator",
            "id": self.indicator_id_from_value(extracted_value, stix_mapping),
            "spec_version": "2.1",
            "created_by_ref": self.report.created_by_ref,
            "created": self.report.created,
            "modified": self.report.modified,
            "indicator_types": ["unknown"],
            "name": extracted_value,
            "pattern_type": "stix",
            "pattern": f"[ {stix_mapping}:value = { repr(extracted_value) } ]",
            "valid_from": self.report.created,
            "object_marking_refs": self.report.object_marking_refs,
            "external_references": [
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                },
                {
                    "source_name": "txt2stix_extraction_type",
                    "description": f"{extractor.slug}_{extractor.version}",
                },
            ],
        }

        return indicator

    def add_ai_relationship(self, gpt_out):
        """Relate every object of the source extraction to every target object.

        ``gpt_out`` carries extraction ids in ``source_ref`` / ``target_ref``;
        ids unknown to ``id_map`` simply yield no relationships.
        """
        for source_ref in self.id_map.get(gpt_out["source_ref"], []):
            for target_ref in self.id_map.get(gpt_out["target_ref"], []):
                self.add_standard_relationship(
                    source_ref, target_ref, gpt_out["relationship_type"],
                )

    def add_standard_relationship(self, source_ref, target_ref, relationship_type):
        """Add a relationship whose description reads "<source> <type> <target>"."""
        descriptor = ' '.join(relationship_type.split('-'))
        self.add_ref(self.new_relationship(
            source_ref, target_ref, relationship_type,
            description=f"{self.id_value_map.get(source_ref, source_ref)} {descriptor} {self.id_value_map.get(target_ref, target_ref)}"
        ))

    def new_relationship(self, source_ref, target_ref, relationship_type, description=None, external_references=None):
        """Create a Relationship with a deterministic (uuid5) id.

        The id depends only on type, source and target, so the same triple
        always maps to the same object.
        """
        return Relationship(
            id="relationship--"
            + str(
                uuid.uuid5(
                    UUID_NAMESPACE, f"{relationship_type}+{source_ref}+{target_ref}"
                )
            ),
            source_ref=source_ref,
            target_ref=target_ref,
            relationship_type=relationship_type,
            created_by_ref=self.report.created_by_ref,
            created=self.report.created,
            description=description,
            modified=self.report.modified,
            object_marking_refs=self.report.object_marking_refs,
            allow_custom=True,
            external_references=external_references or [
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                }
            ],
        )

    def to_json(self):
        """Serialize the bundle to an indented JSON string."""
        return serialize(self.bundle, indent=4)

    def process_observables(self, extractions, add_standard_relationship=False):
        """Add an indicator for each extraction, skipping the ones that fail.

        Failures are logged at debug level with the exception traceback and
        do not stop processing of the remaining extractions.
        """
        for ex in extractions:
            try:
                if ex.get('id', '').startswith('ai'):  # so id is distinct across multiple AIExtractors
                    ex["id"] = f'{ex["id"]}_{self.observables_processed}'
                ex["id"] = ex.get("id", f"ex_{self.observables_processed}")
                self.observables_processed += 1
                self.add_indicator(ex, add_standard_relationship)
            except Exception:
                # Exception (not BaseException) so Ctrl-C / SystemExit still
                # propagate; exc_info records the actual failure.
                logger.debug(
                    f"ran into exception while processing observable `{ex}`",
                    exc_info=True,
                )

    def process_relationships(self, observables):
        """Add AI-provided relationships, skipping the ones that fail."""
        for relationship in observables:
            try:
                self.add_ai_relationship(relationship)
            except Exception:
                logger.debug(
                    f"ran into exception while processing relationship `{relationship}`",
                    exc_info=True,
                )

    def indicator_id_from_value(self, value, stix_mapping):
        """Return a deterministic indicator id for this report/value pair."""
        return "indicator--" + str(
            uuid.uuid5(UUID_NAMESPACE, f"txt2stix+{self.identity['id']}+{self.report_md5}+{stix_mapping}+{value}")
        )

    @property
    def flow_objects(self):
        """Objects last assigned through the ``flow_objects`` setter."""
        return self._flow_objects

    @flow_objects.setter
    def flow_objects(self, objects):
        # Every object except the report itself is added to the bundle;
        # extension definitions are not listed in the report's object_refs.
        for obj in objects:
            if obj['id'] == self.report.id:
                continue
            is_report_object = obj['type'] != "extension-definition"
            self.add_ref(obj, is_report_object=is_report_object)
        self._flow_objects = objects
|
txt2stix/common.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from uuid import UUID
|
3
|
+
|
4
|
+
UUID_NAMESPACE = UUID("f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5")
|
5
|
+
|
6
|
+
class NamedDict(dict):
    """A dict whose items can also be read and written as attributes.

    Attribute reads return the real attribute when it exists and is not
    ``None``; otherwise the dict item of that name, or ``""`` when there is
    no such item.  Attribute writes always store a dict item.
    """

    def __getattribute__(self, attr: str):
        try:
            value = super().__getattribute__(attr)
        except AttributeError:
            # Only a missing attribute triggers the dict fallback; any other
            # error (e.g. raised inside a property) is allowed to surface.
            value = None
        if value is not None:
            return value
        # Attribute absent or None (e.g. a class-level default): use the item.
        return super().get(attr, "")

    def __setattr__(self, __name: str, __value: Any) -> None:
        return super().__setitem__(__name, __value)
|
19
|
+
|
20
|
+
class FatalException(Exception):
    """Error category named "fatal"; no raise/catch site is visible in this module."""
|
22
|
+
class MinorException(Exception):
    """Error category named "minor"; a plain ``Exception`` subclass with no extra behavior."""
|
txt2stix/extractions.py
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
from typing import Any, Type
|
2
|
+
import yaml
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
if TYPE_CHECKING:
|
7
|
+
import txt2stix.pattern.extractors.base_extractor
|
8
|
+
from .common import NamedDict
|
9
|
+
|
10
|
+
class Extractor(NamedDict):
    """Configuration of a single extraction, backed by its config mapping.

    The class-level ``None`` attributes below only declare the expected
    keys; actual values are stored as dict items (see ``NamedDict``).
    """

    extraction_key = None
    name = None
    type: str = None
    description = None
    created = None
    modified = None
    created_by = None
    version = None
    prompt_base = None
    prompt_helper = None
    prompt_extraction_processing = None
    prompt_positive_examples = None
    prompt_negative_examples = None
    stix_mapping = None
    prompt_extraction_extra = None
    pattern_extractor : 'Type[txt2stix.pattern.extractors.base_extractor.BaseExtractor]' = None

    def __init__(self, key, dct, include_path=None, test_cases: dict[str, list[str]]=None):
        """Populate from ``dct`` and attach key, examples and lookup file path.

        Args:
            key: config key; stored as both ``extraction_key`` and ``slug``.
            dct: mapping of settings for this extraction.
            include_path: base directory used to resolve a relative ``file``.
            test_cases: optional mapping providing ``test_positive_examples``
                and ``test_negative_examples``.
        """
        super().__init__(dct)
        self.extraction_key = self.slug = key
        examples = test_cases if test_cases else dict()
        self.prompt_negative_examples = examples.get('test_negative_examples') or []
        self.prompt_positive_examples = examples.get('test_positive_examples') or []
        relative_file = self.file
        # Relative lookup files are anchored at include_path when one is given.
        if relative_file and not Path(relative_file).is_absolute() and include_path:
            self.file = Path(include_path) / relative_file

    def load(self):
        """For ``lookup`` extractors, read ``file`` into the ``lookups`` set.

        Every line of the file is stripped of surrounding whitespace and
        added; other extractor types are left untouched.
        """
        if self.type != "lookup":
            return
        self.lookups = set()
        source = Path(self.file)
        self.lookups.update(entry.strip() for entry in source.read_text().splitlines())
|
46
|
+
|
47
|
+
def parse_extraction_config(include_path: Path):
    """Load every ``extractions/*/config.yaml`` under ``include_path``.

    All config files are merged into one mapping (later files override
    earlier keys) and each entry is wrapped in an ``Extractor``.  An entry's
    ``test_cases`` value selects its examples from the test-case config.

    Returns:
        dict mapping extraction key -> ``Extractor``.
    """
    config = {}
    # ``or {}`` guards against an empty test-case file parsing to None.
    test_cases = load_test_cases_config(include_path) or {}
    for p in include_path.glob("extractions/*/config.yaml"):
        # Context manager closes the handle; an empty YAML file yields None.
        with p.open() as config_file:
            config.update(yaml.safe_load(config_file) or {})

    return {
        k: Extractor(k, v, include_path, test_cases=test_cases.get(v.get('test_cases')))
        for k, v in config.items()
    }
|
54
|
+
|
55
|
+
def load_test_cases_config(include_path: Path) -> dict[str, dict[str, list[str]]]:
    """Load ``tests/test_cases.yaml`` from ``include_path``.

    Returns:
        The parsed mapping, or an empty dict when the file is missing or
        contains no document.
    """
    config_file = include_path/'tests/test_cases.yaml'
    if not config_file.exists():
        return {}
    # Close the handle deterministically; safe_load gives None for an empty
    # file, which would violate the declared return type.
    with config_file.open() as stream:
        return yaml.safe_load(stream) or {}
|
File without changes
|