txt2detection 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of txt2detection might be problematic.
- txt2detection/__main__.py +219 -68
- txt2detection/ai_extractor/base.py +41 -13
- txt2detection/ai_extractor/models.py +34 -0
- txt2detection/ai_extractor/openai.py +1 -3
- txt2detection/ai_extractor/openrouter.py +4 -4
- txt2detection/ai_extractor/prompts.py +130 -3
- txt2detection/attack_flow.py +233 -0
- txt2detection/bundler.py +174 -87
- txt2detection/credential_checker.py +11 -9
- txt2detection/models.py +86 -49
- txt2detection/observables.py +0 -1
- txt2detection/utils.py +24 -12
- {txt2detection-1.0.7.dist-info → txt2detection-1.0.9.dist-info}/METADATA +7 -8
- txt2detection-1.0.9.dist-info/RECORD +24 -0
- txt2detection-1.0.7.dist-info/RECORD +0 -22
- {txt2detection-1.0.7.dist-info → txt2detection-1.0.9.dist-info}/WHEEL +0 -0
- {txt2detection-1.0.7.dist-info → txt2detection-1.0.9.dist-info}/entry_points.txt +0 -0
- {txt2detection-1.0.7.dist-info → txt2detection-1.0.9.dist-info}/licenses/LICENSE +0 -0
txt2detection/bundler.py
CHANGED
@@ -15,8 +15,15 @@ from stix2 import (
 from stix2.serialization import serialize
 import hashlib
 
-from txt2detection import observables
-from txt2detection.models import
+from txt2detection import attack_flow, observables
+from txt2detection.models import (
+    AIDetection,
+    BaseDetection,
+    DataContainer,
+    DetectionContainer,
+    UUID_NAMESPACE,
+    SigmaRuleDetection,
+)
 
 from datetime import UTC, datetime as dt
 import uuid
@@ -28,56 +35,57 @@ from txt2detection.utils import STATUSES, remove_rule_specific_tags
 
 logger = logging.getLogger("txt2detection.bundler")
 
+
 class Bundler:
     identity = None
     object_marking_refs = []
     uuid = None
     id_map = dict()
-
+    data: DataContainer
     # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
-    default_identity = Identity(
-
-
-
-
-
-
-
-
-
-
-        "technology"
-
-
-
-
-
-
-
+    default_identity = Identity(
+        **{
+            "type": "identity",
+            "spec_version": "2.1",
+            "id": "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
+            "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
+            "created": "2020-01-01T00:00:00.000Z",
+            "modified": "2020-01-01T00:00:00.000Z",
+            "name": "txt2detection",
+            "description": "https://github.com/muchdogesec/txt2detection",
+            "identity_class": "system",
+            "sectors": ["technology"],
+            "contact_information": "https://www.dogesec.com/contact/",
+            "object_marking_refs": [
+                "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
+                "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
+            ],
+        }
+    )
     # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/marking-definition/txt2detection.json
-    default_marking = MarkingDefinition(
-
-
-
-
-
-
-
-        "
-
-
-        "
-
-
-
+    default_marking = MarkingDefinition(
+        **{
+            "type": "marking-definition",
+            "spec_version": "2.1",
+            "id": "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
+            "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
+            "created": "2020-01-01T00:00:00.000Z",
+            "definition_type": "statement",
+            "definition": {
+                "statement": "This object was created using: https://github.com/muchdogesec/txt2detection"
+            },
+            "object_marking_refs": [
+                "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
+                "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
+            ],
+        }
+    )
 
     @classmethod
     def generate_report_id(cls, created_by_ref, created, name):
         if not created_by_ref:
-            created_by_ref = cls.default_identity[
-        return str(
-            uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}")
-        )
+            created_by_ref = cls.default_identity["id"]
+        return str(uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}"))
 
     def __init__(
         self,
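The reworked generate_report_id above derives the report UUID deterministically: a UUIDv5 of "{created_by_ref}+{created}+{name}" under the package's UUID_NAMESPACE constant (imported from txt2detection.models). A minimal sketch of that scheme, using a placeholder namespace rather than the real constant:

    import uuid

    # Placeholder only; txt2detection defines its own UUID_NAMESPACE in txt2detection.models.
    UUID_NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")

    def generate_report_id(created_by_ref, created, name):
        # Identical inputs always produce the same report UUID, so re-running the
        # tool over the same text yields a stable report--<uuid> identifier.
        return str(uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}"))

    print(generate_report_id(
        "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
        "2020-01-01T00:00:00.000Z",
        "example report",
    ))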
@@ -89,7 +97,7 @@ class Bundler:
         created=None,
         modified=None,
         report_id=None,
-        external_refs: list=None,
+        external_refs: list = None,
         reference_urls=None,
         license=None,
         **kwargs,
@@ -97,15 +105,25 @@ class Bundler:
         self.created = created or dt.now(UTC)
         self.modified = modified or self.created
         self.identity = identity or self.default_identity
-        self.tlp_level = TLP_LEVEL.get(tlp_level or
-        self.uuid = report_id or self.generate_report_id(
+        self.tlp_level = TLP_LEVEL.get(tlp_level or "clear")
+        self.uuid = report_id or self.generate_report_id(
+            self.identity.id, self.created, name
+        )
         self.reference_urls = reference_urls or []
         self.labels = labels or []
         self.license = license
 
         self.job_id = f"report--{self.uuid}"
-        self.external_refs = (external_refs or []) + [
-
+        self.external_refs = (external_refs or []) + [
+            dict(
+                source_name="txt2detection",
+                url=url,
+                description="txt2detection-reference",
+            )
+            for url in self.reference_urls
+        ]
+        self.data = DataContainer.model_construct()
+
         self.report = Report(
             created_by_ref=self.identity.id,
             name=name,
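For illustration, with reference_urls=["https://example.com/blog/post"] (a hypothetical URL) and no caller-supplied external_refs, the comprehension added above leaves self.external_refs holding a single txt2detection-sourced reference:

    # Sketch of the resulting self.external_refs value (URL is illustrative).
    external_refs = [] + [
        dict(
            source_name="txt2detection",
            url=url,
            description="txt2detection-reference",
        )
        for url in ["https://example.com/blog/post"]
    ]
    # [{'source_name': 'txt2detection',
    #   'url': 'https://example.com/blog/post',
    #   'description': 'txt2detection-reference'}]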
@@ -124,7 +142,8 @@ class Bundler:
                     source_name="description_md5_hash",
                     external_id=hashlib.md5((description or "").encode()).hexdigest(),
                 )
-            ]
+            ]
+            + self.external_refs,
         )
         self.report.object_refs.clear() # clear object refs
         self.set_defaults()
@@ -150,13 +169,15 @@
         self.all_objects.add(sdo_id)
 
     def add_rule_indicator(self, detection: SigmaRuleDetection):
-        indicator_types = getattr(detection,
+        indicator_types = getattr(detection, "indicator_types", None)
         if isinstance(detection, AIDetection):
             detection = detection.to_sigma_rule_detection(self)
-        assert isinstance(
+        assert isinstance(
+            detection, SigmaRuleDetection
+        ), f"detection of type {type(detection)} not supported"
         indicator = {
             "type": "indicator",
-            "id": "indicator--"+str(detection.detection_id),
+            "id": "indicator--" + str(detection.detection_id),
             "spec_version": "2.1",
             "created_by_ref": self.report.created_by_ref,
             "created": self.report.created,
@@ -165,62 +186,90 @@ class Bundler:
             "name": detection.title,
             "description": detection.description,
             "labels": remove_rule_specific_tags(self.labels),
-            "pattern_type":
+            "pattern_type": "sigma",
             "pattern": detection.make_rule(self),
             "valid_from": self.report.created,
             "object_marking_refs": self.report.object_marking_refs,
             "external_references": self.external_refs + detection.external_references,
         }
-        indicator[
+        indicator["external_references"].append(
             {
-
-
+                "source_name": "rule_md5_hash",
+                "external_id": hashlib.md5(indicator["pattern"].encode()).hexdigest(),
             }
         )
+        logsource = detection.make_data_source()
 
         logger.debug(f"===== rule {detection.detection_id} =====")
-        logger.debug("```yaml\n"+indicator[
+        logger.debug("```yaml\n" + indicator["pattern"] + "\n```")
         logger.debug(f" =================== end of rule =================== ")
 
+        self.data.attacks = dict.fromkeys(detection.mitre_attack_ids, "Not found")
         for obj in self.get_attack_objects(detection.mitre_attack_ids):
             self.add_ref(obj)
             self.add_relation(indicator, obj)
+            self.data.attacks[obj["external_references"][0]["external_id"]] = obj["id"]
 
+        self.data.cves = dict.fromkeys(detection.cve_ids, "Not found")
         for obj in self.get_cve_objects(detection.cve_ids):
             self.add_ref(obj)
             self.add_relation(indicator, obj)
+            self.data.cves[obj["name"]] = obj["id"]
 
         self.add_ref(parse_stix(indicator, allow_custom=True), append_report=True)
+        self.add_ref(logsource, append_report=True)
+        self.add_relation(
+            indicator,
+            logsource,
+            description=f'{indicator["name"]} is created from {make_logsouce_string(logsource)}',
+        )
 
-
+        self.data.observables = []
+        for ob_type, ob_value in set(
+            observables.find_stix_observables(detection.detection)
+        ):
+            self.data.observables.append(dict(type=ob_type, value=ob_value))
             try:
                 obj = observables.to_stix_object(ob_type, ob_value)
                 self.add_ref(obj)
-                self.add_relation(indicator, obj,
-            except:
+                self.add_relation(indicator, obj, "related-to", target_name=ob_value)
+            except Exception as e:
+                self.data.observables[-1]["error"] = str(e)
                 logger.exception(f"failed to process observable {ob_type}/{ob_value}")
 
-
-
+    def add_relation(
+        self,
+        indicator,
+        target_object,
+        relationship_type="related-to",
+        target_name=None,
+        description=None,
+    ):
         ext_refs = []
 
         with contextlib.suppress(Exception):
-            indicator[
-
-
-
+            indicator["external_references"].append(
+                target_object["external_references"][0]
+            )
+            ext_refs = [target_object["external_references"][0]]
 
-
-
-
-
-
+        if not description:
+            target_name = (
+                target_name
+                or f"{target_object['external_references'][0]['external_id']} ({target_object['name']})"
+            )
+            description = f"{indicator['name']} {relationship_type} {target_name}"
+
+        rel = Relationship(
+            id="relationship--"
+            + str(
+                uuid.uuid5(UUID_NAMESPACE, f"{indicator['id']}+{target_object['id']}")
             ),
-            source_ref=indicator[
-            target_ref=target_object[
+            source_ref=indicator["id"],
+            target_ref=target_object["id"],
             relationship_type=relationship_type,
             created_by_ref=self.report.created_by_ref,
-            description=
+            description=description,
             created=self.report.created,
             modified=self.report.modified,
             object_marking_refs=self.report.object_marking_refs,
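The new self.data bookkeeping above records every observable pulled out of the rule and annotates the ones whose STIX conversion raised. A rough sketch of the resulting shape of Bundler.data.observables; the types and values are illustrative, not taken from the package:

    # Illustrative only: one entry per observable found in the detection.
    observables = [
        {"type": "ipv4-addr", "value": "203.0.113.5"},
        # to_stix_object() raised for this one, so the error string is recorded:
        {"type": "domain-name", "value": "not a valid domain", "error": "ValueError(...)"},
    ]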
@@ -240,44 +289,82 @@ class Bundler:
         if not attack_ids:
             return []
         logger.debug(f"retrieving attack objects: {attack_ids}")
-        endpoint = urljoin(
+        endpoint = urljoin(
+            os.environ["CTIBUTLER_BASE_URL"] + "/",
+            f"v1/attack-enterprise/objects/?attack_id=" + ",".join(attack_ids),
+        )
 
         headers = {}
-        if api_key := os.environ.get(
-            headers[
+        if api_key := os.environ.get("CTIBUTLER_API_KEY"):
+            headers["API-KEY"] = api_key
 
         return self._get_objects(endpoint, headers)
 
-
+    @classmethod
+    def get_attack_tactics(cls):
+        headers = {}
+        api_root = os.environ["CTIBUTLER_BASE_URL"] + "/"
+        if api_key := os.environ.get("CTIBUTLER_API_KEY"):
+            headers["API-KEY"] = api_key
+
+        endpoint = urljoin(
+            api_root, f"v1/attack-enterprise/objects/?attack_type=Tactic"
+        )
+        version_url = urljoin(api_root, f"v1/attack-enterprise/versions/installed/")
+        tactics = cls._get_objects(endpoint, headers=headers)
+        retval = dict(
+            version=requests.get(version_url, headers=headers).json()["latest"]
+        )
+        for tac in tactics:
+            retval[tac["x_mitre_shortname"]] = tac
+            retval[tac["external_references"][0]["external_id"]] = tac
+        return retval
+
+    @classmethod
+    def get_cve_objects(cls, cve_ids):
         if not cve_ids:
             return []
         logger.debug(f"retrieving cve objects: {cve_ids}")
-        endpoint = urljoin(
+        endpoint = urljoin(
+            os.environ["VULMATCH_BASE_URL"] + "/",
+            f"v1/cve/objects/?cve_id=" + ",".join(cve_ids),
+        )
         headers = {}
-        if api_key := os.environ.get(
-            headers[
+        if api_key := os.environ.get("VULMATCH_API_KEY"):
+            headers["API-KEY"] = api_key
 
-        return
+        return cls._get_objects(endpoint, headers)
 
-
+    @classmethod
+    def _get_objects(cls, endpoint, headers):
         data = []
         page = 1
         while True:
-            resp = requests.get(
+            resp = requests.get(
+                endpoint, params=dict(page=page, page_size=1000), headers=headers
+            )
             if resp.status_code != 200:
                 break
             d = resp.json()
-            if len(d[
+            if len(d["objects"]) == 0:
                 break
-            data.extend(d[
-            page+=1
-            if d[
+            data.extend(d["objects"])
+            page += 1
+            if d["page_results_count"] < d["page_size"]:
                 break
         return data
 
     def bundle_detections(self, container: DetectionContainer):
-        self.
+        self.data = DataContainer(detections=container)
         if not container.success:
             return
         for d in container.detections:
             self.add_rule_indicator(d)
+
+
+def make_logsouce_string(source: dict):
+    d = [
+        f"{k}={v}" for k, v in source.items() if k in ["product", "service", "category"]
+    ]
+    d_str = ", ".join(d)
+    return "log-source {" + d_str + "}"
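To show what the new module-level make_logsouce_string helper produces, a short usage sketch; the input dict is a simplified stand-in for the object returned by detection.make_data_source(), and only its product/service/category keys are used:

    from txt2detection.bundler import make_logsouce_string

    # Simplified stand-in for a Sigma-style log source; any other keys are ignored.
    logsource = {"product": "windows", "category": "process_creation"}
    print(make_logsouce_string(logsource))
    # log-source {product=windows, category=process_creation}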
txt2detection/credential_checker.py
CHANGED
@@ -5,7 +5,6 @@ from urllib.parse import urljoin
 import requests
 
 
-
 def check_llms():
     from txt2detection.__main__ import parse_model
 
@@ -23,14 +22,17 @@ def check_llms():
 
 def check_ctibutler_vulmatch(service):
     session = requests.Session()
-    if service ==
-        base_url = os.getenv(
-        url = urljoin(
-
-
-
-
-
+    if service == "vulmatch":
+        base_url = os.getenv("VULMATCH_BASE_URL")
+        url = urljoin(
+            base_url,
+            "v1/cve/objects/vulnerability--f552f6f4-39da-48dc-8717-323772c99588/",
+        )
+        session.headers["API-KEY"] = os.environ.get("VULMATCH_API_KEY")
+    elif service == "ctibutler":
+        base_url = os.getenv("CTIBUTLER_BASE_URL")
+        url = urljoin(base_url, "v1/location/versions/available/")
+        session.headers["API-KEY"] = os.environ.get("CTIBUTLER_API_KEY")
 
     try:
         resp = session.get(url)