txt2detection 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of txt2detection might be problematic. Click here for more details.
- txt2detection/__main__.py +219 -68
- txt2detection/ai_extractor/base.py +41 -13
- txt2detection/ai_extractor/models.py +34 -0
- txt2detection/ai_extractor/openai.py +1 -3
- txt2detection/ai_extractor/openrouter.py +4 -4
- txt2detection/ai_extractor/prompts.py +130 -3
- txt2detection/attack_flow.py +233 -0
- txt2detection/bundler.py +165 -91
- txt2detection/credential_checker.py +11 -9
- txt2detection/models.py +11 -0
- txt2detection/observables.py +0 -1
- txt2detection/utils.py +24 -12
- {txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/METADATA +6 -8
- txt2detection-1.0.9.dist-info/RECORD +24 -0
- txt2detection-1.0.8.dist-info/RECORD +0 -22
- {txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/WHEEL +0 -0
- {txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/entry_points.txt +0 -0
- {txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/licenses/LICENSE +0 -0
txt2detection/bundler.py
CHANGED
|
@@ -15,8 +15,15 @@ from stix2 import (
|
|
|
15
15
|
from stix2.serialization import serialize
|
|
16
16
|
import hashlib
|
|
17
17
|
|
|
18
|
-
from txt2detection import observables
|
|
19
|
-
from txt2detection.models import
|
|
18
|
+
from txt2detection import attack_flow, observables
|
|
19
|
+
from txt2detection.models import (
|
|
20
|
+
AIDetection,
|
|
21
|
+
BaseDetection,
|
|
22
|
+
DataContainer,
|
|
23
|
+
DetectionContainer,
|
|
24
|
+
UUID_NAMESPACE,
|
|
25
|
+
SigmaRuleDetection,
|
|
26
|
+
)
|
|
20
27
|
|
|
21
28
|
from datetime import UTC, datetime as dt
|
|
22
29
|
import uuid
|
|
@@ -28,56 +35,57 @@ from txt2detection.utils import STATUSES, remove_rule_specific_tags
|
|
|
28
35
|
|
|
29
36
|
logger = logging.getLogger("txt2detection.bundler")
|
|
30
37
|
|
|
38
|
+
|
|
31
39
|
class Bundler:
|
|
32
40
|
identity = None
|
|
33
41
|
object_marking_refs = []
|
|
34
42
|
uuid = None
|
|
35
43
|
id_map = dict()
|
|
36
|
-
|
|
44
|
+
data: DataContainer
|
|
37
45
|
# https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
|
|
38
|
-
default_identity = Identity(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
"technology"
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
46
|
+
default_identity = Identity(
|
|
47
|
+
**{
|
|
48
|
+
"type": "identity",
|
|
49
|
+
"spec_version": "2.1",
|
|
50
|
+
"id": "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
|
|
51
|
+
"created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
|
|
52
|
+
"created": "2020-01-01T00:00:00.000Z",
|
|
53
|
+
"modified": "2020-01-01T00:00:00.000Z",
|
|
54
|
+
"name": "txt2detection",
|
|
55
|
+
"description": "https://github.com/muchdogesec/txt2detection",
|
|
56
|
+
"identity_class": "system",
|
|
57
|
+
"sectors": ["technology"],
|
|
58
|
+
"contact_information": "https://www.dogesec.com/contact/",
|
|
59
|
+
"object_marking_refs": [
|
|
60
|
+
"marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
|
|
61
|
+
"marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
|
|
62
|
+
],
|
|
63
|
+
}
|
|
64
|
+
)
|
|
57
65
|
# https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/marking-definition/txt2detection.json
|
|
58
|
-
default_marking = MarkingDefinition(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
66
|
+
default_marking = MarkingDefinition(
|
|
67
|
+
**{
|
|
68
|
+
"type": "marking-definition",
|
|
69
|
+
"spec_version": "2.1",
|
|
70
|
+
"id": "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
|
|
71
|
+
"created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
|
|
72
|
+
"created": "2020-01-01T00:00:00.000Z",
|
|
73
|
+
"definition_type": "statement",
|
|
74
|
+
"definition": {
|
|
75
|
+
"statement": "This object was created using: https://github.com/muchdogesec/txt2detection"
|
|
76
|
+
},
|
|
77
|
+
"object_marking_refs": [
|
|
78
|
+
"marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
|
|
79
|
+
"marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
|
|
80
|
+
],
|
|
81
|
+
}
|
|
82
|
+
)
|
|
73
83
|
|
|
74
84
|
@classmethod
|
|
75
85
|
def generate_report_id(cls, created_by_ref, created, name):
|
|
76
86
|
if not created_by_ref:
|
|
77
|
-
created_by_ref = cls.default_identity[
|
|
78
|
-
return str(
|
|
79
|
-
uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}")
|
|
80
|
-
)
|
|
87
|
+
created_by_ref = cls.default_identity["id"]
|
|
88
|
+
return str(uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}"))
|
|
81
89
|
|
|
82
90
|
def __init__(
|
|
83
91
|
self,
|
|
@@ -89,7 +97,7 @@ class Bundler:
|
|
|
89
97
|
created=None,
|
|
90
98
|
modified=None,
|
|
91
99
|
report_id=None,
|
|
92
|
-
external_refs: list=None,
|
|
100
|
+
external_refs: list = None,
|
|
93
101
|
reference_urls=None,
|
|
94
102
|
license=None,
|
|
95
103
|
**kwargs,
|
|
@@ -97,14 +105,24 @@ class Bundler:
|
|
|
97
105
|
self.created = created or dt.now(UTC)
|
|
98
106
|
self.modified = modified or self.created
|
|
99
107
|
self.identity = identity or self.default_identity
|
|
100
|
-
self.tlp_level = TLP_LEVEL.get(tlp_level or
|
|
101
|
-
self.uuid = report_id or self.generate_report_id(
|
|
108
|
+
self.tlp_level = TLP_LEVEL.get(tlp_level or "clear")
|
|
109
|
+
self.uuid = report_id or self.generate_report_id(
|
|
110
|
+
self.identity.id, self.created, name
|
|
111
|
+
)
|
|
102
112
|
self.reference_urls = reference_urls or []
|
|
103
113
|
self.labels = labels or []
|
|
104
114
|
self.license = license
|
|
105
115
|
|
|
106
116
|
self.job_id = f"report--{self.uuid}"
|
|
107
|
-
self.external_refs = (external_refs or []) + [
|
|
117
|
+
self.external_refs = (external_refs or []) + [
|
|
118
|
+
dict(
|
|
119
|
+
source_name="txt2detection",
|
|
120
|
+
url=url,
|
|
121
|
+
description="txt2detection-reference",
|
|
122
|
+
)
|
|
123
|
+
for url in self.reference_urls
|
|
124
|
+
]
|
|
125
|
+
self.data = DataContainer.model_construct()
|
|
108
126
|
|
|
109
127
|
self.report = Report(
|
|
110
128
|
created_by_ref=self.identity.id,
|
|
@@ -124,7 +142,8 @@ class Bundler:
|
|
|
124
142
|
source_name="description_md5_hash",
|
|
125
143
|
external_id=hashlib.md5((description or "").encode()).hexdigest(),
|
|
126
144
|
)
|
|
127
|
-
]
|
|
145
|
+
]
|
|
146
|
+
+ self.external_refs,
|
|
128
147
|
)
|
|
129
148
|
self.report.object_refs.clear() # clear object refs
|
|
130
149
|
self.set_defaults()
|
|
@@ -150,13 +169,15 @@ class Bundler:
|
|
|
150
169
|
self.all_objects.add(sdo_id)
|
|
151
170
|
|
|
152
171
|
def add_rule_indicator(self, detection: SigmaRuleDetection):
|
|
153
|
-
indicator_types = getattr(detection,
|
|
172
|
+
indicator_types = getattr(detection, "indicator_types", None)
|
|
154
173
|
if isinstance(detection, AIDetection):
|
|
155
174
|
detection = detection.to_sigma_rule_detection(self)
|
|
156
|
-
assert isinstance(
|
|
175
|
+
assert isinstance(
|
|
176
|
+
detection, SigmaRuleDetection
|
|
177
|
+
), f"detection of type {type(detection)} not supported"
|
|
157
178
|
indicator = {
|
|
158
179
|
"type": "indicator",
|
|
159
|
-
"id": "indicator--"+str(detection.detection_id),
|
|
180
|
+
"id": "indicator--" + str(detection.detection_id),
|
|
160
181
|
"spec_version": "2.1",
|
|
161
182
|
"created_by_ref": self.report.created_by_ref,
|
|
162
183
|
"created": self.report.created,
|
|
@@ -165,66 +186,87 @@ class Bundler:
|
|
|
165
186
|
"name": detection.title,
|
|
166
187
|
"description": detection.description,
|
|
167
188
|
"labels": remove_rule_specific_tags(self.labels),
|
|
168
|
-
"pattern_type":
|
|
189
|
+
"pattern_type": "sigma",
|
|
169
190
|
"pattern": detection.make_rule(self),
|
|
170
191
|
"valid_from": self.report.created,
|
|
171
192
|
"object_marking_refs": self.report.object_marking_refs,
|
|
172
193
|
"external_references": self.external_refs + detection.external_references,
|
|
173
194
|
}
|
|
174
|
-
indicator[
|
|
195
|
+
indicator["external_references"].append(
|
|
175
196
|
{
|
|
176
|
-
|
|
177
|
-
|
|
197
|
+
"source_name": "rule_md5_hash",
|
|
198
|
+
"external_id": hashlib.md5(indicator["pattern"].encode()).hexdigest(),
|
|
178
199
|
}
|
|
179
200
|
)
|
|
180
201
|
logsource = detection.make_data_source()
|
|
181
202
|
|
|
182
203
|
logger.debug(f"===== rule {detection.detection_id} =====")
|
|
183
|
-
logger.debug("```yaml\n"+indicator[
|
|
204
|
+
logger.debug("```yaml\n" + indicator["pattern"] + "\n```")
|
|
184
205
|
logger.debug(f" =================== end of rule =================== ")
|
|
185
206
|
|
|
207
|
+
self.data.attacks = dict.fromkeys(detection.mitre_attack_ids, "Not found")
|
|
186
208
|
for obj in self.get_attack_objects(detection.mitre_attack_ids):
|
|
187
209
|
self.add_ref(obj)
|
|
188
210
|
self.add_relation(indicator, obj)
|
|
211
|
+
self.data.attacks[obj["external_references"][0]["external_id"]] = obj["id"]
|
|
189
212
|
|
|
213
|
+
self.data.cves = dict.fromkeys(detection.cve_ids, "Not found")
|
|
190
214
|
for obj in self.get_cve_objects(detection.cve_ids):
|
|
191
215
|
self.add_ref(obj)
|
|
192
216
|
self.add_relation(indicator, obj)
|
|
217
|
+
self.data.cves[obj["name"]] = obj["id"]
|
|
193
218
|
|
|
194
219
|
self.add_ref(parse_stix(indicator, allow_custom=True), append_report=True)
|
|
195
|
-
print('everywhere')
|
|
196
220
|
self.add_ref(logsource, append_report=True)
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
221
|
+
self.add_relation(
|
|
222
|
+
indicator,
|
|
223
|
+
logsource,
|
|
224
|
+
description=f'{indicator["name"]} is created from {make_logsouce_string(logsource)}',
|
|
225
|
+
)
|
|
200
226
|
|
|
201
|
-
|
|
227
|
+
self.data.observables = []
|
|
228
|
+
for ob_type, ob_value in set(
|
|
229
|
+
observables.find_stix_observables(detection.detection)
|
|
230
|
+
):
|
|
231
|
+
self.data.observables.append(dict(type=ob_type, value=ob_value))
|
|
202
232
|
try:
|
|
203
233
|
obj = observables.to_stix_object(ob_type, ob_value)
|
|
204
234
|
self.add_ref(obj)
|
|
205
|
-
self.add_relation(indicator, obj,
|
|
206
|
-
except:
|
|
235
|
+
self.add_relation(indicator, obj, "related-to", target_name=ob_value)
|
|
236
|
+
except Exception as e:
|
|
237
|
+
self.data.observables[-1]["error"] = str(e)
|
|
207
238
|
logger.exception(f"failed to process observable {ob_type}/{ob_value}")
|
|
208
239
|
|
|
209
|
-
def add_relation(
|
|
240
|
+
def add_relation(
|
|
241
|
+
self,
|
|
242
|
+
indicator,
|
|
243
|
+
target_object,
|
|
244
|
+
relationship_type="related-to",
|
|
245
|
+
target_name=None,
|
|
246
|
+
description=None,
|
|
247
|
+
):
|
|
210
248
|
ext_refs = []
|
|
211
249
|
|
|
212
250
|
with contextlib.suppress(Exception):
|
|
213
|
-
indicator[
|
|
214
|
-
|
|
251
|
+
indicator["external_references"].append(
|
|
252
|
+
target_object["external_references"][0]
|
|
253
|
+
)
|
|
254
|
+
ext_refs = [target_object["external_references"][0]]
|
|
215
255
|
|
|
216
256
|
if not description:
|
|
217
|
-
target_name =
|
|
257
|
+
target_name = (
|
|
258
|
+
target_name
|
|
259
|
+
or f"{target_object['external_references'][0]['external_id']} ({target_object['name']})"
|
|
260
|
+
)
|
|
218
261
|
description = f"{indicator['name']} {relationship_type} {target_name}"
|
|
219
262
|
|
|
220
|
-
rel =
|
|
221
|
-
id="relationship--"
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
)
|
|
263
|
+
rel = Relationship(
|
|
264
|
+
id="relationship--"
|
|
265
|
+
+ str(
|
|
266
|
+
uuid.uuid5(UUID_NAMESPACE, f"{indicator['id']}+{target_object['id']}")
|
|
225
267
|
),
|
|
226
|
-
source_ref=indicator[
|
|
227
|
-
target_ref=target_object[
|
|
268
|
+
source_ref=indicator["id"],
|
|
269
|
+
target_ref=target_object["id"],
|
|
228
270
|
relationship_type=relationship_type,
|
|
229
271
|
created_by_ref=self.report.created_by_ref,
|
|
230
272
|
description=description,
|
|
@@ -247,50 +289,82 @@ class Bundler:
|
|
|
247
289
|
if not attack_ids:
|
|
248
290
|
return []
|
|
249
291
|
logger.debug(f"retrieving attack objects: {attack_ids}")
|
|
250
|
-
endpoint = urljoin(
|
|
292
|
+
endpoint = urljoin(
|
|
293
|
+
os.environ["CTIBUTLER_BASE_URL"] + "/",
|
|
294
|
+
f"v1/attack-enterprise/objects/?attack_id=" + ",".join(attack_ids),
|
|
295
|
+
)
|
|
251
296
|
|
|
252
297
|
headers = {}
|
|
253
|
-
if api_key := os.environ.get(
|
|
254
|
-
headers[
|
|
298
|
+
if api_key := os.environ.get("CTIBUTLER_API_KEY"):
|
|
299
|
+
headers["API-KEY"] = api_key
|
|
255
300
|
|
|
256
301
|
return self._get_objects(endpoint, headers)
|
|
257
302
|
|
|
258
|
-
|
|
303
|
+
@classmethod
|
|
304
|
+
def get_attack_tactics(cls):
|
|
305
|
+
headers = {}
|
|
306
|
+
api_root = os.environ["CTIBUTLER_BASE_URL"] + "/"
|
|
307
|
+
if api_key := os.environ.get("CTIBUTLER_API_KEY"):
|
|
308
|
+
headers["API-KEY"] = api_key
|
|
309
|
+
|
|
310
|
+
endpoint = urljoin(
|
|
311
|
+
api_root, f"v1/attack-enterprise/objects/?attack_type=Tactic"
|
|
312
|
+
)
|
|
313
|
+
version_url = urljoin(api_root, f"v1/attack-enterprise/versions/installed/")
|
|
314
|
+
tactics = cls._get_objects(endpoint, headers=headers)
|
|
315
|
+
retval = dict(
|
|
316
|
+
version=requests.get(version_url, headers=headers).json()["latest"]
|
|
317
|
+
)
|
|
318
|
+
for tac in tactics:
|
|
319
|
+
retval[tac["x_mitre_shortname"]] = tac
|
|
320
|
+
retval[tac["external_references"][0]["external_id"]] = tac
|
|
321
|
+
return retval
|
|
322
|
+
|
|
323
|
+
@classmethod
|
|
324
|
+
def get_cve_objects(cls, cve_ids):
|
|
259
325
|
if not cve_ids:
|
|
260
326
|
return []
|
|
261
327
|
logger.debug(f"retrieving cve objects: {cve_ids}")
|
|
262
|
-
endpoint = urljoin(
|
|
328
|
+
endpoint = urljoin(
|
|
329
|
+
os.environ["VULMATCH_BASE_URL"] + "/",
|
|
330
|
+
f"v1/cve/objects/?cve_id=" + ",".join(cve_ids),
|
|
331
|
+
)
|
|
263
332
|
headers = {}
|
|
264
|
-
if api_key := os.environ.get(
|
|
265
|
-
headers[
|
|
333
|
+
if api_key := os.environ.get("VULMATCH_API_KEY"):
|
|
334
|
+
headers["API-KEY"] = api_key
|
|
266
335
|
|
|
267
|
-
return
|
|
336
|
+
return cls._get_objects(endpoint, headers)
|
|
268
337
|
|
|
269
|
-
|
|
338
|
+
@classmethod
|
|
339
|
+
def _get_objects(cls, endpoint, headers):
|
|
270
340
|
data = []
|
|
271
341
|
page = 1
|
|
272
342
|
while True:
|
|
273
|
-
resp = requests.get(
|
|
343
|
+
resp = requests.get(
|
|
344
|
+
endpoint, params=dict(page=page, page_size=1000), headers=headers
|
|
345
|
+
)
|
|
274
346
|
if resp.status_code != 200:
|
|
275
347
|
break
|
|
276
348
|
d = resp.json()
|
|
277
|
-
if len(d[
|
|
349
|
+
if len(d["objects"]) == 0:
|
|
278
350
|
break
|
|
279
|
-
data.extend(d[
|
|
280
|
-
page+=1
|
|
281
|
-
if d[
|
|
351
|
+
data.extend(d["objects"])
|
|
352
|
+
page += 1
|
|
353
|
+
if d["page_results_count"] < d["page_size"]:
|
|
282
354
|
break
|
|
283
355
|
return data
|
|
284
356
|
|
|
285
357
|
def bundle_detections(self, container: DetectionContainer):
|
|
286
|
-
self.
|
|
358
|
+
self.data = DataContainer(detections=container)
|
|
287
359
|
if not container.success:
|
|
288
360
|
return
|
|
289
361
|
for d in container.detections:
|
|
290
362
|
self.add_rule_indicator(d)
|
|
291
363
|
|
|
364
|
+
|
|
292
365
|
def make_logsouce_string(source: dict):
|
|
293
|
-
d = [
|
|
294
|
-
if k in [
|
|
295
|
-
|
|
296
|
-
|
|
366
|
+
d = [
|
|
367
|
+
f"{k}={v}" for k, v in source.items() if k in ["product", "service", "category"]
|
|
368
|
+
]
|
|
369
|
+
d_str = ", ".join(d)
|
|
370
|
+
return "log-source {" + d_str + "}"
|
|
@@ -5,7 +5,6 @@ from urllib.parse import urljoin
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
def check_llms():
|
|
10
9
|
from txt2detection.__main__ import parse_model
|
|
11
10
|
|
|
@@ -23,14 +22,17 @@ def check_llms():
|
|
|
23
22
|
|
|
24
23
|
def check_ctibutler_vulmatch(service):
|
|
25
24
|
session = requests.Session()
|
|
26
|
-
if service ==
|
|
27
|
-
base_url = os.getenv(
|
|
28
|
-
url = urljoin(
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
25
|
+
if service == "vulmatch":
|
|
26
|
+
base_url = os.getenv("VULMATCH_BASE_URL")
|
|
27
|
+
url = urljoin(
|
|
28
|
+
base_url,
|
|
29
|
+
"v1/cve/objects/vulnerability--f552f6f4-39da-48dc-8717-323772c99588/",
|
|
30
|
+
)
|
|
31
|
+
session.headers["API-KEY"] = os.environ.get("VULMATCH_API_KEY")
|
|
32
|
+
elif service == "ctibutler":
|
|
33
|
+
base_url = os.getenv("CTIBUTLER_BASE_URL")
|
|
34
|
+
url = urljoin(base_url, "v1/location/versions/available/")
|
|
35
|
+
session.headers["API-KEY"] = os.environ.get("CTIBUTLER_API_KEY")
|
|
34
36
|
|
|
35
37
|
try:
|
|
36
38
|
resp = session.get(url)
|
txt2detection/models.py
CHANGED
|
@@ -19,6 +19,8 @@ from stix2 import (
|
|
|
19
19
|
MarkingDefinition,
|
|
20
20
|
)
|
|
21
21
|
|
|
22
|
+
from txt2detection.ai_extractor.models import AttackFlowList
|
|
23
|
+
|
|
22
24
|
if typing.TYPE_CHECKING:
|
|
23
25
|
from txt2detection.bundler import Bundler
|
|
24
26
|
|
|
@@ -398,6 +400,15 @@ class DetectionContainer(BaseModel):
|
|
|
398
400
|
detections: list[Union[BaseDetection, AIDetection, SigmaRuleDetection]]
|
|
399
401
|
|
|
400
402
|
|
|
403
|
+
class DataContainer(BaseModel):
|
|
404
|
+
detections: DetectionContainer
|
|
405
|
+
attack_flow: AttackFlowList = Field(default=None)
|
|
406
|
+
navigator_layer: list = Field(default=None)
|
|
407
|
+
observables: list[dict] = Field(default=None)
|
|
408
|
+
cves: dict[str, str] = Field(default=None)
|
|
409
|
+
attacks: dict[str, str] = Field(default=None)
|
|
410
|
+
|
|
411
|
+
|
|
401
412
|
def tlp_from_tags(tags: list[SigmaTag]):
|
|
402
413
|
for tag in tags:
|
|
403
414
|
ns, _, level = tag.partition(".")
|
txt2detection/observables.py
CHANGED
txt2detection/utils.py
CHANGED
|
@@ -17,11 +17,14 @@ from .models import UUID_NAMESPACE
|
|
|
17
17
|
class DetectionLanguage(SimpleNamespace):
|
|
18
18
|
pass
|
|
19
19
|
|
|
20
|
+
|
|
20
21
|
def parse_model(value: str):
|
|
21
|
-
splits = value.split(
|
|
22
|
+
splits = value.split(":", 1)
|
|
22
23
|
provider = splits[0]
|
|
23
24
|
if provider not in ALL_AI_EXTRACTORS:
|
|
24
|
-
raise NotImplementedError(
|
|
25
|
+
raise NotImplementedError(
|
|
26
|
+
f"invalid AI provider in `{value}`, must be one of {list(ALL_AI_EXTRACTORS)}"
|
|
27
|
+
)
|
|
25
28
|
provider = ALL_AI_EXTRACTORS[provider]
|
|
26
29
|
try:
|
|
27
30
|
if len(splits) == 2:
|
|
@@ -30,8 +33,10 @@ def parse_model(value: str):
|
|
|
30
33
|
except Exception as e:
|
|
31
34
|
raise ModelError(f"Unable to initialize model `{value}`") from e
|
|
32
35
|
|
|
36
|
+
|
|
33
37
|
def make_identity(name, namespace=None, created_by_ref=None, object_marking_refs=None):
|
|
34
38
|
from .bundler import Bundler
|
|
39
|
+
|
|
35
40
|
if isinstance(namespace, str):
|
|
36
41
|
namespace = uuid.UUID(namespace)
|
|
37
42
|
namespace = namespace or UUID_NAMESPACE
|
|
@@ -41,25 +46,31 @@ def make_identity(name, namespace=None, created_by_ref=None, object_marking_refs
|
|
|
41
46
|
created_by_ref=created_by_ref or Bundler.default_identity.id,
|
|
42
47
|
created=datetime(2020, 1, 1),
|
|
43
48
|
modified=datetime(2020, 1, 1),
|
|
44
|
-
object_marking_refs=object_marking_refs
|
|
49
|
+
object_marking_refs=object_marking_refs
|
|
50
|
+
or [
|
|
45
51
|
"marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
|
|
46
|
-
"marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7"
|
|
52
|
+
"marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
|
|
47
53
|
],
|
|
48
54
|
)
|
|
49
55
|
|
|
50
56
|
|
|
51
57
|
def validate_token_count(max_tokens, input, extractor: BaseAIExtractor):
|
|
52
|
-
logging.info(
|
|
58
|
+
logging.info("INPUT_TOKEN_LIMIT = %d", max_tokens)
|
|
53
59
|
token_count = extractor.count_tokens(input)
|
|
54
|
-
logging.info(
|
|
55
|
-
if
|
|
56
|
-
raise Exception(
|
|
60
|
+
logging.info("TOKEN COUNT FOR %s: %d", extractor.extractor_name, token_count)
|
|
61
|
+
if token_count > max_tokens:
|
|
62
|
+
raise Exception(
|
|
63
|
+
f"{extractor.extractor_name}: input_file token count ({token_count}) exceeds INPUT_TOKEN_LIMIT ({max_tokens})"
|
|
64
|
+
)
|
|
57
65
|
|
|
58
66
|
|
|
59
67
|
@lru_cache(maxsize=5)
|
|
60
68
|
def get_licenses(date):
|
|
61
|
-
resp = requests.get(
|
|
62
|
-
|
|
69
|
+
resp = requests.get(
|
|
70
|
+
"https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
|
|
71
|
+
)
|
|
72
|
+
return {l["licenseId"]: l["name"] for l in resp.json()["licenses"]}
|
|
73
|
+
|
|
63
74
|
|
|
64
75
|
def valid_licenses():
|
|
65
76
|
return get_licenses(datetime.now().date().isoformat())
|
|
@@ -75,9 +86,10 @@ def remove_rule_specific_tags(tags):
|
|
|
75
86
|
return labels
|
|
76
87
|
|
|
77
88
|
|
|
78
|
-
def as_date(d:
|
|
89
|
+
def as_date(d: "date|datetime"):
|
|
79
90
|
if isinstance(d, datetime):
|
|
80
91
|
return d.date()
|
|
81
92
|
return d
|
|
82
93
|
|
|
83
|
-
|
|
94
|
+
|
|
95
|
+
STATUSES = ["stable", "test", "experimental", "deprecated", "unsupported"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: txt2detection
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.9
|
|
4
4
|
Summary: A command line tool that takes a txt file containing threat intelligence and turns it into a detection rule.
|
|
5
5
|
Project-URL: Homepage, https://github.com/muchdogesec/txt2detection
|
|
6
6
|
Project-URL: Issues, https://github.com/muchdogesec/txt2detection/issues
|
|
@@ -72,12 +72,6 @@ txt2detection allows a user to enter some threat intelligence as a file to consi
|
|
|
72
72
|
2. Based on the user input, AI prompts are structured and sent to produce an intelligence rule
|
|
73
73
|
3. Rules converted into STIX objects
|
|
74
74
|
|
|
75
|
-
## tl;dr
|
|
76
|
-
|
|
77
|
-
[](https://www.youtube.com/watch?v=uJWXYKyu3Xg)
|
|
78
|
-
|
|
79
|
-
[Watch the demo](https://www.youtube.com/watch?v=uJWXYKyu3Xg).
|
|
80
|
-
|
|
81
75
|
## Usage
|
|
82
76
|
|
|
83
77
|
### Setup
|
|
@@ -162,12 +156,14 @@ Use this mode to generate a set of rules from an input text file;
|
|
|
162
156
|
* `--license` (optional): [License of the rule according to the SPDX ID specification](https://spdx.org/licenses/). Will be added to the rule.
|
|
163
157
|
* `--reference_urls` (optional): A list of URLs to be added to the `references` property of the Sigma Rule and to the `external_references` property of the Indicator and Report STIX objects created, e.g. `"https://www.google.com/" "https://www.facebook.com/"`
|
|
164
158
|
* `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
|
|
165
|
-
*
|
|
159
|
+
* `--ai_provider` (required): defines the `provider:model` to be used to generate the rule. Select one option. Currently supports:
|
|
166
160
|
* Provider (env var required `OPENROUTER_API_KEY`): `openrouter:`, providers/models `openai/gpt-4o`, `deepseek/deepseek-chat` ([More here](https://openrouter.ai/models))
|
|
167
161
|
* Provider (env var required `OPENAI_API_KEY`): `openai:`, models e.g.: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` ([More here](https://platform.openai.com/docs/models))
|
|
168
162
|
* Provider (env var required `ANTHROPIC_API_KEY`): `anthropic:`, models e.g.: `claude-3-5-sonnet-latest`, `claude-3-5-haiku-latest`, `claude-3-opus-latest` ([More here](https://docs.anthropic.com/en/docs/about-claude/models))
|
|
169
163
|
* Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
170
164
|
* Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
|
|
165
|
+
* `--ai_create_attack_flow` (boolean, default `false`): passing this flag will also prompt the AI model (the same one entered for `--ai_provider`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags, defining the logical order in which they are described. Note, Sigma currently supports ATT&CK Enterprise only.
|
|
166
|
+
* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as Attack Flow generation relies on this setting being true)
|
|
171
167
|
|
|
172
168
|
Note, in this mode, the following values will be automatically assigned to the rule
|
|
173
169
|
|
|
@@ -194,6 +190,8 @@ Note, in this mode you should be aware of a few things;
|
|
|
194
190
|
* `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
|
|
195
191
|
* `status` (optional): either `stable`, `test`, `experimental`, `deprecated`, `unsupported`. If passed, will overwrite any existing `status` recorded in the rule
|
|
196
192
|
* `level` (optional): either `informational`, `low`, `medium`, `high`, `critical`. If passed, will overwrite any existing `level` recorded in the rule
|
|
193
|
+
* `--ai_create_attack_flow` (boolean, default `false`): passing this flag will also prompt the AI model (the same one entered for `--ai_provider`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags, defining the logical order in which they are described. Note, Sigma currently supports ATT&CK Enterprise only.
|
|
194
|
+
* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as Attack Flow generation relies on this setting being true)
|
|
197
195
|
|
|
198
196
|
### A note on observable extraction
|
|
199
197
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
|
|
2
|
+
txt2detection/__main__.py,sha256=s5XcIctE59ALjys6Y8lRIqS_pQWi1mlNo2gyG8_XS5s,11622
|
|
3
|
+
txt2detection/attack_flow.py,sha256=1Ns98ZEoiN8kH-iSo7d6zYtplm11QkhPQAvSZsW4WXQ,8853
|
|
4
|
+
txt2detection/bundler.py,sha256=eHyr6jlnd4ZvynHkyy5Hposkp_XqEAxEwGzlViSq1xU,13319
|
|
5
|
+
txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
|
|
6
|
+
txt2detection/models.py,sha256=_-sR03FEWI46OUZdL7U0tibNn909B0NU9LWNzopBtiY,12888
|
|
7
|
+
txt2detection/observables.py,sha256=RxgJchvk6_Z2pBxJ6MAGsx00gj8TyRt9W2BTQTb1F9o,6762
|
|
8
|
+
txt2detection/utils.py,sha256=EJ5lMhnghUgW0JbcRmeiDXYwm5GaB6XrG4cUjru-52g,2812
|
|
9
|
+
txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
|
|
10
|
+
txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
|
|
11
|
+
txt2detection/ai_extractor/base.py,sha256=2C3d4BoH7I4fnvp6cLxbtjiFVPm4WJLFwnS_lAppHr8,3210
|
|
12
|
+
txt2detection/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
|
|
13
|
+
txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
|
|
14
|
+
txt2detection/ai_extractor/models.py,sha256=xMTvUHoxIflbBA4mkGLTjwf657DVEOxd6gqLpEUciQ4,963
|
|
15
|
+
txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
|
|
16
|
+
txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
|
|
17
|
+
txt2detection/ai_extractor/prompts.py,sha256=xI82PelsTidnRzi5wnNbEC4lmkio92YUDd8SZu4CQiE,10961
|
|
18
|
+
txt2detection/ai_extractor/utils.py,sha256=SUxyPhkGp5yDbX_H_E018i93R8IbyLsQ00PIBDecfuc,540
|
|
19
|
+
txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
|
|
20
|
+
txt2detection-1.0.9.dist-info/METADATA,sha256=UHkUnaL9wEt78RNw0EmQenodg2qxZ3gsTDkmVC2W7IE,15869
|
|
21
|
+
txt2detection-1.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
+
txt2detection-1.0.9.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
|
|
23
|
+
txt2detection-1.0.9.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
|
|
24
|
+
txt2detection-1.0.9.dist-info/RECORD,,
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
|
|
2
|
-
txt2detection/__main__.py,sha256=R9TgWWGzA8rxF39rZr2MNOrQubhItdRAgP2nd8Tfb78,9337
|
|
3
|
-
txt2detection/bundler.py,sha256=VaU7pv4NYR2gQD2anzMw2CnOWHphWVJaeD8nrkHBFPg,11416
|
|
4
|
-
txt2detection/credential_checker.py,sha256=YoOe1ABjNfAJIcNE6PRAZtvznTybUKHNBB57DPQhZsU,2564
|
|
5
|
-
txt2detection/models.py,sha256=uOWRShMnVUOEPtKusAdthk3bFnh5T6HJkuBEEkedDHo,12508
|
|
6
|
-
txt2detection/observables.py,sha256=NNnwF_gOsPmAbfgk5fj1rcluMsShZOHssAGy2VJgvmo,6763
|
|
7
|
-
txt2detection/utils.py,sha256=rLBFzpSepksXkONnqWkRqiMr8R4LTp4j8OrashFVUPc,2741
|
|
8
|
-
txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
|
|
9
|
-
txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
|
|
10
|
-
txt2detection/ai_extractor/base.py,sha256=urZe_kpYu3BwXyKJsQ0GQIEtTasUQYp4dFzuz34Hai8,2336
|
|
11
|
-
txt2detection/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
|
|
12
|
-
txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
|
|
13
|
-
txt2detection/ai_extractor/openai.py,sha256=e5Of3i-T2CvUSx1T_v7wHOuewHK2IoImxZXfXeZc3Ds,625
|
|
14
|
-
txt2detection/ai_extractor/openrouter.py,sha256=-KcdcyKPpaeiGfvqJB4L7vMmcXTDhml3Mr0T6kwANZA,645
|
|
15
|
-
txt2detection/ai_extractor/prompts.py,sha256=ACYFWUafdHXHBXz7fq_RSooA4PJ-mBdaBzqsOOSFpVg,5918
|
|
16
|
-
txt2detection/ai_extractor/utils.py,sha256=SUxyPhkGp5yDbX_H_E018i93R8IbyLsQ00PIBDecfuc,540
|
|
17
|
-
txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
|
|
18
|
-
txt2detection-1.0.8.dist-info/METADATA,sha256=NVfK0XlFBnmwCZXSl5aGFNSuRW6plQSLr4hTK_gqqYA,14520
|
|
19
|
-
txt2detection-1.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
20
|
-
txt2detection-1.0.8.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
|
|
21
|
-
txt2detection-1.0.8.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
|
|
22
|
-
txt2detection-1.0.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|