txt2detection-0.0.2rc12-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of txt2detection might be problematic.
- txt2detection/__init__.py +2 -0
- txt2detection/__main__.py +189 -0
- txt2detection/ai_extractor/__init__.py +16 -0
- txt2detection/ai_extractor/anthropic.py +12 -0
- txt2detection/ai_extractor/base.py +57 -0
- txt2detection/ai_extractor/deepseek.py +19 -0
- txt2detection/ai_extractor/gemini.py +18 -0
- txt2detection/ai_extractor/openai.py +20 -0
- txt2detection/ai_extractor/openrouter.py +20 -0
- txt2detection/ai_extractor/prompts.py +116 -0
- txt2detection/ai_extractor/utils.py +19 -0
- txt2detection/bundler.py +283 -0
- txt2detection/config/detection_languages.yaml +14 -0
- txt2detection/models.py +391 -0
- txt2detection/observables.py +186 -0
- txt2detection/utils.py +81 -0
- txt2detection-0.0.2rc12.dist-info/METADATA +199 -0
- txt2detection-0.0.2rc12.dist-info/RECORD +21 -0
- txt2detection-0.0.2rc12.dist-info/WHEEL +4 -0
- txt2detection-0.0.2rc12.dist-info/entry_points.txt +2 -0
- txt2detection-0.0.2rc12.dist-info/licenses/LICENSE +202 -0
txt2detection/bundler.py
ADDED
@@ -0,0 +1,283 @@
import contextlib
import enum
import json
import logging
import os
from urllib.parse import urljoin
import requests
from stix2 import (
    Report,
    Identity,
    MarkingDefinition,
    Relationship,
    Bundle,
)
from stix2.serialization import serialize
import hashlib

from txt2detection import observables
from txt2detection.models import AIDetection, BaseDetection, DetectionContainer, UUID_NAMESPACE, SigmaRuleDetection

from datetime import UTC, datetime as dt
import uuid
from stix2 import parse as parse_stix

from txt2detection.models import TLP_LEVEL
from txt2detection.utils import STATUSES, remove_rule_specific_tags


logger = logging.getLogger("txt2detection.bundler")

class Bundler:
    identity = None
    object_marking_refs = []
    uuid = None
    id_map = dict()
    detections: DetectionContainer
    # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
    default_identity = Identity(**{
        "type": "identity",
        "spec_version": "2.1",
        "id": "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
        "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        "created": "2020-01-01T00:00:00.000Z",
        "modified": "2020-01-01T00:00:00.000Z",
        "name": "txt2detection",
        "description": "https://github.com/muchdogesec/txt2detection",
        "identity_class": "system",
        "sectors": [
            "technology"
        ],
        "contact_information": "https://www.dogesec.com/contact/",
        "object_marking_refs": [
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb"
        ]
    })
    # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/marking-definition/txt2detection.json
    default_marking = MarkingDefinition(**{
        "type": "marking-definition",
        "spec_version": "2.1",
        "id": "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
        "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        "created": "2020-01-01T00:00:00.000Z",
        "definition_type": "statement",
        "definition": {
            "statement": "This object was created using: https://github.com/muchdogesec/txt2detection"
        },
        "object_marking_refs": [
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb"
        ]
    })

    @classmethod
    def generate_report_id(cls, created_by_ref, created, name):
        if not created_by_ref:
            created_by_ref = cls.default_identity['id']
        return str(
            uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}")
        )

    def __init__(
        self,
        name,
        identity,
        tlp_level,
        description,
        labels,
        created=None,
        modified=None,
        report_id=None,
        external_refs: list=None,
        reference_urls=None,
        license=None,
        **kwargs,
    ) -> None:
        self.created = created or dt.now(UTC)
        self.modified = modified or self.created
        self.identity = identity or self.default_identity
        self.tlp_level = TLP_LEVEL.get(tlp_level or 'clear')
        self.uuid = report_id or self.generate_report_id(self.identity.id, self.created, name)
        self.reference_urls = reference_urls or []
        self.labels = labels or []
        self.license = license

        self.job_id = f"report--{self.uuid}"
        self.external_refs = (external_refs or []) + [dict(source_name='txt2detection', url=url, description='txt2detection-reference') for url in self.reference_urls]

        self.report = Report(
            created_by_ref=self.identity.id,
            name=name,
            id=self.job_id,
            description=description,
            object_refs=[
                f"note--{self.uuid}"
            ], # won't allow creation with empty object_refs
            created=self.created,
            modified=self.modified,
            object_marking_refs=[self.tlp_level.value.id],
            labels=remove_rule_specific_tags(self.labels),
            published=self.created,
            external_references=[
                dict(
                    source_name="description_md5_hash",
                    external_id=hashlib.md5((description or "").encode()).hexdigest(),
                )
            ] + self.external_refs,
        )
        self.report.object_refs.clear() # clear object refs
        self.set_defaults()
        self.all_objects = set()
        if not description:
            self.report.external_references.pop(0)

    def set_defaults(self):
        # self.value.extend(TLP_LEVEL.values()) # adds all tlp levels
        self.bundle = Bundle(objects=[self.tlp_level.value], id=f"bundle--{self.uuid}")

        self.bundle.objects.extend([self.default_marking, self.identity, self.report])
        # add default STIX 2.1 marking definition for txt2detection
        self.report.object_marking_refs.append(self.default_marking.id)

    def add_ref(self, sdo, append_report=False):
        sdo_id = sdo["id"]
        if sdo_id in self.all_objects:
            return
        self.bundle.objects.append(sdo)
        if sdo_id not in self.report.object_refs and append_report:
            self.report.object_refs.append(sdo_id)
        self.all_objects.add(sdo_id)

    def add_rule_indicator(self, detection: SigmaRuleDetection):
        indicator_types = getattr(detection, 'indicator_types', None)
        if isinstance(detection, AIDetection):
            detection = detection.to_sigma_rule_detection(self)
        assert isinstance(detection, SigmaRuleDetection), f"detection of type {type(detection)} not supported"
        indicator = {
            "type": "indicator",
            "id": "indicator--"+str(detection.detection_id),
            "spec_version": "2.1",
            "created_by_ref": self.report.created_by_ref,
            "created": self.report.created,
            "modified": self.report.modified,
            "indicator_types": indicator_types,
            "name": detection.title,
            "description": detection.description,
            "labels": remove_rule_specific_tags(self.labels),
            "pattern_type": 'sigma',
            "pattern": detection.make_rule(self),
            "valid_from": self.report.created,
            "object_marking_refs": self.report.object_marking_refs,
            "external_references": self.external_refs + detection.external_references,
        }
        indicator['external_references'].append(
            {
                "source_name": "rule_md5_hash",
                "external_id": hashlib.md5(indicator['pattern'].encode()).hexdigest()
            }
        )

        logger.debug(f"===== rule {detection.detection_id} =====")
        logger.debug("```yaml\n"+indicator['pattern']+"\n```")
        logger.debug(f" =================== end of rule =================== ")

        for obj in self.get_attack_objects(detection.mitre_attack_ids):
            self.add_ref(obj)
            self.add_relation(indicator, obj)

        for obj in self.get_cve_objects(detection.cve_ids):
            self.add_ref(obj)
            self.add_relation(indicator, obj)

        self.add_ref(parse_stix(indicator, allow_custom=True), append_report=True)

        for ob_type, ob_value in set(observables.find_stix_observables(detection.detection)):
            try:
                obj = observables.to_stix_object(ob_type, ob_value)
                self.add_ref(obj)
                self.add_relation(indicator, obj, 'detects', target_name=ob_value)
            except:
                logger.exception(f"failed to process observable {ob_type}/{ob_value}")


    def add_relation(self, indicator, target_object, relationship_type='detects', target_name=None):
        ext_refs = []

        with contextlib.suppress(Exception):
            indicator['external_references'].append(target_object['external_references'][0])
            ext_refs = [target_object['external_references'][0]]

        target_name = target_name or f"{target_object['external_references'][0]['external_id']} ({target_object['name']})"

        rel = Relationship(
            id="relationship--" + str(
                uuid.uuid5(
                    UUID_NAMESPACE, f"{indicator['id']}+{target_object['id']}"
                )
            ),
            source_ref=indicator['id'],
            target_ref=target_object['id'],
            relationship_type=relationship_type,
            created_by_ref=self.report.created_by_ref,
            description=f"{indicator['name']} {relationship_type} {target_name}",
            created=self.report.created,
            modified=self.report.modified,
            object_marking_refs=self.report.object_marking_refs,
            external_references=ext_refs,
            allow_custom=True,
        )
        self.add_ref(rel)

    def to_json(self):
        return serialize(self.bundle, indent=4)

    @property
    def bundle_dict(self):
        return json.loads(self.to_json())

    def get_attack_objects(self, attack_ids):
        if not attack_ids:
            return []
        logger.debug(f"retrieving attack objects: {attack_ids}")
        endpoint = urljoin(os.environ['CTIBUTLER_BASE_URL'] + '/', f"v1/attack-enterprise/objects/?attack_id="+','.join(attack_ids))

        headers = {}
        if api_key := os.environ.get('CTIBUTLER_API_KEY'):
            headers['Authorization'] = "Bearer " + api_key

        return self._get_objects(endpoint, headers)

    def get_cve_objects(self, cve_ids):
        if not cve_ids:
            return []
        logger.debug(f"retrieving cve objects: {cve_ids}")
        endpoint = urljoin(os.environ['VULMATCH_BASE_URL'] + '/', f"v1/cve/objects/?cve_id="+','.join(cve_ids))
        headers = {}
        if api_key := os.environ.get('VULMATCH_API_KEY'):
            headers['Authorization'] = "Bearer " + api_key

        return self._get_objects(endpoint, headers)

    def _get_objects(self, endpoint, headers):
        data = []
        page = 1
        while True:
            resp = requests.get(endpoint, params=dict(page=page, page_size=1000), headers=headers)
            if resp.status_code != 200:
                break
            d = resp.json()
            if len(d['objects']) == 0:
                break
            data.extend(d['objects'])
            page+=1
            if d['page_results_count'] < d['page_size']:
                break
        return data

    def bundle_detections(self, container: DetectionContainer):
        self.detections = container
        if not container.success:
            return
        for d in container.detections:
            self.add_rule_indicator(d)
txt2detection/config/detection_languages.yaml
ADDED
@@ -0,0 +1,14 @@
# currently only supports Sigma
## ===== Sigma =====

sigma:
  type: "detection_language"
  name: "Sigma"
  description: "https://sigmahq.io/docs/basics/rules.html"
  products:
    -
  documentation: ""
  created: 2020-01-01
  modified: 2020-01-01
  created_by: DOGESEC
  version: 1.0.0
txt2detection/models.py
ADDED
@@ -0,0 +1,391 @@
import enum
import json
import re
import typing
import uuid
from slugify import slugify
from datetime import date as dt_date
from typing import Any, List, Literal, Optional, Union
from uuid import UUID

import jsonschema
from pydantic import BaseModel, Field, computed_field, field_validator
from pydantic_core import PydanticCustomError, core_schema
import yaml

from stix2 import (
    MarkingDefinition,
)

if typing.TYPE_CHECKING:
    from txt2detection.bundler import Bundler

UUID_NAMESPACE = uuid.UUID("a4d70b75-6f4a-5d19-9137-da863edd33d7")

TAG_PATTERN = re.compile(r"^[a-z0-9_-]+\.[a-z0-9._-]+$")

MITRE_TACTIC_MAP = {
    "initial-access": "TA0001",
    "execution": "TA0002",
    "persistence": "TA0003",
    "privilege-escalation": "TA0004",
    "defense-evasion": "TA0005",
    "credential-access": "TA0006",
    "discovery": "TA0007",
    "lateral-movement": "TA0008",
    "collection": "TA0009",
    "exfiltration": "TA0010",
    "command-and-control": "TA0011",
    "impact": "TA0040",
}


class TLP_LEVEL(enum.Enum):
    CLEAR = MarkingDefinition(
        spec_version="2.1",
        id="marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
        created="2022-10-01T00:00:00.000Z",
        definition_type="TLP:CLEAR",
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": "clear",
            }
        },
    )
    GREEN = MarkingDefinition(
        spec_version="2.1",
        id="marking-definition--bab4a63c-aed9-4cf5-a766-dfca5abac2bb",
        created="2022-10-01T00:00:00.000Z",
        definition_type="TLP:GREEN",
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": "green",
            }
        },
    )
    AMBER = MarkingDefinition(
        spec_version="2.1",
        id="marking-definition--55d920b0-5e8b-4f79-9ee9-91f868d9b421",
        created="2022-10-01T00:00:00.000Z",
        definition_type="TLP:AMBER",
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": "amber",
            }
        },
    )
    AMBER_STRICT = MarkingDefinition(
        spec_version="2.1",
        id="marking-definition--939a9414-2ddd-4d32-a0cd-375ea402b003",
        created="2022-10-01T00:00:00.000Z",
        definition_type="TLP:AMBER+STRICT",
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": "amber+strict",
            }
        },
    )
    RED = MarkingDefinition(
        spec_version="2.1",
        id="marking-definition--e828b379-4e03-4974-9ac4-e53a884c97c1",
        created="2022-10-01T00:00:00.000Z",
        definition_type="TLP:RED",
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": "red",
            }
        },
    )

    @classmethod
    def levels(cls):
        return dict(
            clear=cls.CLEAR,
            green=cls.GREEN,
            amber=cls.AMBER,
            amber_strict=cls.AMBER_STRICT,
            red=cls.RED,
        )

    @classmethod
    def values(cls):
        return [
            cls.CLEAR.value,
            cls.GREEN.value,
            cls.AMBER.value,
            cls.AMBER_STRICT.value,
            cls.RED.value,
        ]

    @classmethod
    def get(cls, level: 'str|TLP_LEVEL'):
        if isinstance(level, cls):
            return level
        level = level.lower()
        level = level.replace('+', '_').replace('-', '_')
        if level not in cls.levels():
            raise Exception(f"unsupported tlp level: `{level}`")
        return cls.levels()[level]

    @property
    def name(self):
        return super().name.lower()

class Statuses(enum.StrEnum):
    stable = enum.auto()
    test = enum.auto()
    experimental = enum.auto()
    deprecated = enum.auto()
    unsupported = enum.auto()

class Level(enum.StrEnum):
    informational = enum.auto()
    low = enum.auto()
    medium = enum.auto()
    high = enum.auto()
    critical = enum.auto()

class SigmaTag(str):
    @classmethod
    def __get_pydantic_core_schema__(
        cls,
        _source: type[Any],
        _handler,
    ) -> core_schema.CoreSchema:
        return core_schema.no_info_after_validator_function(cls._validate, core_schema.str_schema())

    @classmethod
    def __get_pydantic_json_schema__(
        cls, core_schema: core_schema.CoreSchema, handler
    ):
        field_schema = handler(core_schema)
        field_schema.update(type='string', pattern=TAG_PATTERN.pattern, format='sigma-tag')
        return field_schema

    @classmethod
    def _validate(cls, input_value: str, /) -> str:
        if not TAG_PATTERN.match(input_value):
            raise PydanticCustomError(
                'value_error',
                'value is not a valid SIGMA tag: {reason}',
                {'reason': f'Must be in format namespace.value and match pattern {TAG_PATTERN.pattern}'},
            )
        return input_value

class RelatedRule(BaseModel):
    id: UUID
    type: Literal[
        "derived", "obsolete", "merged", "renamed", "similar"
    ]

class BaseDetection(BaseModel):
    title: str
    description: str
    detection: dict
    logsource: dict
    status: Statuses = Statuses.experimental
    falsepositives: list[str]
    tags: list[str]
    level: Level
    _custom_id = None
    _extra_data: dict

    def model_post_init(self, __context):
        self.tags = self.tags or []
        self._extra_data = dict()
        return super().model_post_init(__context)

    @property
    def detection_id(self):
        return str(self._custom_id or getattr(self, "id", None) or uuid.uuid4())

    @detection_id.setter
    def detection_id(self, custom_id):
        self._custom_id = custom_id.split("--")[-1]

    @property
    def tlp_level(self):
        return tlp_from_tags(self.tags)

    @tlp_level.setter
    def tlp_level(self, level):
        set_tlp_level_in_tags(self.tags, level)

    def set_labels(self, labels):
        self.tags.extend(labels)

    def set_extra_data_from_bundler(self, bundler: "Bundler"):
        raise NotImplementedError('this class should no longer be in use')


    def make_rule(self, bundler: "Bundler"):
        self.set_extra_data_from_bundler(bundler)
        self.tags = list(dict.fromkeys(self.tags))

        rule = dict(
            id=self.detection_id,
            **self.model_dump(
                exclude=["indicator_types", "id"],
                mode="json",
                by_alias=True
            ),
        )
        for k, v in list(rule.items()):
            if not v:
                rule.pop(k, None)

        self.validate_rule_with_json_schema(rule)
        if getattr(self, 'date', 0):
            rule.update(date=self.date)
        if getattr(self, 'modified', 0):
            rule.update(modified=self.modified)
        return yaml.dump(rule, sort_keys=False, indent=4)

    def validate_rule_with_json_schema(self, rule):
        jsonschema.validate(
            rule,
            {
                "$ref": "https://github.com/SigmaHQ/sigma-specification/raw/refs/heads/main/json-schema/sigma-detection-rule-schema.json"
            },
        )

    @property
    def external_references(self):
        refs = []
        for attr in ['level', 'status', 'license']:
            if attr_val := getattr(self, attr, None):
                refs.append(dict(source_name=f'sigma-{attr}', description=attr_val))
        return refs

    @property
    def mitre_attack_ids(self):
        retval = []
        for label in self.tags:
            namespace, _, label_id = label.partition(".")
            if namespace == "attack":
                retval.append(MITRE_TACTIC_MAP.get(label_id, label_id.upper()))
        return retval

    @property
    def cve_ids(self):
        retval = []
        for label in self.tags:
            namespace, _, label_id = label.partition(".")
            if namespace == "cve":
                retval.append(namespace.upper() + "-" + label_id)
        return retval


class AIDetection(BaseDetection):
    indicator_types: list[str] = Field(default_factory=list)

    def to_sigma_rule_detection(self, bundler):
        rule_dict = {
            **self.model_dump(exclude=['indicator_types']),
            **dict(date=bundler.report.created.date(), modified=bundler.report.modified.date(), id=uuid.uuid4())
        }
        try:
            return SigmaRuleDetection.model_validate(rule_dict)
        except Exception as e:
            raise ValueError(dict(message='validate ai output failed', error=e, content=rule_dict))

class SigmaRuleDetection(BaseDetection):
    title: str
    id: Optional[UUID] = None
    related: Optional[list[RelatedRule]] = None
    name: Optional[str] = None
    taxonomy: Optional[str] = None
    status: Optional[Statuses] = None
    description: Optional[str] = None
    license: Optional[str] = None
    author: Optional[str] = None
    references: Optional[List[str]] = Field(default_factory=list)
    date: Optional["dt_date"] = Field(alias="date", default=None)
    modified: Optional["dt_date"] = None
    logsource: dict
    detection: dict
    fields: Optional[List[str]] = None
    falsepositives: Optional[List[str]] = None
    level: Optional[Level] = None
    tags: Optional[List[SigmaTag]] = Field(default_factory=[])
    scope: Optional[List[str]] = None
    _indicator_types: list = None

    @property
    def detection_id(self):
        return str(self.id)

    @property
    def indicator_types(self):
        return self._indicator_types

    @indicator_types.setter
    def indicator_types(self, types):
        self._indicator_types = types

    @detection_id.setter
    def detection_id(self, new_id):
        if self.id and str(self.id) != str(new_id):
            self.related = self.related or []
            self.related.append(RelatedRule(id=self.id, type="renamed"))
        self.id = new_id

    @field_validator('tags', mode='after')
    @classmethod
    def validate_tlp(cls, tags: list[str]):
        tlps = []
        for tag in tags:
            if tag.startswith('tlp.'):
                tlps.append(tag)
        if len(tlps) > 1:
            raise ValueError(f'tag must not contain more than one tag in tlp namespace. Got {tlps}')
        return tags

    @field_validator('modified', mode='after')
    @classmethod
    def validate_modified(cls, modified, info):
        if info.data.get('date') == modified:
            return None
        return modified

    def set_extra_data_from_bundler(self, bundler: "Bundler"):
        if not bundler:
            return

        if not self.date:
            from .utils import as_date
            self.date = as_date(bundler.created)

        self.set_labels(bundler.labels)
        self.tlp_level = bundler.tlp_level.name
        self.author = bundler.report.created_by_ref
        self.license = bundler.license
        self.references = bundler.reference_urls

class DetectionContainer(BaseModel):
    success: bool
    detections: list[Union[BaseDetection , AIDetection, SigmaRuleDetection]]



def tlp_from_tags(tags: list[SigmaTag]):
    for tag in tags:
        ns, _, level = tag.partition(".")
        if ns != "tlp":
            continue
        if tlp_level := TLP_LEVEL.get(level.replace("-", "_")):
            return tlp_level
    return None

def set_tlp_level_in_tags(tags: list[SigmaTag], level):
    level = str(level)
    for i, tag in enumerate(tags):
        if tag.startswith('tlp.'):
            tags.remove(tag)
    tags.append('tlp.'+level.replace("_", "-"))
    return tags
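
A minimal sketch of how the models above can be used on their own (illustrative only; the rule title, logsource, detection, and tag values are invented for the example and are not part of the package). SigmaRuleDetection validates a hand-written rule dictionary, and the tag-derived helpers resolve ATT&CK, CVE, and TLP information from the tags list.

from txt2detection.models import SigmaRuleDetection

# Tags must match TAG_PATTERN (namespace.value); exactly one tlp.* tag is allowed.
rule = SigmaRuleDetection.model_validate({
    "title": "Suspicious PowerShell download cradle",  # invented example values
    "logsource": {"product": "windows", "category": "process_creation"},
    "detection": {
        "selection": {"CommandLine|contains": "DownloadString"},
        "condition": "selection",
    },
    "tags": ["attack.t1059.001", "cve.2024-3094", "tlp.green"],
})
print(rule.mitre_attack_ids)  # ['T1059.001']
print(rule.cve_ids)           # ['CVE-2024-3094']
print(rule.tlp_level.name)    # green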