txt2detection 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of txt2detection might be problematic. Click here for more details.
- txt2detection/__main__.py +31 -36
- txt2detection/ai_extractor/base.py +0 -23
- txt2detection/ai_extractor/deepseek.py +5 -4
- txt2detection/ai_extractor/prompts.py +7 -129
- txt2detection/ai_extractor/utils.py +1 -1
- txt2detection/attack_navigator.py +66 -0
- txt2detection/bundler.py +66 -36
- txt2detection/models.py +6 -7
- txt2detection/observables.py +0 -24
- txt2detection/utils.py +5 -0
- {txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/METADATA +3 -5
- txt2detection-1.0.12.dist-info/RECORD +23 -0
- txt2detection/ai_extractor/models.py +0 -34
- txt2detection/attack_flow.py +0 -231
- txt2detection-1.0.10.dist-info/RECORD +0 -24
- {txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/WHEEL +0 -0
- {txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/entry_points.txt +0 -0
- {txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/licenses/LICENSE +0 -0
txt2detection/__main__.py
CHANGED
|
@@ -10,10 +10,11 @@ import logging
|
|
|
10
10
|
import re
|
|
11
11
|
import sys
|
|
12
12
|
import uuid
|
|
13
|
+
from pydantic import ValidationError
|
|
13
14
|
from stix2 import Identity
|
|
14
15
|
import yaml
|
|
15
16
|
|
|
16
|
-
from txt2detection import
|
|
17
|
+
from txt2detection import credential_checker
|
|
17
18
|
from txt2detection.ai_extractor.base import BaseAIExtractor
|
|
18
19
|
from txt2detection.models import (
|
|
19
20
|
TAG_PATTERN,
|
|
@@ -185,17 +186,11 @@ def parse_args():
|
|
|
185
186
|
choices=valid_licenses(),
|
|
186
187
|
)
|
|
187
188
|
mode_parser.add_argument(
|
|
188
|
-
"--
|
|
189
|
+
"--create_attack_navigator_layer",
|
|
189
190
|
help="Create navigator layer",
|
|
190
191
|
action="store_true",
|
|
191
192
|
default=False,
|
|
192
193
|
)
|
|
193
|
-
mode_parser.add_argument(
|
|
194
|
-
"--ai_create_attack_flow",
|
|
195
|
-
help="Create attack flow",
|
|
196
|
-
action="store_true",
|
|
197
|
-
default=False,
|
|
198
|
-
)
|
|
199
194
|
|
|
200
195
|
file.add_argument(
|
|
201
196
|
"--input_file",
|
|
@@ -228,11 +223,6 @@ def parse_args():
|
|
|
228
223
|
if args.mode != "sigma":
|
|
229
224
|
assert args.ai_provider, "--ai_provider is required in file or txt mode"
|
|
230
225
|
|
|
231
|
-
if args.ai_create_attack_navigator_layer or args.ai_create_attack_flow:
|
|
232
|
-
assert (
|
|
233
|
-
args.ai_provider
|
|
234
|
-
), "--ai_provider is required when --ai_create_attack_navigator_layer/--ai_create_attack_flow is passed"
|
|
235
|
-
|
|
236
226
|
if args.mode == "file":
|
|
237
227
|
args.input_text = args.input_file
|
|
238
228
|
|
|
@@ -253,21 +243,16 @@ def run_txt2detection(
|
|
|
253
243
|
labels: list[str],
|
|
254
244
|
report_id: str | uuid.UUID,
|
|
255
245
|
ai_provider: BaseAIExtractor,
|
|
256
|
-
|
|
257
|
-
ai_create_attack_navigator_layer=False,
|
|
246
|
+
create_attack_navigator_layer=False,
|
|
258
247
|
**kwargs,
|
|
259
248
|
) -> Bundler:
|
|
260
|
-
if (
|
|
261
|
-
kwargs.get("sigma_file") != "sigma_file"
|
|
262
|
-
or ai_create_attack_flow
|
|
263
|
-
or ai_create_attack_navigator_layer
|
|
264
|
-
):
|
|
249
|
+
if not kwargs.get("sigma_file"):
|
|
265
250
|
validate_token_count(
|
|
266
251
|
int(os.getenv("INPUT_TOKEN_LIMIT", 0)), input_text, ai_provider
|
|
267
252
|
)
|
|
268
253
|
|
|
269
254
|
if sigma := kwargs.get("sigma_file"):
|
|
270
|
-
detection = get_sigma_detections(sigma)
|
|
255
|
+
detection = get_sigma_detections(sigma, name=name)
|
|
271
256
|
if not identity and detection.author:
|
|
272
257
|
identity = make_identity(detection.author)
|
|
273
258
|
kwargs.update(
|
|
@@ -303,22 +288,19 @@ def run_txt2detection(
|
|
|
303
288
|
)
|
|
304
289
|
detections = ai_provider.get_detections(input_text)
|
|
305
290
|
bundler.bundle_detections(detections)
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
bundler.data.attack_flow, bundler.data.navigator_layer = (
|
|
309
|
-
attack_flow.extract_attack_flow_and_navigator(
|
|
310
|
-
bundler,
|
|
311
|
-
bundler.report.description,
|
|
312
|
-
ai_create_attack_flow,
|
|
313
|
-
ai_create_attack_navigator_layer,
|
|
314
|
-
ai_provider,
|
|
315
|
-
)
|
|
316
|
-
)
|
|
291
|
+
if create_attack_navigator_layer:
|
|
292
|
+
bundler.create_attack_navigator()
|
|
317
293
|
return bundler
|
|
318
294
|
|
|
319
295
|
|
|
320
|
-
def get_sigma_detections(sigma: str) -> SigmaRuleDetection:
|
|
296
|
+
def get_sigma_detections(sigma: str, name=None) -> SigmaRuleDetection:
    """
    Parse a Sigma rule given as YAML text and validate it as a SigmaRuleDetection.

    Args:
        sigma: the YAML document of the rule, as a string.
        name: optional title; when truthy it replaces the rule's ``title``
            before validation.

    Raises:
        ValueError: if the YAML document is not a mapping at the top level.
    """
    parsed = yaml.safe_load(io.StringIO(sigma))
    if isinstance(parsed, dict):
        if name:
            # A caller-supplied name takes precedence over the title in the file.
            parsed["title"] = name
        return SigmaRuleDetection.model_validate(parsed)
    raise ValueError(
        f"bad sigma input file. expected object/dict, got {type(parsed)}."
    )
|
|
323
305
|
|
|
324
306
|
|
|
@@ -328,7 +310,14 @@ def main(args: Args):
|
|
|
328
310
|
logging.info(f"starting argument: {json.dumps(sys.argv[1:])}")
|
|
329
311
|
kwargs = args.__dict__
|
|
330
312
|
kwargs["identity"] = args.use_identity
|
|
331
|
-
|
|
313
|
+
try:
|
|
314
|
+
bundler = run_txt2detection(**kwargs)
|
|
315
|
+
except (ValidationError, ValueError) as e:
|
|
316
|
+
logging.error(f"Validate sigma file failed: {str(e)}")
|
|
317
|
+
if isinstance(e, ValidationError):
|
|
318
|
+
full_error = e.json(indent=4)
|
|
319
|
+
logging.debug(f"Validate sigma file failed: {full_error}", exc_info=True)
|
|
320
|
+
sys.exit(19)
|
|
332
321
|
|
|
333
322
|
output_dir = Path("./output") / str(bundler.bundle.id)
|
|
334
323
|
shutil.rmtree(output_dir, ignore_errors=True)
|
|
@@ -342,6 +331,12 @@ def main(args: Args):
|
|
|
342
331
|
for obj in bundler.bundle["objects"]:
|
|
343
332
|
if obj["type"] != "indicator" or obj["pattern_type"] != "sigma":
|
|
344
333
|
continue
|
|
345
|
-
|
|
346
|
-
|
|
334
|
+
rule_id: str = obj["id"].replace("indicator--", "")
|
|
335
|
+
rule_path = rules_dir / ("rule--" + rule_id + ".yml")
|
|
336
|
+
nav_path = rules_dir / f"attack-enterprise-navigator-layer-rule--{rule_id}.json"
|
|
337
|
+
rule_path.write_text(obj["pattern"])
|
|
338
|
+
if rule_nav := (
|
|
339
|
+
bundler.data.navigator_layer and bundler.data.navigator_layer.get(rule_id)
|
|
340
|
+
):
|
|
341
|
+
nav_path.write_text(json.dumps(rule_nav, indent=4))
|
|
347
342
|
logging.info(f"Writing bundle output to `{output_path}`")
|
|
@@ -7,7 +7,6 @@ from llama_index.core.llms.llm import LLM
|
|
|
7
7
|
|
|
8
8
|
from txt2detection.ai_extractor import prompts
|
|
9
9
|
|
|
10
|
-
from txt2detection.ai_extractor.models import AttackFlowList
|
|
11
10
|
from txt2detection.ai_extractor.utils import ParserWithLogging
|
|
12
11
|
from txt2detection.models import DetectionContainer, DetectionContainer
|
|
13
12
|
from llama_index.core.utils import get_tokenizer
|
|
@@ -62,28 +61,6 @@ class BaseAIExtractor:
|
|
|
62
61
|
def extractor_name(self):
|
|
63
62
|
return f"{self.provider}:{self.llm.model}"
|
|
64
63
|
|
|
65
|
-
def _get_attack_flow_program(self):
|
|
66
|
-
return LLMTextCompletionProgram.from_defaults(
|
|
67
|
-
output_parser=ParserWithLogging(AttackFlowList),
|
|
68
|
-
prompt=prompts.ATTACK_FLOW_PROMPT_TEMPL,
|
|
69
|
-
verbose=True,
|
|
70
|
-
llm=self.llm,
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
def extract_attack_flow(self, input_text, techniques) -> AttackFlowList:
|
|
74
|
-
extracted_techniques = []
|
|
75
|
-
for t in techniques.values():
|
|
76
|
-
extracted_techniques.append(
|
|
77
|
-
dict(
|
|
78
|
-
id=t["id"],
|
|
79
|
-
name=t["name"],
|
|
80
|
-
possible_tactics=list(t["possible_tactics"].keys()),
|
|
81
|
-
)
|
|
82
|
-
)
|
|
83
|
-
return self._get_attack_flow_program()(
|
|
84
|
-
document=input_text, extracted_techniques=extracted_techniques
|
|
85
|
-
)
|
|
86
|
-
|
|
87
64
|
def check_credential(self):
|
|
88
65
|
try:
|
|
89
66
|
return "authorized" if self._check_credential() else "unauthorized"
|
|
@@ -4,10 +4,11 @@ import os
|
|
|
4
4
|
from .base import BaseAIExtractor
|
|
5
5
|
from llama_index.llms.deepseek import DeepSeek
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
class DeepseekExtractor(BaseAIExtractor, provider="deepseek"):
|
|
8
9
|
def __init__(self, **kwargs) -> None:
|
|
9
|
-
kwargs.setdefault(
|
|
10
|
-
kwargs.setdefault(
|
|
10
|
+
kwargs.setdefault("temperature", float(os.environ.get("TEMPERATURE", 0.0)))
|
|
11
|
+
kwargs.setdefault("model", "deepseek-chat")
|
|
11
12
|
self.llm = DeepSeek(system_prompt=self.system_prompt, **kwargs)
|
|
12
13
|
super().__init__()
|
|
13
14
|
|
|
@@ -16,4 +17,4 @@ class DeepseekExtractor(BaseAIExtractor, provider='deepseek'):
|
|
|
16
17
|
return len(self.llm._tokenizer.encode(text))
|
|
17
18
|
except Exception as e:
|
|
18
19
|
logging.warning(e)
|
|
19
|
-
return super().count_tokens(text)
|
|
20
|
+
return super().count_tokens(text)
|
|
@@ -3,8 +3,10 @@ import textwrap
|
|
|
3
3
|
from llama_index.core.base.llms.types import ChatMessage, MessageRole
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
SIEMRULES_PROMPT = ChatPromptTemplate(
|
|
7
|
-
|
|
6
|
+
SIEMRULES_PROMPT = ChatPromptTemplate(
|
|
7
|
+
[
|
|
8
|
+
ChatMessage.from_str(
|
|
9
|
+
"""
|
|
8
10
|
**Persona:**
|
|
9
11
|
|
|
10
12
|
You are an expert in cybersecurity threat detection. Given a structured security report, generate a Sigma rule following the Sigma specification.
|
|
@@ -108,136 +110,12 @@ Make sure your response follows this format and adheres to the rules above.
|
|
|
108
110
|
## **Additional Instructions**
|
|
109
111
|
- Ensure the `"tags"` field includes relevant ATT&CK and CVE references based on the report content.
|
|
110
112
|
- Return a **valid JSON output** without YAML formatting for seamless processing.
|
|
111
|
-
"""
|
|
112
|
-
ChatMessage.from_str("Taking the entire input of my next message, analyze and return appropriate response", MessageRole.USER),
|
|
113
|
-
ChatMessage.from_str("{document}", MessageRole.USER),
|
|
114
|
-
])
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
ATTACK_FLOW_PROMPT_TEMPL = ChatPromptTemplate(
|
|
118
|
-
[
|
|
119
|
-
ChatMessage.from_str(
|
|
120
|
-
"""You are a cybersecurity threat intelligence analyst.
|
|
121
|
-
|
|
122
|
-
Your task is to analyze structured cybersecurity incident reports (e.g., malware analysis, APTs, data breaches, vulnerabilities) and extract and organize MITRE ATT&CK techniques as part of an attack flow analysis. This analysis helps defenders understand adversary behavior using the MITRE Attack Flow model maintained by the MITRE Center for Threat-Informed Defense.""",
|
|
123
|
-
MessageRole.SYSTEM,
|
|
124
|
-
),
|
|
125
|
-
ChatMessage.from_str(
|
|
126
|
-
"Hello. Please provide the document for analysis. Only include the full document text in your response.",
|
|
127
|
-
MessageRole.ASSISTANT,
|
|
113
|
+
"""
|
|
128
114
|
),
|
|
129
|
-
ChatMessage.from_str("{document}", MessageRole.USER),
|
|
130
115
|
ChatMessage.from_str(
|
|
131
|
-
"
|
|
132
|
-
MessageRole.ASSISTANT,
|
|
133
|
-
),
|
|
134
|
-
ChatMessage.from_str(
|
|
135
|
-
"<extracted_techniques>\n\n{extracted_techniques}\n\n</extracted_techniques>",
|
|
136
|
-
MessageRole.USER,
|
|
137
|
-
),
|
|
138
|
-
ChatMessage.from_str(
|
|
139
|
-
"Let's begin with tactic selection. What should I do with the techniques and possible tactics?",
|
|
140
|
-
MessageRole.ASSISTANT,
|
|
141
|
-
),
|
|
142
|
-
# PART 1: Tactic Selection Phase
|
|
143
|
-
ChatMessage.from_str(
|
|
144
|
-
"""
|
|
145
|
-
PART 1: TACTIC SELECTION
|
|
146
|
-
|
|
147
|
-
For each of the technique in `<extracted_techniques>`, return [technique_id, tactic_name], where
|
|
148
|
-
- technique id = `technique.id`
|
|
149
|
-
- tactic_name = choice from `technique.possible_tactics`, where choice is selected based on the **most contextually appropriate** tactic name for each technique based on how it's used in the document.
|
|
150
|
-
|
|
151
|
-
📌 Output only the tactic assignments in this format:
|
|
152
|
-
<code>
|
|
153
|
-
{
|
|
154
|
-
"tactic_selection": [
|
|
155
|
-
["Txxxx", "impact"],
|
|
156
|
-
["Tyyyy", "discovery"],
|
|
157
|
-
...
|
|
158
|
-
]
|
|
159
|
-
}
|
|
160
|
-
</code>
|
|
161
|
-
|
|
162
|
-
⚠️ Constraints:
|
|
163
|
-
- Use **only** the `possible_tactics` provided with each technique.
|
|
164
|
-
- Do **not** invent or infer any technique or tactic name beyond what’s given in <extracted_techniques>.
|
|
165
|
-
- Ensure **every** technique in `<extracted_techniques>` appears in `tactic_selection`, even if uncertain — choose the best fit.
|
|
166
|
-
- Technique IDs in `tactic_selection` must match exactly from <extracted_techniques> (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
|
|
167
|
-
- Must include every technique in `<extracted_techniques>`
|
|
168
|
-
""",
|
|
169
|
-
MessageRole.USER,
|
|
170
|
-
),
|
|
171
|
-
ChatMessage.from_str(
|
|
172
|
-
"Thanks. Now let's continue with the attack flow. How should I proceed?",
|
|
173
|
-
MessageRole.ASSISTANT,
|
|
174
|
-
),
|
|
175
|
-
# PART 2: Attack Flow Construction Phase
|
|
176
|
-
ChatMessage.from_str(
|
|
177
|
-
"""
|
|
178
|
-
PART 2: ATTACK FLOW CONSTRUCTION
|
|
179
|
-
|
|
180
|
-
Using the `<extracted_techniques>` and the incident details in the document, construct a sequence of MITRE ATT&CK techniques that represent the adversary’s logical progression through the attack.
|
|
181
|
-
|
|
182
|
-
For each technique:
|
|
183
|
-
- Use the `technique.id` exactly as provided
|
|
184
|
-
- Assign:
|
|
185
|
-
- `name`: a short, context-based phrase describing how the technique is used
|
|
186
|
-
- `description`: a longer explanation of how the technique operates in this specific incident, based only on the document
|
|
187
|
-
- `position`: the step in the logical or chronological attack sequence (starting at 0)
|
|
188
|
-
|
|
189
|
-
⚠️ Constraints:
|
|
190
|
-
- Use **only** technique IDs provided in `<extracted_techniques>` — do **not** invent or infer new ones
|
|
191
|
-
- Ensure all included technique IDs exactly match `technique.id` from `<extracted_techniques>` (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
|
|
192
|
-
|
|
193
|
-
📤 Output Format:
|
|
194
|
-
<code>
|
|
195
|
-
{
|
|
196
|
-
"items": [
|
|
197
|
-
{
|
|
198
|
-
"position": 0,
|
|
199
|
-
"attack_technique_id": "Txxxx",
|
|
200
|
-
"name": "Short contextual name",
|
|
201
|
-
"description": "Detailed contextual explanation"
|
|
202
|
-
},
|
|
203
|
-
...
|
|
204
|
-
],
|
|
205
|
-
"success": true
|
|
206
|
-
}
|
|
207
|
-
</code>
|
|
208
|
-
|
|
209
|
-
Your goal is to tell the story of how the adversary moved through the attack using the extracted ATT&CK techniques, in the correct sequence, with clear context for defenders.
|
|
210
|
-
""",
|
|
211
|
-
MessageRole.USER,
|
|
212
|
-
),
|
|
213
|
-
# PART 3: Combination phase
|
|
214
|
-
ChatMessage.from_str(
|
|
215
|
-
"""
|
|
216
|
-
📤 Final Output Format:
|
|
217
|
-
<code>
|
|
218
|
-
{
|
|
219
|
-
"tactic_selection": [...], // Use your previous output
|
|
220
|
-
"items": [
|
|
221
|
-
{
|
|
222
|
-
"position": 0,
|
|
223
|
-
"attack_technique_id": "Txxxx",
|
|
224
|
-
"name": "Short contextual name",
|
|
225
|
-
"description": "Detailed contextual explanation"
|
|
226
|
-
},
|
|
227
|
-
...
|
|
228
|
-
],
|
|
229
|
-
"success": true
|
|
230
|
-
}
|
|
231
|
-
</code>
|
|
232
|
-
|
|
233
|
-
⚠️ Constraints:
|
|
234
|
-
- All `attack_technique_id` values in `items` must come from `<extracted_techniques>`
|
|
235
|
-
- The `position` field should reflect the **chronological or logical** execution order of the attack
|
|
236
|
-
- Do **not** introduce new technique IDs
|
|
237
|
-
|
|
238
|
-
✅ Your goal is to build a realistic, document-based attack flow using MITRE ATT&CK technique–tactic pairs.
|
|
239
|
-
""",
|
|
116
|
+
"Taking the entire input of my next message, analyze and return appropriate response",
|
|
240
117
|
MessageRole.USER,
|
|
241
118
|
),
|
|
119
|
+
ChatMessage.from_str("{document}", MessageRole.USER),
|
|
242
120
|
]
|
|
243
121
|
)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
if typing.TYPE_CHECKING:
|
|
4
|
+
from .bundler import Bundler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def map_technique_tactic(obj, report_tactics, rule_tactics):
    """
    Pair a technique's external ID with the external ID of one of its tactics.

    Tactics attached to the same rule are preferred. If none of the
    technique's tactics appear there, the tactics collected for the whole
    report are tried. When several candidates match, the one listed first in
    the technique's ``kill_chain_phases`` wins, so the result is deterministic.

    Args:
        obj: technique object (mapping) with ``external_references`` and,
            optionally, ``kill_chain_phases``.
        report_tactics: mapping of tactic phase name -> tactic object for
            every tactic in the report.
        rule_tactics: mapping of tactic phase name -> tactic object for the
            tactics of the rule the technique belongs to.

    Returns:
        ``(technique_id, tactic_id)``. ``tactic_id`` is ``None`` when no
        tactic matches.

    NOTE(review): the tactic is returned as its external ID, while the ATT&CK
    Navigator layer format appears to document ``tactic`` as the tactic
    shortname; confirm which one consumers expect.
    """
    technique_name = obj["external_references"][0]["external_id"]

    # Keep the phases in their original order (and tolerate objects that have
    # no kill chain phases at all) so the "first match" is well defined.
    tactic_names = [
        phase["phase_name"]
        for phase in obj.get("kill_chain_phases") or ()
        if {"mitre", "attack"} <= set(phase["kill_chain_name"].split("-"))
    ]

    tactic_obj = None
    # Rule-level tactics take priority; report-level tactics are the fallback.
    for known_tactics in (rule_tactics, report_tactics):
        match = next((name for name in tactic_names if name in known_tactics), None)
        if match is not None:
            tactic_obj = known_tactics[match]
            break

    tactic_name = None
    if tactic_obj:
        tactic_name = tactic_obj["external_references"][0]["external_id"]
    return technique_name, tactic_name
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def create_navigator_layer(report, indicator, technique_mapping, mitre_version):
    """
    Assemble an ATT&CK Navigator layer dictionary for a single rule.

    Args:
        report: object whose ``id`` attribute is recorded in the layer metadata.
        indicator: mapping providing the layer ``name`` and the rule ``id``.
        technique_mapping: mapping of technique ID -> tactic (or a falsy
            value when the technique has no tactic to pin it to).
        mitre_version: value stored under ``versions.attack``.

    Returns:
        A dict with the layer content; every technique is scored 100.
    """

    def _technique_entry(technique_id, tactic):
        # The "tactic" key is only emitted when a tactic is known.
        entry = {
            "techniqueID": technique_id,
            "score": 100,
            "showSubtechniques": True,
        }
        if tactic:
            entry["tactic"] = tactic
        return entry

    technique_entries = [
        _technique_entry(technique_id, tactic)
        for technique_id, tactic in technique_mapping.items()
    ]

    versions = {"layer": "4.5", "attack": mitre_version, "navigator": "5.1.0"}
    gradient = {"colors": ["#ffffff", "#ff6666"], "minValue": 0, "maxValue": 100}

    layer = {"name": indicator["name"], "domain": "enterprise-attack"}
    layer["versions"] = versions
    layer["techniques"] = technique_entries
    layer["gradient"] = gradient
    layer["legendItems"] = []
    layer["metadata"] = [
        {"name": "report_id", "value": report.id, "rule_id": indicator["id"]}
    ]
    layer["links"] = [
        {
            "label": "Generated using txt2detection",
            "url": "https://github.com/muchdogesec/txt2detection/",
        }
    ]
    layer["layout"] = {"layout": "side"}
    return layer
|
txt2detection/bundler.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
import enum
|
|
3
|
+
import itertools
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import os
|
|
@@ -15,7 +16,7 @@ from stix2 import (
|
|
|
15
16
|
from stix2.serialization import serialize
|
|
16
17
|
import hashlib
|
|
17
18
|
|
|
18
|
-
from txt2detection import
|
|
19
|
+
from txt2detection import attack_navigator, observables
|
|
19
20
|
from txt2detection.models import (
|
|
20
21
|
AIDetection,
|
|
21
22
|
BaseDetection,
|
|
@@ -30,7 +31,11 @@ import uuid
|
|
|
30
31
|
from stix2 import parse as parse_stix
|
|
31
32
|
|
|
32
33
|
from txt2detection.models import TLP_LEVEL
|
|
33
|
-
from txt2detection.utils import
|
|
34
|
+
from txt2detection.utils import (
|
|
35
|
+
STATUSES,
|
|
36
|
+
load_stix_object_from_url,
|
|
37
|
+
remove_rule_specific_tags,
|
|
38
|
+
)
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
logger = logging.getLogger("txt2detection.bundler")
|
|
@@ -42,7 +47,6 @@ class Bundler:
|
|
|
42
47
|
uuid = None
|
|
43
48
|
id_map = dict()
|
|
44
49
|
data: DataContainer
|
|
45
|
-
ATTACK_FLOW_SMO_URL = "https://github.com/muchdogesec/stix2extensions/raw/refs/heads/main/remote-definitions/attack-flow.json"
|
|
46
50
|
# https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
|
|
47
51
|
default_identity = Identity(
|
|
48
52
|
**{
|
|
@@ -82,6 +86,10 @@ class Bundler:
|
|
|
82
86
|
}
|
|
83
87
|
)
|
|
84
88
|
|
|
89
|
+
extension_definition = load_stix_object_from_url(
|
|
90
|
+
"https://raw.githubusercontent.com/muchdogesec/stix2extensions/refs/heads/main/extension-definitions/properties/indicator-sigma_rule.json"
|
|
91
|
+
)
|
|
92
|
+
|
|
85
93
|
@classmethod
|
|
86
94
|
def generate_report_id(cls, created_by_ref, created, name):
|
|
87
95
|
if not created_by_ref:
|
|
@@ -114,6 +122,7 @@ class Bundler:
|
|
|
114
122
|
self.labels = labels or []
|
|
115
123
|
self.license = license
|
|
116
124
|
|
|
125
|
+
self.all_objects = set()
|
|
117
126
|
self.job_id = f"report--{self.uuid}"
|
|
118
127
|
self.external_refs = (external_refs or []) + [
|
|
119
128
|
dict(
|
|
@@ -124,6 +133,8 @@ class Bundler:
|
|
|
124
133
|
for url in self.reference_urls
|
|
125
134
|
]
|
|
126
135
|
self.data = DataContainer.model_construct()
|
|
136
|
+
self.tactics = {}
|
|
137
|
+
self.techniques = {}
|
|
127
138
|
|
|
128
139
|
self.report = Report(
|
|
129
140
|
created_by_ref=self.identity.id,
|
|
@@ -148,7 +159,6 @@ class Bundler:
|
|
|
148
159
|
)
|
|
149
160
|
self.report.object_refs.clear() # clear object refs
|
|
150
161
|
self.set_defaults()
|
|
151
|
-
self.all_objects = set()
|
|
152
162
|
if not description:
|
|
153
163
|
self.report.external_references.pop(0)
|
|
154
164
|
|
|
@@ -159,6 +169,7 @@ class Bundler:
|
|
|
159
169
|
self.bundle.objects.extend([self.default_marking, self.identity, self.report])
|
|
160
170
|
# add default STIX 2.1 marking definition for txt2detection
|
|
161
171
|
self.report.object_marking_refs.append(self.default_marking.id)
|
|
172
|
+
self.add_ref(self.extension_definition)
|
|
162
173
|
|
|
163
174
|
def add_ref(self, sdo, append_report=False):
|
|
164
175
|
sdo_id = sdo["id"]
|
|
@@ -191,7 +202,19 @@ class Bundler:
|
|
|
191
202
|
"pattern": detection.make_rule(self),
|
|
192
203
|
"valid_from": self.report.created,
|
|
193
204
|
"object_marking_refs": self.report.object_marking_refs,
|
|
194
|
-
"external_references": self.external_refs
|
|
205
|
+
"external_references": self.external_refs,
|
|
206
|
+
"extensions": {
|
|
207
|
+
self.extension_definition["id"]: {
|
|
208
|
+
"extension_type": "toplevel-property-extension"
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
"x_sigma_type": "base",
|
|
212
|
+
"x_sigma_level": detection.level,
|
|
213
|
+
"x_sigma_status": detection.status,
|
|
214
|
+
"x_sigma_license": detection.license,
|
|
215
|
+
"x_sigma_fields": detection.fields,
|
|
216
|
+
"x_sigma_falsepositives": detection.falsepositives,
|
|
217
|
+
"x_sigma_scope": detection.scope,
|
|
195
218
|
}
|
|
196
219
|
indicator["external_references"].append(
|
|
197
220
|
{
|
|
@@ -205,13 +228,19 @@ class Bundler:
|
|
|
205
228
|
logger.debug("```yaml\n" + indicator["pattern"] + "\n```")
|
|
206
229
|
logger.debug(f" =================== end of rule =================== ")
|
|
207
230
|
|
|
208
|
-
self.data.attacks
|
|
231
|
+
self.data.attacks.update(dict.fromkeys(detection.mitre_attack_ids, "Not found"))
|
|
232
|
+
tactics = self.tactics[detection.id] = {}
|
|
233
|
+
techniques = self.techniques[detection.id] = []
|
|
209
234
|
for obj in self.get_attack_objects(detection.mitre_attack_ids):
|
|
210
235
|
self.add_ref(obj)
|
|
211
236
|
self.add_relation(indicator, obj)
|
|
212
237
|
self.data.attacks[obj["external_references"][0]["external_id"]] = obj["id"]
|
|
238
|
+
if obj["type"] == "x-mitre-tactic":
|
|
239
|
+
tactics[obj["x_mitre_shortname"]] = obj
|
|
240
|
+
else:
|
|
241
|
+
techniques.append(obj)
|
|
213
242
|
|
|
214
|
-
self.data.cves
|
|
243
|
+
self.data.cves.update(dict.fromkeys(detection.cve_ids, "Not found"))
|
|
215
244
|
for obj in self.get_cve_objects(detection.cve_ids):
|
|
216
245
|
self.add_ref(obj)
|
|
217
246
|
self.add_relation(indicator, obj)
|
|
@@ -302,24 +331,13 @@ class Bundler:
|
|
|
302
331
|
return self._get_objects(endpoint, headers)
|
|
303
332
|
|
|
304
333
|
@classmethod
|
|
305
|
-
def
|
|
334
|
+
def get_attack_version(cls):
|
|
306
335
|
headers = {}
|
|
307
336
|
api_root = os.environ["CTIBUTLER_BASE_URL"] + "/"
|
|
308
337
|
if api_key := os.environ.get("CTIBUTLER_API_KEY"):
|
|
309
338
|
headers["API-KEY"] = api_key
|
|
310
|
-
|
|
311
|
-
endpoint = urljoin(
|
|
312
|
-
api_root, f"v1/attack-enterprise/objects/?attack_type=Tactic"
|
|
313
|
-
)
|
|
314
339
|
version_url = urljoin(api_root, f"v1/attack-enterprise/versions/installed/")
|
|
315
|
-
|
|
316
|
-
retval = dict(
|
|
317
|
-
version=requests.get(version_url, headers=headers).json()["latest"]
|
|
318
|
-
)
|
|
319
|
-
for tac in tactics:
|
|
320
|
-
retval[tac["x_mitre_shortname"]] = tac
|
|
321
|
-
retval[tac["external_references"][0]["external_id"]] = tac
|
|
322
|
-
return retval
|
|
340
|
+
return requests.get(version_url, headers=headers).json()["latest"]
|
|
323
341
|
|
|
324
342
|
@classmethod
|
|
325
343
|
def get_cve_objects(cls, cve_ids):
|
|
@@ -356,28 +374,40 @@ class Bundler:
|
|
|
356
374
|
return data
|
|
357
375
|
|
|
358
376
|
def bundle_detections(self, container: DetectionContainer):
|
|
359
|
-
self.data =
|
|
377
|
+
self.data.detections = container
|
|
360
378
|
if not container.success:
|
|
361
379
|
return
|
|
362
380
|
for d in container.detections:
|
|
363
381
|
self.add_rule_indicator(d)
|
|
364
382
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
objects.extend(smo_objects)
|
|
374
|
-
for obj in objects:
|
|
375
|
-
if obj["id"] == self.report.id:
|
|
383
|
+
def create_attack_navigator(self):
|
|
384
|
+
self.mitre_version = self.get_attack_version()
|
|
385
|
+
all_tactics = dict(
|
|
386
|
+
itertools.chain(*map(lambda x: x.items(), self.tactics.values()))
|
|
387
|
+
)
|
|
388
|
+
self.data.navigator_layer = {}
|
|
389
|
+
for detection_id, techniques in self.techniques.items():
|
|
390
|
+
if not techniques:
|
|
376
391
|
continue
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
392
|
+
tactics = self.tactics[detection_id]
|
|
393
|
+
mapping = dict(
|
|
394
|
+
[
|
|
395
|
+
attack_navigator.map_technique_tactic(
|
|
396
|
+
technique, all_tactics, tactics
|
|
397
|
+
)
|
|
398
|
+
for technique in techniques
|
|
399
|
+
]
|
|
400
|
+
)
|
|
401
|
+
indicator = [
|
|
402
|
+
f
|
|
403
|
+
for f in self.bundle.objects
|
|
404
|
+
if str(f["id"]).endswith(detection_id) and f["type"] == "indicator"
|
|
405
|
+
][0]
|
|
406
|
+
self.data.navigator_layer[detection_id] = (
|
|
407
|
+
attack_navigator.create_navigator_layer(
|
|
408
|
+
self.report, indicator, mapping, self.mitre_version
|
|
409
|
+
)
|
|
410
|
+
)
|
|
381
411
|
|
|
382
412
|
|
|
383
413
|
def make_logsouce_string(source: dict):
|
txt2detection/models.py
CHANGED
|
@@ -19,7 +19,6 @@ from stix2 import (
|
|
|
19
19
|
MarkingDefinition,
|
|
20
20
|
)
|
|
21
21
|
|
|
22
|
-
from txt2detection.ai_extractor.models import AttackFlowList
|
|
23
22
|
|
|
24
23
|
if typing.TYPE_CHECKING:
|
|
25
24
|
from txt2detection.bundler import Bundler
|
|
@@ -274,7 +273,8 @@ class BaseDetection(BaseModel):
|
|
|
274
273
|
@property
|
|
275
274
|
def mitre_attack_ids(self):
|
|
276
275
|
retval = []
|
|
277
|
-
for label in self.tags:
|
|
276
|
+
for i, label in enumerate(self.tags):
|
|
277
|
+
label = label.replace("_", "-").lower()
|
|
278
278
|
namespace, _, label_id = label.partition(".")
|
|
279
279
|
if namespace == "attack":
|
|
280
280
|
retval.append(MITRE_TACTIC_MAP.get(label_id, label_id.upper()))
|
|
@@ -336,7 +336,7 @@ class SigmaRuleDetection(BaseDetection):
|
|
|
336
336
|
fields: Optional[List[str]] = None
|
|
337
337
|
falsepositives: Optional[List[str]] = None
|
|
338
338
|
level: Optional[Level] = None
|
|
339
|
-
tags: Optional[List[SigmaTag]] = Field(default_factory=
|
|
339
|
+
tags: Optional[List[SigmaTag]] = Field(default_factory=list)
|
|
340
340
|
scope: Optional[List[str]] = None
|
|
341
341
|
_indicator_types: list = None
|
|
342
342
|
|
|
@@ -402,11 +402,10 @@ class DetectionContainer(BaseModel):
|
|
|
402
402
|
|
|
403
403
|
class DataContainer(BaseModel):
|
|
404
404
|
detections: DetectionContainer
|
|
405
|
-
|
|
406
|
-
navigator_layer: list = Field(default=None)
|
|
405
|
+
navigator_layer: dict = Field(default=None)
|
|
407
406
|
observables: list[dict] = Field(default=None)
|
|
408
|
-
cves: dict[str, str] = Field(
|
|
409
|
-
attacks: dict[str, str] = Field(
|
|
407
|
+
cves: dict[str, str] = Field(default_factory=dict)
|
|
408
|
+
attacks: dict[str, str] = Field(default_factory=dict)
|
|
410
409
|
|
|
411
410
|
|
|
412
411
|
def tlp_from_tags(tags: list[SigmaTag]):
|
txt2detection/observables.py
CHANGED
|
@@ -159,27 +159,3 @@ def to_stix_object(observable_type: str, value):
|
|
|
159
159
|
)
|
|
160
160
|
)
|
|
161
161
|
return None
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
# example_detection = {
|
|
165
|
-
# "selection": {
|
|
166
|
-
# "source_ip": "192.168.1.10",
|
|
167
|
-
# "destination_email": "attacker@example.com",
|
|
168
|
-
# "url_path": "http://malicious.example.com/payload.exe",
|
|
169
|
-
# "file_hash_md5": "44d88612fea8a8f36de82e1278abb02f",
|
|
170
|
-
# "mac_address": "00:1A:2B:3C:4D:5E",
|
|
171
|
-
# "username": "CORP\\jdoe",
|
|
172
|
-
# "registry_key": "HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run",
|
|
173
|
-
# "registry_key2": "HK_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run",
|
|
174
|
-
# "certificate": "-----BEGIN CERTIFICATE-----FAKECERT-----END CERTIFICATE-----",
|
|
175
|
-
# "ip": " 192.167.1.1",
|
|
176
|
-
# },
|
|
177
|
-
# "condition": "selection",
|
|
178
|
-
# }
|
|
179
|
-
|
|
180
|
-
# # Usage
|
|
181
|
-
# observables = find_stix_observables(example_detection)
|
|
182
|
-
# print(observables)
|
|
183
|
-
|
|
184
|
-
# for a, b in observables:
|
|
185
|
-
# print(to_stix_object(a, b))
|
txt2detection/utils.py
CHANGED
|
@@ -85,6 +85,11 @@ def remove_rule_specific_tags(tags):
|
|
|
85
85
|
labels.append(tag)
|
|
86
86
|
return labels
|
|
87
87
|
|
|
88
|
+
@lru_cache()
def load_stix_object_from_url(url, timeout=30):
    """
    Download a JSON document from ``url`` and return the decoded object.

    Results are memoised per argument set, so each URL is fetched once.

    Args:
        url: address of the JSON document.
        timeout: seconds to wait for the server before giving up; without it
            a stalled connection would block the caller indefinitely.

    Raises:
        requests.HTTPError: if the server answers with an error status. This
            prevents an error payload from being cached as if it were the
            requested object.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.json()
|
|
92
|
+
|
|
88
93
|
|
|
89
94
|
def as_date(d: "date|datetime"):
|
|
90
95
|
if isinstance(d, datetime):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: txt2detection
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.12
|
|
4
4
|
Summary: A command line tool that takes a txt file containing threat intelligence and turns it into a detection rule.
|
|
5
5
|
Project-URL: Homepage, https://github.com/muchdogesec/txt2detection
|
|
6
6
|
Project-URL: Issues, https://github.com/muchdogesec/txt2detection/issues
|
|
@@ -162,8 +162,7 @@ Use this mode to generate a set of rules from an input text file;
|
|
|
162
162
|
* Provider (env var required `ANTHROPIC_API_KEY`): `anthropic:`, models e.g.: `claude-3-5-sonnet-latest`, `claude-3-5-haiku-latest`, `claude-3-opus-latest` ([More here](https://docs.anthropic.com/en/docs/about-claude/models))
|
|
163
163
|
* Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
164
164
|
* Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
|
|
165
|
-
* `--
|
|
166
|
-
* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as this mode relies on this setting being true)
|
|
165
|
+
* `--create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only.
|
|
167
166
|
|
|
168
167
|
Note, in this mode, the following values will be automatically assigned to the rule
|
|
169
168
|
|
|
@@ -190,8 +189,7 @@ Note, in this mode you should be aware of a few things;
|
|
|
190
189
|
* `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
|
|
191
190
|
* `status` (optional): either `stable`, `test`, `experimental`, `deprecated`, `unsupported`. If passed, will overwrite any existing `status` recorded in the rule
|
|
192
191
|
* `level` (optional): either `informational`, `low`, `medium`, `high`, `critical`. If passed, will overwrite any existing `level` recorded in the rule
|
|
193
|
-
* `--
|
|
194
|
-
* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as this mode relies on this setting being true)
|
|
192
|
+
* `--create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags.
|
|
195
193
|
|
|
196
194
|
### A note on observable extraction
|
|
197
195
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
|
|
2
|
+
txt2detection/__main__.py,sha256=felrmY7q1tLK4wc17Lx0Ip3WJ23GicoIj0k7pelCqLM,11610
|
|
3
|
+
txt2detection/attack_navigator.py,sha256=CEph4Q3N49ASC0b0eXzTgbBU_JBUHUrBWag7dn_TGbg,2135
|
|
4
|
+
txt2detection/bundler.py,sha256=QnuVdyL0J2CC2rIgTLCt3HclDAxXtK7kdLtv8AGMsJ0,15151
|
|
5
|
+
txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
|
|
6
|
+
txt2detection/models.py,sha256=9nEmbyRNIM5ZhM6asJymKlcUA2Bj2mbFEtWX5OPTU2s,12857
|
|
7
|
+
txt2detection/observables.py,sha256=FuOfq7TsQykeHbE5waakx-rh1JacAq3G8mUqi-3Aw_o,5896
|
|
8
|
+
txt2detection/utils.py,sha256=ZLpFbu9AXsEBaqY1Kjy0mEClaq8hLN8axuOc6bPD-3U,2914
|
|
9
|
+
txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
|
|
10
|
+
txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
|
|
11
|
+
txt2detection/ai_extractor/base.py,sha256=g69o_CsabqL2Y3KxwXLOZU6f98NUjBJwhlPhBgjXSks,2358
|
|
12
|
+
txt2detection/ai_extractor/deepseek.py,sha256=uRbPWmbnu4dzXaBxWPOsKt68v5NJeugGyvOoedjCve0,662
|
|
13
|
+
txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
|
|
14
|
+
txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
|
|
15
|
+
txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
|
|
16
|
+
txt2detection/ai_extractor/prompts.py,sha256=u8PyFcyqrr-MTo2uwa4cDOhh7FbvSnmc0sceaKzThsw,5996
|
|
17
|
+
txt2detection/ai_extractor/utils.py,sha256=CHsyVylMIldFATXPcmRNciruO-4nqh68n076lABRaFk,536
|
|
18
|
+
txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
|
|
19
|
+
txt2detection-1.0.12.dist-info/METADATA,sha256=V23xCbPkNx7btdIwmi9VZEeDsfZYUJIIOnx6EmV-YLA,14797
|
|
20
|
+
txt2detection-1.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
21
|
+
txt2detection-1.0.12.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
|
|
22
|
+
txt2detection-1.0.12.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
|
|
23
|
+
txt2detection-1.0.12.dist-info/RECORD,,
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import io
|
|
2
|
-
import json
|
|
3
|
-
import logging
|
|
4
|
-
|
|
5
|
-
import dotenv
|
|
6
|
-
import textwrap
|
|
7
|
-
|
|
8
|
-
from pydantic import BaseModel, Field, RootModel
|
|
9
|
-
from llama_index.core.output_parsers import PydanticOutputParser
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class AttackFlowItem(BaseModel):
|
|
13
|
-
position: int = Field(description="order of object starting at 0")
|
|
14
|
-
attack_technique_id: str
|
|
15
|
-
name: str
|
|
16
|
-
description: str
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class AttackFlowList(BaseModel):
|
|
20
|
-
tactic_selection: list[tuple[str, str]] = Field(
|
|
21
|
-
description="attack technique id to attack tactic id mapping using possible_tactics"
|
|
22
|
-
)
|
|
23
|
-
# additional_tactic_mapping: list[tuple[str, str]] = Field(description="the rest of tactic_mapping")
|
|
24
|
-
items: list[AttackFlowItem]
|
|
25
|
-
success: bool = Field(
|
|
26
|
-
description="determines if there's any valid flow in <extractions>"
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
def model_post_init(self, context):
|
|
30
|
-
return super().model_post_init(context)
|
|
31
|
-
|
|
32
|
-
@property
|
|
33
|
-
def tactic_mapping(self):
|
|
34
|
-
return dict(self.tactic_selection)
|
txt2detection/attack_flow.py
DELETED
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import uuid
|
|
4
|
-
from stix2 import Relationship
|
|
5
|
-
|
|
6
|
-
from txt2detection.ai_extractor.models import AttackFlowList
|
|
7
|
-
from .bundler import Bundler
|
|
8
|
-
|
|
9
|
-
from .ai_extractor.base import BaseAIExtractor
|
|
10
|
-
from .models import UUID_NAMESPACE
|
|
11
|
-
from stix2extensions.attack_action import AttackAction, AttackFlow
|
|
12
|
-
|
|
13
|
-
def parse_flow(report, flow: AttackFlowList, techniques, tactics):
|
|
14
|
-
logging.info(f"flow.success = {flow.success}")
|
|
15
|
-
if not flow.success:
|
|
16
|
-
return []
|
|
17
|
-
objects = [report]
|
|
18
|
-
for domain in ["enterprise-attack", "mobile-attack", "ics-attack"]:
|
|
19
|
-
flow_objects = parse_domain_flow(report, flow, techniques, tactics, domain)
|
|
20
|
-
objects.extend(flow_objects)
|
|
21
|
-
return objects
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def parse_domain_flow(report, flow: AttackFlowList, techniques, tactics, domain):
|
|
25
|
-
flow_objects = []
|
|
26
|
-
flow_obj = None
|
|
27
|
-
last_action = None
|
|
28
|
-
for i, item in enumerate(flow.items):
|
|
29
|
-
try:
|
|
30
|
-
technique = techniques[item.attack_technique_id]
|
|
31
|
-
if technique["domain"] != domain:
|
|
32
|
-
continue
|
|
33
|
-
tactic_id = technique["possible_tactics"][
|
|
34
|
-
flow.tactic_mapping[item.attack_technique_id]
|
|
35
|
-
]
|
|
36
|
-
technique_obj = technique["stix_obj"]
|
|
37
|
-
|
|
38
|
-
tactic_obj = tactics[tactic_id]
|
|
39
|
-
action_obj = AttackAction(
|
|
40
|
-
**{
|
|
41
|
-
"id": flow_id(report["id"], item.attack_technique_id, tactic_id),
|
|
42
|
-
"effect_refs": [f"attack-action--{str(uuid.uuid4())}"],
|
|
43
|
-
"technique_id": item.attack_technique_id,
|
|
44
|
-
"technique_ref": technique_obj["id"],
|
|
45
|
-
"tactic_id": tactic_id,
|
|
46
|
-
"tactic_ref": tactic_obj["id"],
|
|
47
|
-
"name": item.name,
|
|
48
|
-
"description": item.description,
|
|
49
|
-
},
|
|
50
|
-
allow_custom=True,
|
|
51
|
-
)
|
|
52
|
-
action_obj.effect_refs.clear()
|
|
53
|
-
if not flow_obj:
|
|
54
|
-
flow_obj = {
|
|
55
|
-
"type": "attack-flow",
|
|
56
|
-
"id": "attack-flow--"
|
|
57
|
-
+ str(
|
|
58
|
-
uuid.uuid5(UUID_NAMESPACE, f"attack-flow+{domain}+{report.id}")
|
|
59
|
-
),
|
|
60
|
-
"spec_version": "2.1",
|
|
61
|
-
"created": report.created,
|
|
62
|
-
"modified": report.modified,
|
|
63
|
-
"created_by_ref": report.created_by_ref,
|
|
64
|
-
"start_refs": [action_obj["id"]],
|
|
65
|
-
"name": f"[{domain.split('-')[0].upper()}] {report.name}",
|
|
66
|
-
"description": report.description,
|
|
67
|
-
"scope": "malware",
|
|
68
|
-
"external_references": report.external_references,
|
|
69
|
-
"object_marking_refs": report.object_marking_refs,
|
|
70
|
-
}
|
|
71
|
-
flow_objects.append(AttackFlow(**flow_obj))
|
|
72
|
-
flow_objects.append(
|
|
73
|
-
Relationship(
|
|
74
|
-
type="relationship",
|
|
75
|
-
spec_version="2.1",
|
|
76
|
-
id="relationship--"
|
|
77
|
-
+ str(
|
|
78
|
-
uuid.uuid5(
|
|
79
|
-
UUID_NAMESPACE,
|
|
80
|
-
f"attack-flow+{report.id}+{flow_obj['id']}",
|
|
81
|
-
)
|
|
82
|
-
),
|
|
83
|
-
created_by_ref=report.created_by_ref,
|
|
84
|
-
created=report.created,
|
|
85
|
-
modified=report.modified,
|
|
86
|
-
relationship_type="attack-flow",
|
|
87
|
-
description=f"Attack Flow for {report.name}",
|
|
88
|
-
source_ref=report.id,
|
|
89
|
-
target_ref=flow_obj["id"],
|
|
90
|
-
external_references=report.external_references,
|
|
91
|
-
object_marking_refs=report.object_marking_refs,
|
|
92
|
-
)
|
|
93
|
-
)
|
|
94
|
-
else:
|
|
95
|
-
last_action["effect_refs"].append(action_obj["id"])
|
|
96
|
-
flow_objects.append(tactic_obj)
|
|
97
|
-
flow_objects.append(technique_obj)
|
|
98
|
-
flow_objects.append(action_obj)
|
|
99
|
-
last_action = action_obj
|
|
100
|
-
except Exception as e:
|
|
101
|
-
if flow_objects == 2:
|
|
102
|
-
logging.exception("FATAL: create attack flow object failed")
|
|
103
|
-
return []
|
|
104
|
-
logging.debug("create attack-action failed", exc_info=True)
|
|
105
|
-
raise
|
|
106
|
-
|
|
107
|
-
return flow_objects
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def flow_id(report_id, technique_id, tactic_id):
|
|
111
|
-
return "attack-action--" + str(
|
|
112
|
-
uuid.uuid5(
|
|
113
|
-
uuid.UUID(report_id.split("--")[-1]),
|
|
114
|
-
f"{report_id}+{technique_id}+{tactic_id}",
|
|
115
|
-
)
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def get_techniques_from_extracted_objects(objects: dict, tactics: dict):
|
|
120
|
-
techniques = {}
|
|
121
|
-
for obj in objects:
|
|
122
|
-
if (
|
|
123
|
-
obj["type"] == "attack-pattern"
|
|
124
|
-
and obj.get("external_references", [{"source_name": None}])[0][
|
|
125
|
-
"source_name"
|
|
126
|
-
]
|
|
127
|
-
== "mitre-attack"
|
|
128
|
-
):
|
|
129
|
-
domain = obj["x_mitre_domains"][0]
|
|
130
|
-
technique = dict(
|
|
131
|
-
domain=domain,
|
|
132
|
-
name=obj["name"],
|
|
133
|
-
possible_tactics={},
|
|
134
|
-
id=obj["external_references"][0]["external_id"],
|
|
135
|
-
platforms=[
|
|
136
|
-
platform
|
|
137
|
-
for platform in obj["x_mitre_platforms"]
|
|
138
|
-
if platform != "None"
|
|
139
|
-
],
|
|
140
|
-
stix_obj=obj,
|
|
141
|
-
)
|
|
142
|
-
for phase in obj["kill_chain_phases"]:
|
|
143
|
-
if not set(phase["kill_chain_name"].split("-")).issuperset(
|
|
144
|
-
["mitre", "attack"]
|
|
145
|
-
):
|
|
146
|
-
continue
|
|
147
|
-
tactic_name = phase["phase_name"]
|
|
148
|
-
tactic_obj = tactics[tactic_name]
|
|
149
|
-
tactic_id = tactic_obj["external_references"][0]["external_id"]
|
|
150
|
-
technique["possible_tactics"][tactic_name] = tactic_id
|
|
151
|
-
techniques[technique["id"]] = technique
|
|
152
|
-
return techniques
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def create_navigator_layer(report, flow: AttackFlowList, techniques, tactics):
|
|
156
|
-
domains = {}
|
|
157
|
-
comments = {item.attack_technique_id: item.description for item in flow.items}
|
|
158
|
-
for technique in techniques.values():
|
|
159
|
-
domain_techniques = domains.setdefault(technique["domain"], [])
|
|
160
|
-
technique_id = technique["id"]
|
|
161
|
-
if technique_id not in flow.tactic_mapping:
|
|
162
|
-
continue
|
|
163
|
-
technique_item = dict(
|
|
164
|
-
techniqueID=technique_id,
|
|
165
|
-
tactic=flow.tactic_mapping[technique_id],
|
|
166
|
-
score=100,
|
|
167
|
-
showSubtechniques=True,
|
|
168
|
-
)
|
|
169
|
-
if comment := comments.get(technique_id):
|
|
170
|
-
technique_item["comment"] = comment
|
|
171
|
-
domain_techniques.append(technique_item)
|
|
172
|
-
|
|
173
|
-
retval = []
|
|
174
|
-
|
|
175
|
-
for domain, domain_techniques in domains.items():
|
|
176
|
-
retval.append(
|
|
177
|
-
{
|
|
178
|
-
"versions": {
|
|
179
|
-
"layer": "4.5",
|
|
180
|
-
"attack": tactics["version"],
|
|
181
|
-
"navigator": "5.1.0",
|
|
182
|
-
},
|
|
183
|
-
"name": report.name,
|
|
184
|
-
"domain": domain,
|
|
185
|
-
"techniques": domain_techniques,
|
|
186
|
-
"gradient": {
|
|
187
|
-
"colors": ["#ffffff", "#ff6666"],
|
|
188
|
-
"minValue": 0,
|
|
189
|
-
"maxValue": 100,
|
|
190
|
-
},
|
|
191
|
-
"legendItems": [],
|
|
192
|
-
"metadata": [{"name": "report_id", "value": report.id}],
|
|
193
|
-
"links": [
|
|
194
|
-
{
|
|
195
|
-
"label": "Generated using txt2detection",
|
|
196
|
-
"url": "https://github.com/muchdogesec/txt2detection/",
|
|
197
|
-
}
|
|
198
|
-
],
|
|
199
|
-
"layout": {"layout": "side"},
|
|
200
|
-
}
|
|
201
|
-
)
|
|
202
|
-
return retval
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
def extract_attack_flow_and_navigator(
|
|
206
|
-
bundler: Bundler,
|
|
207
|
-
preprocessed_text,
|
|
208
|
-
ai_create_attack_flow,
|
|
209
|
-
ai_create_attack_navigator_layer,
|
|
210
|
-
ai_settings_relationships,
|
|
211
|
-
):
|
|
212
|
-
ex: BaseAIExtractor = ai_settings_relationships
|
|
213
|
-
tactics = bundler.get_attack_tactics()
|
|
214
|
-
techniques = get_techniques_from_extracted_objects(bundler.bundle.objects, tactics)
|
|
215
|
-
if not techniques:
|
|
216
|
-
return None, None
|
|
217
|
-
|
|
218
|
-
logged_techniques = [
|
|
219
|
-
{k: v for k, v in t.items() if k != "stix_obj"} for t in techniques.values()
|
|
220
|
-
]
|
|
221
|
-
logging.debug(f"parsed techniques: {logged_techniques}")
|
|
222
|
-
|
|
223
|
-
flow = ex.extract_attack_flow(preprocessed_text, techniques)
|
|
224
|
-
navigator = None
|
|
225
|
-
if ai_create_attack_flow:
|
|
226
|
-
logging.info("creating attack-flow bundle")
|
|
227
|
-
bundler.flow_objects = parse_flow(bundler.report, flow, techniques, tactics)
|
|
228
|
-
|
|
229
|
-
if ai_create_attack_navigator_layer:
|
|
230
|
-
navigator = create_navigator_layer(bundler.report, flow, techniques, tactics)
|
|
231
|
-
return flow, navigator
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
|
|
2
|
-
txt2detection/__main__.py,sha256=s5XcIctE59ALjys6Y8lRIqS_pQWi1mlNo2gyG8_XS5s,11622
|
|
3
|
-
txt2detection/attack_flow.py,sha256=x6GhDZZ8xOzugfMELvHvrhclcIqozGIt9_mzyr2KKnA,8741
|
|
4
|
-
txt2detection/bundler.py,sha256=eGCIwLY0J_SVyOI_1IFsm_8RgvaE_32t5MIc_UyJwm0,13994
|
|
5
|
-
txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
|
|
6
|
-
txt2detection/models.py,sha256=_-sR03FEWI46OUZdL7U0tibNn909B0NU9LWNzopBtiY,12888
|
|
7
|
-
txt2detection/observables.py,sha256=RxgJchvk6_Z2pBxJ6MAGsx00gj8TyRt9W2BTQTb1F9o,6762
|
|
8
|
-
txt2detection/utils.py,sha256=EJ5lMhnghUgW0JbcRmeiDXYwm5GaB6XrG4cUjru-52g,2812
|
|
9
|
-
txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
|
|
10
|
-
txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
|
|
11
|
-
txt2detection/ai_extractor/base.py,sha256=2C3d4BoH7I4fnvp6cLxbtjiFVPm4WJLFwnS_lAppHr8,3210
|
|
12
|
-
txt2detection/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
|
|
13
|
-
txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
|
|
14
|
-
txt2detection/ai_extractor/models.py,sha256=xMTvUHoxIflbBA4mkGLTjwf657DVEOxd6gqLpEUciQ4,963
|
|
15
|
-
txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
|
|
16
|
-
txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
|
|
17
|
-
txt2detection/ai_extractor/prompts.py,sha256=xI82PelsTidnRzi5wnNbEC4lmkio92YUDd8SZu4CQiE,10961
|
|
18
|
-
txt2detection/ai_extractor/utils.py,sha256=SUxyPhkGp5yDbX_H_E018i93R8IbyLsQ00PIBDecfuc,540
|
|
19
|
-
txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
|
|
20
|
-
txt2detection-1.0.10.dist-info/METADATA,sha256=CHTRZrV_v6gfyAyEW6hfNaQutVpSv5yM7w084u_x7U4,15870
|
|
21
|
-
txt2detection-1.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
-
txt2detection-1.0.10.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
|
|
23
|
-
txt2detection-1.0.10.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
|
|
24
|
-
txt2detection-1.0.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|