txt2detection 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of txt2detection might be problematic. Click here for more details.

@@ -1,10 +1,8 @@
1
-
2
1
  from llama_index.core import PromptTemplate, ChatPromptTemplate
3
2
  import textwrap
4
3
  from llama_index.core.base.llms.types import ChatMessage, MessageRole
5
4
 
6
5
 
7
-
8
6
  SIEMRULES_PROMPT = ChatPromptTemplate([
9
7
  ChatMessage.from_str("""
10
8
  **Persona:**
@@ -113,4 +111,133 @@ Make sure your response follows this format and adheres to the rules above.
113
111
  """),
114
112
  ChatMessage.from_str("Taking the entire input of my next message, analyze and return appropriate response", MessageRole.USER),
115
113
  ChatMessage.from_str("{document}", MessageRole.USER),
116
- ])
114
+ ])
115
+
116
+
117
# Chat prompt used to obtain an attack flow from the model, written as a
# scripted multi-turn conversation:
#   * a SYSTEM message that sets the analyst persona,
#   * ASSISTANT messages that act as scripted questions,
#   * USER messages that carry the inputs and the instructions.
# Two template variables are filled in at format time: ``{document}`` and
# ``{extracted_techniques}``.
# The instructions come in three parts: PART 1 asks for ``tactic_selection``
# pairs, PART 2 asks for the ordered ``items`` list, and the last message asks
# for both combined into a single JSON object.
# NOTE(review): the JSON examples contain literal ``{`` / ``}`` characters;
# confirm that the template formatter only substitutes the two named variables
# and leaves these braces untouched.
ATTACK_FLOW_PROMPT_TEMPL = ChatPromptTemplate(
    [
        ChatMessage.from_str(
            """You are a cybersecurity threat intelligence analyst.

Your task is to analyze structured cybersecurity incident reports (e.g., malware analysis, APTs, data breaches, vulnerabilities) and extract and organize MITRE ATT&CK techniques as part of an attack flow analysis. This analysis helps defenders understand adversary behavior using the MITRE Attack Flow model maintained by the MITRE Center for Threat-Informed Defense.""",
            MessageRole.SYSTEM,
        ),
        ChatMessage.from_str(
            "Hello. Please provide the document for analysis. Only include the full document text in your response.",
            MessageRole.ASSISTANT,
        ),
        # Input 1: the report text.
        ChatMessage.from_str("{document}", MessageRole.USER),
        ChatMessage.from_str(
            "What ATT&CK techniques and related metadata were extracted from this document?",
            MessageRole.ASSISTANT,
        ),
        # Input 2: the techniques, wrapped in a tag the later instructions refer to.
        ChatMessage.from_str(
            "<extracted_techniques>\n\n{extracted_techniques}\n\n</extracted_techniques>",
            MessageRole.USER,
        ),
        ChatMessage.from_str(
            "Let's begin with tactic selection. What should I do with the techniques and possible tactics?",
            MessageRole.ASSISTANT,
        ),
        # PART 1: Tactic Selection Phase
        ChatMessage.from_str(
            """
PART 1: TACTIC SELECTION

For each of the technique in `<extracted_techniques>`, return [technique_id, tactic_name], where
- technique id = `technique.id`
- tactic_name = choice from `technique.possible_tactics`, where choice is selected based on the **most contextually appropriate** tactic name for each technique based on how it's used in the document.

📌 Output only the tactic assignments in this format:
<code>
{
"tactic_selection": [
["Txxxx", "impact"],
["Tyyyy", "discovery"],
...
]
}
</code>

⚠️ Constraints:
- Use **only** the `possible_tactics` provided with each technique.
- Do **not** invent or infer any technique or tactic name beyond what’s given in <extracted_techniques>.
- Ensure **every** technique in `<extracted_techniques>` appears in `tactic_selection`, even if uncertain — choose the best fit.
- Technique IDs in `tactic_selection` must match exactly from <extracted_techniques> (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
- Must include every technique in `<extracted_techniques>`
""",
            MessageRole.USER,
        ),
        ChatMessage.from_str(
            "Thanks. Now let's continue with the attack flow. How should I proceed?",
            MessageRole.ASSISTANT,
        ),
        # PART 2: Attack Flow Construction Phase
        ChatMessage.from_str(
            """
PART 2: ATTACK FLOW CONSTRUCTION

Using the `<extracted_techniques>` and the incident details in the document, construct a sequence of MITRE ATT&CK techniques that represent the adversary’s logical progression through the attack.

For each technique:
- Use the `technique.id` exactly as provided
- Assign:
- `name`: a short, context-based phrase describing how the technique is used
- `description`: a longer explanation of how the technique operates in this specific incident, based only on the document
- `position`: the step in the logical or chronological attack sequence (starting at 0)

⚠️ Constraints:
- Use **only** technique IDs provided in `<extracted_techniques>` — do **not** invent or infer new ones
- Ensure all included technique IDs exactly match `technique.id` from `<extracted_techniques>` (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).

📤 Output Format:
<code>
{
"items": [
{
"position": 0,
"attack_technique_id": "Txxxx",
"name": "Short contextual name",
"description": "Detailed contextual explanation"
},
...
],
"success": true
}
</code>

Your goal is to tell the story of how the adversary moved through the attack using the extracted ATT&CK techniques, in the correct sequence, with clear context for defenders.
""",
            MessageRole.USER,
        ),
        # PART 3: Combination phase
        # This USER message directly follows the PART 2 USER message; there is
        # no scripted ASSISTANT turn between them.
        ChatMessage.from_str(
            """
📤 Final Output Format:
<code>
{
"tactic_selection": [...], // Use your previous output
"items": [
{
"position": 0,
"attack_technique_id": "Txxxx",
"name": "Short contextual name",
"description": "Detailed contextual explanation"
},
...
],
"success": true
}
</code>

⚠️ Constraints:
- All `attack_technique_id` values in `items` must come from `<extracted_techniques>`
- The `position` field should reflect the **chronological or logical** execution order of the attack
- Do **not** introduce new technique IDs

✅ Your goal is to build a realistic, document-based attack flow using MITRE ATT&CK technique–tactic pairs.
""",
            MessageRole.USER,
        ),
    ]
)
@@ -0,0 +1,233 @@
1
+ import json
2
+ import logging
3
+ import uuid
4
+ from stix2 import Relationship
5
+
6
+ from txt2detection.ai_extractor.models import AttackFlowList
7
+ from .bundler import Bundler
8
+
9
+ from .ai_extractor.base import BaseAIExtractor
10
+ from .models import UUID_NAMESPACE
11
+ from stix2extensions.attack_action import AttackAction, AttackFlow
12
+ from stix2extensions._extensions import attack_flow_ExtensionDefinitionSMO
13
+
14
+
15
def parse_flow(report, flow: AttackFlowList, techniques, tactics):
    """Collect every STIX object needed to publish the attack flow of *report*.

    Args:
        report: the report object the flow belongs to; it becomes the first
            element of the result.
        flow: the extracted attack flow; only used when ``flow.success`` is
            truthy.
        techniques: mapping of ATT&CK technique ID to technique record.
        tactics: tactic lookup handed through to ``parse_domain_flow``.

    Returns:
        An empty list when the extraction was not successful. Otherwise a
        list holding the report, the attack-flow extension definition and
        then the objects produced for the enterprise, mobile and ICS domains,
        in that order.
    """
    logging.info(f"flow.success = {flow.success}")
    if not flow.success:
        return []

    collected = [report, attack_flow_ExtensionDefinitionSMO]
    for attack_domain in ("enterprise-attack", "mobile-attack", "ics-attack"):
        collected += parse_domain_flow(
            report, flow, techniques, tactics, attack_domain
        )
    return collected
24
+
25
+
26
def parse_domain_flow(report, flow: AttackFlowList, techniques, tactics, domain):
    """Build the attack-flow STIX objects for a single ATT&CK domain.

    Walks ``flow.items`` in order and, for every step whose technique belongs
    to *domain*, creates an attack-action. The first such step also creates
    the attack-flow object (which starts at that action) and a relationship
    from the report to the flow; every later step is chained onto the
    previous action through ``effect_refs``.

    Args:
        report: the report the flow is derived from; read both by key
            (``report["id"]``) and by attribute (``report.created`` ...).
        flow: extracted flow providing ``items`` and ``tactic_mapping``.
        techniques: mapping of technique ID to a record with ``domain``,
            ``possible_tactics`` and ``stix_obj``.
        tactics: lookup from tactic ID to the tactic's STIX object.
        domain: ATT&CK domain to build the flow for, e.g. ``"ics-attack"``.

    Returns:
        The created objects: flow, relationship, then tactic / technique /
        action for each step. Empty when no step belongs to *domain*, or when
        building the attack-flow object or its relationship failed.

    Raises:
        Exception: any error raised while resolving a step or creating its
            attack-action is logged at debug level and re-raised unchanged.
    """
    flow_objects = []
    flow_obj = None
    last_action = None
    for item in flow.items:
        # True only while the attack-flow object and its relationship are
        # being built. The previous guard (`flow_objects == 2`) compared a
        # list with an int, so the "FATAL" branch below could never run.
        creating_flow = False
        try:
            technique = techniques[item.attack_technique_id]
            if technique["domain"] != domain:
                continue
            # tactic_mapping holds the chosen tactic *name*; possible_tactics
            # translates that name into the tactic ID.
            tactic_id = technique["possible_tactics"][
                flow.tactic_mapping[item.attack_technique_id]
            ]
            technique_obj = technique["stix_obj"]

            tactic_obj = tactics[tactic_id]
            action_obj = AttackAction(
                **{
                    "id": flow_id(report["id"], item.attack_technique_id, tactic_id),
                    # Seeded with a throwaway reference and emptied right
                    # after construction, leaving a list that the next step
                    # appends to. Presumably needed because the property
                    # cannot be created empty -- TODO confirm.
                    "effect_refs": [f"attack-action--{str(uuid.uuid4())}"],
                    "technique_id": item.attack_technique_id,
                    "technique_ref": technique_obj["id"],
                    "tactic_id": tactic_id,
                    "tactic_ref": tactic_obj["id"],
                    "name": item.name,
                    "description": item.description,
                },
                allow_custom=True,
            )
            action_obj.effect_refs.clear()
            if not flow_obj:
                creating_flow = True
                flow_obj = {
                    "type": "attack-flow",
                    "id": "attack-flow--"
                    + str(
                        uuid.uuid5(UUID_NAMESPACE, f"attack-flow+{domain}+{report.id}")
                    ),
                    "spec_version": "2.1",
                    "created": report.created,
                    "modified": report.modified,
                    "created_by_ref": report.created_by_ref,
                    "start_refs": [action_obj["id"]],
                    "name": f"[{domain.split('-')[0].upper()}] {report.name}",
                    "description": report.description,
                    "scope": "malware",
                    "external_references": report.external_references,
                    "object_marking_refs": report.object_marking_refs,
                }
                flow_objects.append(AttackFlow(**flow_obj))
                flow_objects.append(
                    Relationship(
                        type="relationship",
                        spec_version="2.1",
                        id="relationship--"
                        + str(
                            uuid.uuid5(
                                UUID_NAMESPACE,
                                f"attack-flow+{report.id}+{flow_obj['id']}",
                            )
                        ),
                        created_by_ref=report.created_by_ref,
                        created=report.created,
                        modified=report.modified,
                        relationship_type="attack-flow",
                        description=f"Attack Flow for {report.name}",
                        source_ref=report.id,
                        target_ref=flow_obj["id"],
                        external_references=report.external_references,
                        object_marking_refs=report.object_marking_refs,
                    )
                )
                creating_flow = False
            else:
                # Chain this action as the effect of the previous one.
                last_action["effect_refs"].append(action_obj["id"])
            flow_objects.append(tactic_obj)
            flow_objects.append(technique_obj)
            flow_objects.append(action_obj)
            last_action = action_obj
        except Exception:
            if creating_flow:
                # Without a flow object there is nothing to publish for this
                # domain, so give up on it instead of failing the whole run.
                logging.exception("FATAL: create attack flow object failed")
                return []
            logging.debug("create attack-action failed", exc_info=True)
            raise

    return flow_objects
110
+
111
+
112
def flow_id(report_id, technique_id, tactic_id):
    """Return the deterministic ``attack-action--<uuid>`` ID for one flow step.

    The UUID part of *report_id* (the text after its last ``--``) is used as
    the UUIDv5 namespace, and ``report_id+technique_id+tactic_id`` as the
    name, so the same report / technique / tactic triple always yields the
    same ID.

    Raises:
        ValueError: if the text after ``--`` in *report_id* is not a valid UUID.
    """
    *_, report_uuid = report_id.split("--")
    namespace = uuid.UUID(report_uuid)
    seed = f"{report_id}+{technique_id}+{tactic_id}"
    return f"attack-action--{uuid.uuid5(namespace, seed)}"
119
+
120
+
121
def get_techniques_from_extracted_objects(objects: dict, tactics: dict):
    """Index the MITRE ATT&CK techniques found in *objects* by technique ID.

    An object counts as an ATT&CK technique when its type is
    ``attack-pattern`` and its first external reference has the source name
    ``mitre-attack``. Objects without any external reference are skipped.

    Args:
        objects: the extracted STIX objects. Despite the ``dict`` annotation
            the value is iterated, and every element is read like a mapping.
        tactics: lookup from tactic phase name to the tactic's STIX object.

    Returns:
        A dict keyed by technique ID (e.g. ``"T1059"``). Each value holds
        ``domain`` (first entry of ``x_mitre_domains``), ``name``,
        ``possible_tactics`` (phase name -> tactic ID), ``id``, ``platforms``
        (without the literal ``"None"``) and ``stix_obj`` (the original
        object).

    Raises:
        KeyError: if a technique lacks one of the ATT&CK properties read
            here, or names a tactic that is absent from *tactics*.
    """
    techniques = {}
    for obj in objects:
        if obj["type"] != "attack-pattern":
            continue
        # A missing *or empty* reference list means "not an ATT&CK
        # technique". Indexing [0] directly raised IndexError on an empty
        # list, because the .get() default only covers a missing key.
        references = obj.get("external_references") or [{}]
        attack_ref = references[0]
        if attack_ref.get("source_name") != "mitre-attack":
            continue

        technique = dict(
            domain=obj["x_mitre_domains"][0],
            name=obj["name"],
            possible_tactics={},
            id=attack_ref["external_id"],
            platforms=[
                platform
                for platform in obj["x_mitre_platforms"]
                if platform != "None"
            ],
            stix_obj=obj,
        )
        for phase in obj["kill_chain_phases"]:
            # Accept any kill chain whose dash-separated name contains both
            # "mitre" and "attack" (e.g. "mitre-attack", "mitre-ics-attack").
            if not set(phase["kill_chain_name"].split("-")).issuperset(
                ["mitre", "attack"]
            ):
                continue
            tactic_name = phase["phase_name"]
            tactic_obj = tactics[tactic_name]
            tactic_id = tactic_obj["external_references"][0]["external_id"]
            technique["possible_tactics"][tactic_name] = tactic_id
        techniques[technique["id"]] = technique
    return techniques
155
+
156
+
157
def create_navigator_layer(report, flow: AttackFlowList, techniques, tactics):
    """Build one ATT&CK Navigator layer per domain found in *techniques*.

    Args:
        report: source report; ``name`` titles each layer and ``id`` is
            stored in the layer metadata.
        flow: extracted flow; ``tactic_mapping`` chooses the tactic for each
            technique and the ``items`` descriptions become comments.
        techniques: mapping of technique ID to a record with ``domain`` and
            ``id``.
        tactics: lookup whose ``"version"`` entry is written as the layer's
            ATT&CK version.

    Returns:
        A list of layer dicts, one per domain in first-seen order. Techniques
        missing from ``flow.tactic_mapping`` are left out, but their domain
        still gets a layer (possibly with no techniques).
    """
    descriptions = {}
    for step in flow.items:
        descriptions[step.attack_technique_id] = step.description

    rows_by_domain = {}
    for entry in techniques.values():
        # Register the domain before filtering so it always yields a layer.
        rows = rows_by_domain.setdefault(entry["domain"], [])
        tid = entry["id"]
        if tid in flow.tactic_mapping:
            row = {
                "techniqueID": tid,
                "tactic": flow.tactic_mapping[tid],
                "score": 100,
                "showSubtechniques": True,
            }
            note = descriptions.get(tid)
            if note:
                row["comment"] = note
            rows.append(row)

    return [
        {
            "versions": {
                "layer": "4.5",
                "attack": tactics["version"],
                "navigator": "5.1.0",
            },
            "name": report.name,
            "domain": layer_domain,
            "techniques": layer_rows,
            "gradient": {
                "colors": ["#ffffff", "#ff6666"],
                "minValue": 0,
                "maxValue": 100,
            },
            "legendItems": [],
            "metadata": [{"name": "report_id", "value": report.id}],
            "links": [
                {
                    "label": "Generated using txt2detection",
                    "url": "https://github.com/muchdogesec/txt2detection/",
                }
            ],
            "layout": {"layout": "side"},
        }
        for layer_domain, layer_rows in rows_by_domain.items()
    ]
205
+
206
+
207
def extract_attack_flow_and_navigator(
    bundler: Bundler,
    preprocessed_text,
    ai_create_attack_flow,
    ai_create_attack_navigator_layer,
    ai_settings_relationships,
):
    """Extract an attack flow for the bundle and optionally derive its outputs.

    Args:
        bundler: bundle builder supplying the tactics, the extracted objects
            and the report; receives ``flow_objects`` when the flow is built.
        preprocessed_text: the document text handed to the extractor.
        ai_create_attack_flow: when truthy, build the attack-flow objects and
            store them on ``bundler.flow_objects``.
        ai_create_attack_navigator_layer: when truthy, build the Navigator
            layers.
        ai_settings_relationships: the extractor whose
            ``extract_attack_flow`` performs the extraction.

    Returns:
        ``(flow, navigator)``. Both are ``None`` when the bundle contains no
        ATT&CK technique; ``navigator`` is ``None`` when no layer was
        requested.
    """
    extractor: BaseAIExtractor = ai_settings_relationships
    attack_tactics = bundler.get_attack_tactics()
    techniques = get_techniques_from_extracted_objects(
        bundler.bundle.objects, attack_tactics
    )
    if not techniques:
        return None, None

    # Drop the full STIX object from each record to keep the debug line short.
    loggable = []
    for record in techniques.values():
        loggable.append(
            {key: value for key, value in record.items() if key != "stix_obj"}
        )
    logging.debug(f"parsed techniques: {loggable}")

    flow = extractor.extract_attack_flow(preprocessed_text, techniques)
    if ai_create_attack_flow:
        logging.info("creating attack-flow bundle")
        bundler.flow_objects = parse_flow(
            bundler.report, flow, techniques, attack_tactics
        )

    if not ai_create_attack_navigator_layer:
        return flow, None
    return flow, create_navigator_layer(
        bundler.report, flow, techniques, attack_tactics
    )