txt2stix 1.0.1.post3__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,7 @@ from llama_index.core.utils import get_tokenizer
14
14
  _ai_extractor_registry: dict[str, 'Type[BaseAIExtractor]'] = {}
15
15
  class BaseAIExtractor():
16
16
  system_prompt = DEFAULT_SYSTEM_PROMPT
17
-
17
+
18
18
  extraction_template = DEFAULT_EXTRACTION_TEMPL
19
19
 
20
20
  relationship_template = DEFAULT_RELATIONSHIP_TEMPL
@@ -28,7 +28,7 @@ class BaseAIExtractor():
28
28
  verbose=True,
29
29
  llm=self.llm,
30
30
  )
31
-
31
+
32
32
  def _get_relationship_program(self):
33
33
  return LLMTextCompletionProgram.from_defaults(
34
34
  output_parser=ParserWithLogging(RelationshipList),
@@ -36,7 +36,7 @@ class BaseAIExtractor():
36
36
  verbose=True,
37
37
  llm=self.llm,
38
38
  )
39
-
39
+
40
40
  def _get_content_checker_program(self):
41
41
  return LLMTextCompletionProgram.from_defaults(
42
42
  output_parser=ParserWithLogging(DescribesIncident),
@@ -44,10 +44,10 @@ class BaseAIExtractor():
44
44
  verbose=True,
45
45
  llm=self.llm,
46
46
  )
47
-
47
+
48
48
  def check_content(self, text) -> DescribesIncident:
49
49
  return self._get_content_checker_program()(context_str=text)
50
-
50
+
51
51
  def _get_attack_flow_program(self):
52
52
  return LLMTextCompletionProgram.from_defaults(
53
53
  output_parser=ParserWithLogging(AttackFlowList),
@@ -55,24 +55,29 @@ class BaseAIExtractor():
55
55
  verbose=True,
56
56
  llm=self.llm,
57
57
  )
58
-
59
- def extract_attack_flow(self, input_text, extractions, relationships) -> AttackFlowList:
60
- return self._get_attack_flow_program()(document=input_text, extractions=extractions, relationships=relationships)
58
+
59
+ def extract_attack_flow(self, input_text, techniques) -> AttackFlowList:
60
+ extracted_techniques = []
61
+ for t in techniques.values():
62
+ extracted_techniques.append(
63
+ dict(id=t['id'], name=t['name'], possible_tactics=list(t['possible_tactics'].keys()))
64
+ )
65
+ return self._get_attack_flow_program()(document=input_text, extracted_techniques=extracted_techniques)
61
66
 
62
67
  def extract_relationships(self, input_text, extractions, relationship_types: list[str]) -> RelationshipList:
63
68
  return self._get_relationship_program()(relationship_types=relationship_types, input_file=input_text, extractions=extractions)
64
-
69
+
65
70
  def extract_objects(self, input_text, extractors) -> ExtractionList:
66
71
  extraction_list = self._get_extraction_program()(extractors=get_extractors_str(extractors), input_file=input_text)
67
72
  return extraction_list.model_dump().get('extractions', [])
68
-
73
+
69
74
  def __init__(self, *args, **kwargs) -> None:
70
75
  pass
71
76
 
72
77
  def count_tokens(self, input_text):
73
78
  logging.info("unsupported model `%s`, estimating using llama-index's default tokenizer", self.extractor_name)
74
79
  return len(get_tokenizer()(input_text))
75
-
80
+
76
81
  def __init_subclass__(cls, /, provider, register=True, **kwargs):
77
82
  super().__init_subclass__(**kwargs)
78
83
  if register:
@@ -82,7 +87,6 @@ class BaseAIExtractor():
82
87
  @property
83
88
  def extractor_name(self):
84
89
  return f"{self.provider}:{self.llm.model}"
85
-
86
90
 
87
91
  def __hash__(self):
88
- return hash(self.extractor_name)
92
+ return hash(self.extractor_name)
@@ -128,43 +128,111 @@ DEFAULT_CONTENT_CHECKER_WITH_SUMMARY_TEMPL = PromptTemplate("""
128
128
  </summary>
129
129
  """)
130
130
 
131
+
132
+
131
133
  ATTACK_FLOW_PROMPT_TEMPL = ChatPromptTemplate([
132
- ChatMessage.from_str("""You are a cyber security threat intelligence analyst.
133
- Your job is to review report that describe a cyber security incidents.
134
- Examples include malware analysis, APT group reports, data breaches and vulnerabilities.""", MessageRole.SYSTEM),
135
- ChatMessage.from_str("Hi, What <document> would you like me to process for you? the message below must contain the document and the document only", MessageRole.ASSISTANT),
134
+ ChatMessage.from_str("""You are a cybersecurity threat intelligence analyst.
135
+
136
+ Your task is to analyze structured cybersecurity incident reports (e.g., malware analysis, APTs, data breaches, vulnerabilities) and extract and organize MITRE ATT&CK techniques as part of an attack flow analysis. This analysis helps defenders understand adversary behavior using the MITRE Attack Flow model maintained by the MITRE Center for Threat-Informed Defense.""", MessageRole.SYSTEM),
137
+
138
+ ChatMessage.from_str("Hello. Please provide the document for analysis. Only include the full document text in your response.", MessageRole.ASSISTANT),
139
+
136
140
  ChatMessage.from_str("{document}", MessageRole.USER),
137
- ChatMessage.from_str("What are the objects that have been extracted (<extractions>) from the document above?", MessageRole.ASSISTANT),
138
- ChatMessage.from_str("{extractions}", MessageRole.USER),
139
- ChatMessage.from_str("What are the relationships that have been extracted (<relationships>) between the documents?", MessageRole.USER),
140
- ChatMessage.from_str("{relationships}", MessageRole.USER),
141
- ChatMessage.from_str("What should I do with all the data that have been provided?", MessageRole.ASSISTANT),
142
- ChatMessage.from_str("""Consider all the MITRE ATT&CK Objects extracted from the report and the relationships they have to other objects.
143
141
 
144
- Now I need you to logically define the order of ATT&CK Tactics/Techniques as they are executed in the incident described in the report.
142
+ ChatMessage.from_str("What ATT&CK techniques and related metadata were extracted from this document?", MessageRole.ASSISTANT),
145
143
 
146
- It is possible that the Techniques extracted are not linked to the relevant MITRE ATT&CK Tactic. You should also assign the correct Tactic to a Technique where a Technique belongs to many ATT&CK Tactics in the ATT&CK Matrix if that can correctly be inferred.
144
+ ChatMessage.from_str("<extracted_techniques>\n\n{extracted_techniques}\n\n</extracted_techniques>", MessageRole.USER),
147
145
 
148
- You should also provide a short overview about how this technique is described in the report as the name, and a longer version in description.
146
+ ChatMessage.from_str("Let's begin with tactic selection. What should I do with the techniques and possible tactics?", MessageRole.ASSISTANT),
149
147
 
150
- IMPORTANT: only include the ATT&CK IDs extracted already, do not add any new extractions.
148
+ # PART 1: Tactic Selection Phase
149
+ ChatMessage.from_str("""
150
+ PART 1: TACTIC SELECTION
151
151
 
152
- You should deliver a response in JSON as follows
152
+ For each of the technique in `<extracted_techniques>`, return [technique_id, tactic_name], where
153
+ - technique id = `technique.id`
154
+ - tactic_name = choice from `technique.possible_tactics`, where choice is selected based on the **most contextually appropriate** tactic name for each technique based on how it's used in the document.
153
155
 
154
- [
156
+ 📌 Output only the tactic assignments in this format:
157
+ <code>
155
158
  {
156
- "position": "<ORDER OF OBJECTS STARTING AT 0",
157
- "attack_tactic_id": "<ID>",
158
- "attack_technique_id": "<ID>",
159
- "name": "<NAME>",
160
- "description": "<DESC>"
161
- },
159
+ "tactic_selection": [
160
+ ["Txxxx", "impact"],
161
+ ["Tyyyy", "discovery"],
162
+ ...
163
+ ]
164
+ }
165
+ </code>
166
+
167
+ ⚠️ Constraints:
168
+ - Use **only** the `possible_tactics` provided with each technique.
169
+ - Do **not** invent or infer any technique or tactic name beyond what’s given in <extracted_techniques>.
170
+ - Ensure **every** technique in `<extracted_techniques>` appears in `tactic_selection`, even if uncertain — choose the best fit.
171
+ - Technique IDs in `tactic_selection` must match exactly from <extracted_techniques> (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
172
+ - Must include every technique in `<extracted_techniques>`
173
+ """, MessageRole.USER),
174
+
175
+ ChatMessage.from_str("Thanks. Now let's continue with the attack flow. How should I proceed?", MessageRole.ASSISTANT),
176
+
177
+ # PART 2: Attack Flow Construction Phase
178
+ ChatMessage.from_str("""
179
+ PART 2: ATTACK FLOW CONSTRUCTION
180
+
181
+ Using the `<extracted_techniques>` and the incident details in the document, construct a sequence of MITRE ATT&CK techniques that represent the adversary’s logical progression through the attack.
182
+
183
+ For each technique:
184
+ - Use the `technique.id` exactly as provided
185
+ - Assign:
186
+ - `name`: a short, context-based phrase describing how the technique is used
187
+ - `description`: a longer explanation of how the technique operates in this specific incident, based only on the document
188
+ - `position`: the step in the logical or chronological attack sequence (starting at 0)
189
+
190
+ ⚠️ Constraints:
191
+ - Use **only** technique IDs provided in `<extracted_techniques>` — do **not** invent or infer new ones
192
+ - Ensure all included technique IDs exactly match `technique.id` from `<extracted_techniques>` (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
193
+
194
+ 📤 Output Format:
195
+ <code>
162
196
  {
163
- "position": "<ORDER OF OBJECTS STARTING AT 0",
164
- "attack_tactic_id": "<ID>",
165
- "attack_technique_id": "<ID>",
166
- "name": "<NAME>",
167
- "description": "<DESC>"
197
+ "items": [
198
+ {
199
+ "position": 0,
200
+ "attack_technique_id": "Txxxx",
201
+ "name": "Short contextual name",
202
+ "description": "Detailed contextual explanation"
203
+ },
204
+ ...
205
+ ],
206
+ "success": true
168
207
  }
169
- ]""", MessageRole.USER)
170
- ])
208
+ </code>
209
+
210
+ Your goal is to tell the story of how the adversary moved through the attack using the extracted ATT&CK techniques, in the correct sequence, with clear context for defenders.
211
+ """, MessageRole.USER),
212
+ # PART 3: Combination phase
213
+ ChatMessage.from_str("""
214
+ 📤 Final Output Format:
215
+ <code>
216
+ {
217
+ "tactic_selection": [...], // Use your previous output
218
+ "items": [
219
+ {
220
+ "position": 0,
221
+ "attack_technique_id": "Txxxx",
222
+ "name": "Short contextual name",
223
+ "description": "Detailed contextual explanation"
224
+ },
225
+ ...
226
+ ],
227
+ "success": true
228
+ }
229
+ </code>
230
+
231
+ ⚠️ Constraints:
232
+ - All `attack_technique_id` values in `items` must come from `<extracted_techniques>`
233
+ - The `position` field should reflect the **chronological or logical** execution order of the attack
234
+ - Do **not** introduce new technique IDs
235
+
236
+ ✅ Your goal is to build a realistic, document-based attack flow using MITRE ATT&CK technique–tactic pairs.
237
+ """, MessageRole.USER)
238
+ ])
@@ -38,16 +38,23 @@ class DescribesIncident(BaseModel):
38
38
 
39
39
  class AttackFlowItem(BaseModel):
40
40
  position : int = Field(description="order of object starting at 0")
41
- attack_tactic_id : str
42
41
  attack_technique_id : str
43
42
  name: str
44
43
  description: str
45
44
 
46
45
  class AttackFlowList(BaseModel):
47
- matrix : str = Field(description="one of ics, mobile and enterprise")
46
+ tactic_selection: list[tuple[str, str]] = Field(description="attack technique id to attack tactic id mapping using possible_tactics")
47
+ # additional_tactic_mapping: list[tuple[str, str]] = Field(description="the rest of tactic_mapping")
48
48
  items : list[AttackFlowItem]
49
49
  success: bool = Field(description="determines if there's any valid flow in <extractions>")
50
50
 
51
+ def model_post_init(self, context):
52
+ return super().model_post_init(context)
53
+
54
+ @property
55
+ def tactic_mapping(self):
56
+ return dict(self.tactic_selection)
57
+
51
58
  class ParserWithLogging(PydanticOutputParser):
52
59
  def parse(self, text: str):
53
60
  f = io.StringIO()
txt2stix/attack_flow.py CHANGED
@@ -1,7 +1,10 @@
1
+ import json
1
2
  import logging
2
3
  import uuid
3
4
  from stix2 import Relationship
5
+ from txt2stix import txt2stixBundler
4
6
 
7
+ from txt2stix.ai_extractor.base import BaseAIExtractor
5
8
  from txt2stix.common import UUID_NAMESPACE
6
9
  from txt2stix.retriever import STIXObjectRetriever
7
10
  from stix2extensions.attack_action import AttackAction, AttackFlow
@@ -9,33 +12,27 @@ from stix2extensions._extensions import attack_flow_ExtensionDefinitionSMO
9
12
  from .utils import AttackFlowList
10
13
 
11
14
 
12
- def parse_flow(report, flow: AttackFlowList):
15
+ def parse_flow(report, flow: AttackFlowList, techniques, tactics):
13
16
  logging.info(f"flow.success = {flow.success}")
14
17
  if not flow.success:
15
18
  return []
16
- attack_objects = STIXObjectRetriever().get_attack_objects(
17
- flow.matrix,
18
- [item.attack_tactic_id for item in flow.items]
19
- + [item.attack_technique_id for item in flow.items],
20
- )
21
- attack_objects = {
22
- obj["external_references"][0]["external_id"]: obj for obj in attack_objects
23
- }
24
19
  flow_objects = [report, attack_flow_ExtensionDefinitionSMO]
25
20
  last_action = None
26
21
  for i, item in enumerate(flow.items):
27
22
  try:
28
- tactic_obj = attack_objects[item.attack_tactic_id]
29
- technique_obj = attack_objects[item.attack_technique_id]
23
+ technique = techniques[item.attack_technique_id]
24
+ tactic_id = technique['possible_tactics'][flow.tactic_mapping[item.attack_technique_id]]
25
+ technique_obj = technique["stix_obj"]
26
+ tactic_obj = tactics[technique["domain"]][tactic_id]
30
27
  action_obj = AttackAction(
31
28
  **{
32
29
  "id": flow_id(
33
- report["id"], item.attack_technique_id, item.attack_tactic_id
30
+ report["id"], item.attack_technique_id, tactic_id
34
31
  ),
35
32
  "effect_refs": [f"attack-action--{str(uuid.uuid4())}"],
36
33
  "technique_id": item.attack_technique_id,
37
34
  "technique_ref": technique_obj["id"],
38
- "tactic_id": item.attack_tactic_id,
35
+ "tactic_id": tactic_id,
39
36
  "tactic_ref": tactic_obj["id"],
40
37
  "name": item.name,
41
38
  "description": item.description,
@@ -99,3 +96,113 @@ def flow_id(report_id, technique_id, tactic_id):
99
96
  f"{report_id}+{technique_id}+{tactic_id}",
100
97
  )
101
98
  )
99
+
100
+
101
+ def get_all_tactics():
102
+ tactics = {
103
+ "enterprise-attack": None,
104
+ "mobile-attack": None,
105
+ "ics-attack": None,
106
+ }
107
+ for k in tactics.keys():
108
+ matrix = k.replace("attack", "").strip("-")
109
+ all_tactics = STIXObjectRetriever().get_attack_tactics(matrix)
110
+ tactics[k] = all_tactics
111
+ return tactics
112
+
113
+
114
+ def get_techniques_from_extracted_objects(objects: dict, tactics: dict):
115
+ techniques = {}
116
+ for obj in objects:
117
+ if (
118
+ obj["type"] == "attack-pattern"
119
+ and obj.get("external_references", [{"source_name": None}])[0][
120
+ "source_name"
121
+ ]
122
+ == "mitre-attack"
123
+ ):
124
+ domain = obj["x_mitre_domains"][0]
125
+ technique = dict(
126
+ domain=domain,
127
+ name=obj["name"],
128
+ possible_tactics={},
129
+ id=obj["external_references"][0]["external_id"],
130
+ platforms=[
131
+ platform
132
+ for platform in obj["x_mitre_platforms"]
133
+ if platform != "None"
134
+ ],
135
+ stix_obj=obj,
136
+ )
137
+ for phase in obj["kill_chain_phases"]:
138
+ if not set(phase["kill_chain_name"].split("-")).issuperset(
139
+ ["mitre", "attack"]
140
+ ):
141
+ continue
142
+ tactic_name = phase["phase_name"]
143
+ tactic_obj = tactics[domain][tactic_name]
144
+ tactic_id = tactic_obj["external_references"][0]["external_id"]
145
+ technique["possible_tactics"][tactic_name] = tactic_id
146
+ techniques[technique["id"]] = technique
147
+ return techniques
148
+
149
+
150
+ def create_navigator_layer(report, summary, flow: AttackFlowList, techniques):
151
+ domains = {}
152
+ for technique in techniques.values():
153
+ domain_techniques = domains.setdefault(technique["domain"], [])
154
+ technique_id = technique["id"]
155
+ if technique_id not in flow.tactic_mapping:
156
+ continue
157
+ domain_techniques.append(
158
+ dict(techniqueID=technique_id, tactic=flow.tactic_mapping[technique_id])
159
+ )
160
+
161
+ retval = []
162
+
163
+ for domain, domain_techniques in domains.items():
164
+ retval.append(
165
+ {
166
+ "version": "4.5",
167
+ "name": report.name,
168
+ "domain": domain,
169
+ "description": summary,
170
+ "techniques": domain_techniques,
171
+ "gradient": {
172
+ "colors": ["#ffffff", "#ff6666"],
173
+ "minValue": 0,
174
+ "maxValue": 100,
175
+ },
176
+ "legendItems": [],
177
+ "metadata": [],
178
+ "layout": {"layout": "side"},
179
+ }
180
+ )
181
+ return retval
182
+
183
+
184
+ def extract_attack_flow_and_navigator(
185
+ bundler: txt2stixBundler,
186
+ preprocessed_text,
187
+ ai_create_attack_flow,
188
+ ai_create_attack_navigator_layer,
189
+ ai_settings_relationships,
190
+ ):
191
+ ex: BaseAIExtractor = ai_settings_relationships
192
+ tactics = get_all_tactics()
193
+ techniques = get_techniques_from_extracted_objects(bundler.bundle.objects, tactics)
194
+ logged_techniques = [
195
+ {k: v for k, v in t.items() if k != "stix_obj"}
196
+ for t in techniques.values()
197
+ ]
198
+ logging.debug(f"parsed techniques: {json.dumps(logged_techniques, indent=4)}")
199
+
200
+ flow = ex.extract_attack_flow(preprocessed_text, techniques)
201
+ navigator = None
202
+ if ai_create_attack_flow:
203
+ logging.info("creating attack-flow bundle")
204
+ bundler.flow_objects = parse_flow(bundler.report, flow, techniques, tactics)
205
+
206
+ if ai_create_attack_navigator_layer:
207
+ navigator = create_navigator_layer(bundler.report, bundler.summary, flow, techniques)
208
+ return flow, navigator
txt2stix/bundler.py CHANGED
@@ -194,6 +194,7 @@ class txt2stixBundler:
194
194
  self.all_extractors = extractors
195
195
  self.identity = identity or self.default_identity
196
196
  self.tlp_level = TLP_LEVEL.get(tlp_level)
197
+ self.summary = ""
197
198
  if report_id:
198
199
  self.uuid = report_id
199
200
  else:
@@ -415,6 +416,7 @@ class txt2stixBundler:
415
416
  )
416
417
 
417
418
  def add_summary(self, summary, ai_summary_provider):
419
+ self.summary = summary
418
420
  summary_note_obj = Note(
419
421
  type="note",
420
422
  spec_version="2.1",
@@ -1,3 +1,5 @@
1
+ ## IMPORTANT: if using CTI Butler database locally in arangodb (i.e. it is not app.ctibutler.com in .env) you need to follow these steps to import the data needed to populate these lookups: https://github.com/muchdogesec/stix2arango/blob/main/utilities/arango_cti_processor/README.md (use `--database ctibutler_database` in the s2a script or change it in this script)
2
+
1
3
  import os
2
4
  from arango import ArangoClient
3
5
 
txt2stix/retriever.py CHANGED
@@ -22,6 +22,15 @@ class STIXObjectRetriever:
22
22
  endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/{attack_id}/")
23
23
  return self._retrieve_objects(endpoint)
24
24
 
25
+ def get_attack_tactics(self, matrix):
26
+ endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/?attack_type=Tactic")
27
+ tactics = self._retrieve_objects(endpoint)
28
+ retval = {}
29
+ for tac in tactics:
30
+ retval[tac['x_mitre_shortname']] = tac
31
+ retval[tac['external_references'][0]['external_id']] = tac
32
+ return retval
33
+
25
34
  def get_attack_objects(self, matrix, attack_ids):
26
35
  endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/?attack_id={','.join(attack_ids)}")
27
36
  return self._retrieve_objects(endpoint)
txt2stix/txt2stix.py CHANGED
@@ -1,4 +1,6 @@
1
1
  import argparse, dotenv
2
+ import contextlib
3
+ import shutil
2
4
  from datetime import datetime
3
5
  import glob
4
6
  import uuid
@@ -11,7 +13,7 @@ import sys, os
11
13
  from pydantic import BaseModel
12
14
 
13
15
  from txt2stix.ai_extractor.utils import DescribesIncident
14
- from txt2stix.attack_flow import parse_flow
16
+ from txt2stix import attack_flow
15
17
 
16
18
 
17
19
  from .utils import RELATIONSHIP_TYPES, Txt2StixData, remove_links
@@ -244,6 +246,14 @@ def parse_args():
244
246
  help="create attack flow for attack objects in report/bundle",
245
247
  )
246
248
 
249
+ anav_arg = parser.add_argument(
250
+ "--ai_create_attack_navigator_layer",
251
+ default=False,
252
+ action="store_true",
253
+ help="create attack flow for attack objects in report/bundle",
254
+ )
255
+
256
+
247
257
  args = parser.parse_args()
248
258
  if not args.input_file.exists():
249
259
  raise argparse.ArgumentError(inf_arg, "cannot open file")
@@ -258,7 +268,11 @@ def parse_args():
258
268
 
259
269
  if args.ai_create_attack_flow and not args.ai_settings_relationships:
260
270
  raise argparse.ArgumentError(
261
- aflow_arg, "--ai_create_attack_flow requires --ai_settings_relationships"
271
+ aflow_arg, "--ai_settings_relationships must be set"
272
+ )
273
+ if args.ai_create_attack_navigator_layer and not args.ai_settings_relationships:
274
+ raise argparse.ArgumentError(
275
+ anav_arg, "--ai_settings_relationships must be set"
262
276
  )
263
277
  #### process --use-extractions
264
278
  if args.use_extractions.get("ai") and not args.ai_settings_extractions:
@@ -352,6 +366,7 @@ def _count_token(extractor: BaseAIExtractor, input: str):
352
366
  def run_txt2stix(bundler: txt2stixBundler, preprocessed_text: str, extractors_map: dict,
353
367
  ai_content_check_provider=None,
354
368
  ai_create_attack_flow=None,
369
+ ai_create_attack_navigator_layer=None,
355
370
  input_token_limit=10,
356
371
  ai_settings_extractions=None,
357
372
  ai_settings_relationships=None,
@@ -385,15 +400,13 @@ def run_txt2stix(bundler: txt2stixBundler, preprocessed_text: str, extractors_ma
385
400
  retval.extractions = extract_all(bundler, extractors_map, preprocessed_text, ai_extractors=ai_settings_extractions, ignore_extraction_boundary=ignore_extraction_boundary)
386
401
  if relationship_mode == "ai" and sum(map(lambda x: len(x), retval.extractions.values())):
387
402
  retval.relationships = extract_relationships_with_ai(bundler, preprocessed_text, retval.extractions, ai_settings_relationships)
388
-
389
- if ai_create_attack_flow:
390
- logging.info("creating attack-flow bundle")
391
- ex: BaseAIExtractor = ai_settings_relationships
392
- retval.attack_flow = ex.extract_attack_flow(preprocessed_text, retval.extractions, retval.relationships)
393
- bundler.flow_objects = parse_flow(bundler.report, retval.attack_flow)
394
-
403
+
404
+ if ai_create_attack_flow or ai_create_attack_navigator_layer:
405
+ retval.attack_flow, retval.navigator_layer = attack_flow.extract_attack_flow_and_navigator(bundler, preprocessed_text, ai_create_attack_flow, ai_create_attack_navigator_layer, ai_settings_relationships)
395
406
  return retval
396
407
 
408
+
409
+
397
410
  def main():
398
411
  dotenv.load_dotenv()
399
412
  logger = newLogger("txt2stix")
@@ -420,13 +433,20 @@ def main():
420
433
 
421
434
  ## write outputs
422
435
  out = bundler.to_json()
423
- output_path = Path("./output")/f"{bundler.bundle.id}.json"
424
- output_path.parent.mkdir(exist_ok=True)
436
+ output_dir = Path("./output")/str(job_id)
437
+ with contextlib.suppress(BaseException):
438
+ shutil.rmtree(output_dir)
439
+ output_dir.mkdir(exist_ok=True, parents=True)
440
+ output_path = output_dir/f"{bundler.bundle.id}.json"
425
441
  output_path.write_text(out)
426
442
  logger.info(f"Wrote bundle output to `{output_path}`")
427
- data_path = Path(str(output_path).replace('bundle--', 'data--'))
443
+ data_path = output_dir/"data.json"
428
444
  data_path.write_text(data.model_dump_json(indent=4))
429
445
  logger.info(f"Wrote data output to `{data_path}`")
446
+ for nav_layer in data.navigator_layer or []:
447
+ nav_path = output_dir/f"navigator-{nav_layer['domain']}.json"
448
+ nav_path.write_text(json.dumps(nav_layer, indent=4))
449
+ logger.info(f"Wrote navigator output to `{nav_path}`")
430
450
  except argparse.ArgumentError as e:
431
451
  logger.exception(e, exc_info=True)
432
452
  except:
txt2stix/utils.py CHANGED
@@ -51,6 +51,7 @@ class Txt2StixData(BaseModel):
51
51
  extractions: dict = Field(default=None)
52
52
  relationships: list[dict] = Field(default_factory=list)
53
53
  attack_flow: AttackFlowList = Field(default=None)
54
+ navigator_layer: list = Field(default=None)
54
55
 
55
56
 
56
57
  def remove_links(input_text: str, remove_images: bool, remove_anchors: bool):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2stix
3
- Version: 1.0.1.post3
3
+ Version: 1.0.2
4
4
  Summary: txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/txt2stix
6
6
  Project-URL: Issues, https://github.com/muchdogesec/txt2stix/issues
@@ -171,8 +171,9 @@ If any AI extractions, or AI relationship mode is set, you must set the followin
171
171
  #### Other AI related settings
172
172
 
173
173
  * `--ai_content_check_provider` (`model:provider`, required if passed): Passing this flag will get the AI to try and classify the text in the input to 1) determine if it is talking about threat intelligence, and 2) what type of threat intelligence it is talking about. For context, we use this to filter out non-threat intel posts in Obstracts and Stixify. You pass `provider:model` with this flag to determine the AI model you wish to use to perform the check. It will also create a summary of the content passed (and store this into a STIX Note).
174
- * `--ai_extract_if_no_incidence` (boolean, default `true`) if content check decides the report is not related to cyber security intelligence (e.g. vendor marketing), then you can use this setting to decide wether or not script should proceed. Setting to `false` will stop processing. It is designed to save AI tokens processing unknown content at scale in an automated way.
175
- * `--ai_create_attack_flow` (boolean): passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
174
+ * `--ai_extract_if_no_incidence` (boolean, default `true`, will only work if `ai_content_check_provider` set) if content check decides the report is not related to cyber security intelligence (e.g. vendor marketing), then you can use this setting to decide whether or not the script should proceed. Setting to `false` will stop processing. It is designed to save AI tokens processing unknown content at scale in an automated way.
175
+ * `--ai_create_attack_flow` (boolean, default `false`): passing this flag will also prompt the AI model (the same entered for `--ai_settings_relationships`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK extractions to define the logical order in which they are being described. You must pass `--ai_settings_relationships` for this to work.
176
+ * `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate [MITRE ATT&CK Navigator layers](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK extractions. For each ATT&CK domain (Enterprise, ICS, Mobile) txt2stix will generate a layer. You must pass `--ai_settings_relationships` for this to work because the AI is tasked with linking extracted Techniques to the correct Tactic. Known issues with `openai:gpt-3.5` (avoid using this model if possible when using ATT&CK Navigator).
176
177
 
177
178
  ## Adding new extractions
178
179
 
@@ -1,23 +1,23 @@
1
1
  txt2stix/__init__.py,sha256=Sm_VT913IFuAZ6dJEdVz3baPwC5VYtHySVfBAOUG92w,803
2
- txt2stix/attack_flow.py,sha256=WWlukuQYrGW1SJ1DnhfROYC5Ck4WYqNifgmtiuyDg7E,4177
3
- txt2stix/bundler.py,sha256=EVTcVgZyVMwb6XjNQ3Gyj7zm44UErXo9wbVr2JGsjQQ,16797
2
+ txt2stix/attack_flow.py,sha256=DLDaNXB_gxuqdEb_A1VQO_nu69MG23nolTx7-JESrKI,7889
3
+ txt2stix/bundler.py,sha256=kqUNW9_jktuMyWSkoAa-ydZY-L5gzSSkthb7OdhUiKo,16854
4
4
  txt2stix/common.py,sha256=ISnGNKqJPE1EcfhL-x_4G18mcwt1urmorkW-ru9kV-0,585
5
5
  txt2stix/extractions.py,sha256=_tlsqYHhfAoV-PJzxRHysrX47uxCsMlSg7PQWxww1u0,2171
6
6
  txt2stix/indicator.py,sha256=c6S0xx0K8JM-PT_Qd1PlN_ZlDXdnEwiRS8529iUp3yg,30774
7
7
  txt2stix/lookups.py,sha256=h42YVtYUkWZm6ZPv2h5hHDHDzDs3yBqrT_T7pj2MDZI,2301
8
- txt2stix/retriever.py,sha256=zU8L00RSh9N5J0NpAo3CM3IHsuZsNVjJGohRisXcMRs,5167
8
+ txt2stix/retriever.py,sha256=auKlk6JlRE9en-oiQ5KICMW0IwmU8R558o0K5UmEQZc,5550
9
9
  txt2stix/stix.py,sha256=9nXD9a2dCY4uaatl-mlIA1k3srwQBhGW-tUSho3iYe0,30
10
- txt2stix/txt2stix.py,sha256=RzGmzkIViEHO45GsxStcz5nbE0ynhifVeJpTalFSlZc,17405
11
- txt2stix/utils.py,sha256=P66yq-SphsQu2S9At6BfYpavfghXsZqh4h6W13HUEoI,3256
10
+ txt2stix/txt2stix.py,sha256=HYXN9dKzakoqdqJ4wSthwGdFIxOm6KTegiQlVmfp0eQ,18169
11
+ txt2stix/utils.py,sha256=n6mh4t9ZRJ7iT4Jvp9ai_dfCXjgXNcRtF_zXO7nkpnk,3304
12
12
  txt2stix/ai_extractor/__init__.py,sha256=5Tf6Co9THzytBdFEVhD-7vvT05TT3nSpltnAV1sfdoM,349
13
13
  txt2stix/ai_extractor/anthropic.py,sha256=mdz-8CB-BSCEqnK5l35DRZURVPUf508ef2b48XMxmuk,441
14
- txt2stix/ai_extractor/base.py,sha256=MAtnKvWUmWZgnzwDM0i2n-WrRWq69du4KVcapNMIsEg,3523
14
+ txt2stix/ai_extractor/base.py,sha256=mHu6xtWu78aDHnb2ePXR0UCBbROS-jH0kPRgQxfIwhI,3685
15
15
  txt2stix/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
16
16
  txt2stix/ai_extractor/gemini.py,sha256=yJC7knYzl-TScyCBd-MTpUf-NT6znC25E7vXxNMqjLU,578
17
17
  txt2stix/ai_extractor/openai.py,sha256=DtllzeVhZw1231hj35vn1U8V2MMzm8wM7mqKLBkxazQ,489
18
18
  txt2stix/ai_extractor/openrouter.py,sha256=hAA6mTOMcpA28XYsOCvuJH7WMJqXCxfqZGJf_VrDsIk,628
19
- txt2stix/ai_extractor/prompts.py,sha256=3PewwmNptHEvsG1r1Yk5rs3oaEX5Jf3BRFyGaHru6r0,8137
20
- txt2stix/ai_extractor/utils.py,sha256=mnGIDiDa8ecwyRqDuYcKBIOiXfeQsivKxe93CfGW660,4440
19
+ txt2stix/ai_extractor/prompts.py,sha256=NtqtVyPPtShPlVZ5SrFmo-LCkfpANIIi4H9rjqaxqDo,10559
20
+ txt2stix/ai_extractor/utils.py,sha256=xPVtp_lI7254MvkXPt9YY_Vter0uiPLKMGcv5poXVKs,4763
21
21
  txt2stix/pattern/__init__.py,sha256=K9ofaP2AOikvzb48VSBpJZijckdqufZxSzr_kbRypLY,491
22
22
  txt2stix/pattern/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  txt2stix/pattern/extractors/base_extractor.py,sha256=ly80rp-L40g7DbhrGiCvhPWI95-ZFMtAQUEC-fH6Y-o,6130
@@ -81,7 +81,7 @@ txt2stix/includes/helpers/stix_relationship_types.txt,sha256=PQytANVSrWepdK_SLEZ
81
81
  txt2stix/includes/helpers/tlds.txt,sha256=Va_awj-FQiKgs5ace6C0kC5xxAHIl9yAIBhvT08Q7Q0,9551
82
82
  txt2stix/includes/helpers/windows_registry_key_prefix.txt,sha256=J5gU4FsqmOVYt6cVRgwCG7odYEWk-UPLpuCiDwpzBfg,145
83
83
  txt2stix/includes/lookups/_README.md,sha256=OGkyqCcqAOPI-JLE81zAmyg4sHW5apJNhDFcvHUW1nc,338
84
- txt2stix/includes/lookups/_generate_lookups.py,sha256=MEVIy1Xb67bdBOoTJ3BKBf8xpQHYHSOGyQOKH0Ru0bU,9025
84
+ txt2stix/includes/lookups/_generate_lookups.py,sha256=ex12zxiFnKYHgsXfcDX4OL-KyrjAXSlvzeYVUzUD2lE,9390
85
85
  txt2stix/includes/lookups/attack_pattern.txt,sha256=4ARDLG-cwUqk6_TO_JAY6hNJg6KRbAaIr-Or5nML6io,15
86
86
  txt2stix/includes/lookups/campaign.txt,sha256=N66XO0H3Rx-3Tvo7wwHDouckIT0tGlGVyCDKxDs1KnM,11
87
87
  txt2stix/includes/lookups/country_iso3166_alpha2.txt,sha256=LMM7j50NoBv7BlK64mpmE3Dbef9_tNBUNbuTXOEIvCo,746
@@ -112,8 +112,8 @@ txt2stix/includes/lookups/threat_actor.txt,sha256=QfDO9maQuqKBgW_Sdd7VGv1SHZ9Ra-
112
112
  txt2stix/includes/lookups/tld.txt,sha256=-MEgJea2NMG_KDsnc4BVvI8eRk5Dm93L-t8SGYx5wMo,8598
113
113
  txt2stix/includes/lookups/tool.txt,sha256=HGKG6JpUE26w6ezzSxOjBkp15UpSaB7N-mZ_NU_3G7A,6
114
114
  txt2stix/includes/tests/test_cases.yaml,sha256=QD1FdIunpPkOpsn6wJRqs2vil_hv8OSVaqUp4a96aZg,22247
115
- txt2stix-1.0.1.post3.dist-info/METADATA,sha256=6rWnpDd2GTg0rjflOVndvCsbEISnV4rxhRj21as6SqI,14289
116
- txt2stix-1.0.1.post3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
117
- txt2stix-1.0.1.post3.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
118
- txt2stix-1.0.1.post3.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
119
- txt2stix-1.0.1.post3.dist-info/RECORD,,
115
+ txt2stix-1.0.2.dist-info/METADATA,sha256=-qdpBMRkkfhkAvFlAK8ya9Dj8ZYnXH0rt-NJSH8bqnw,14887
116
+ txt2stix-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
117
+ txt2stix-1.0.2.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
118
+ txt2stix-1.0.2.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
119
+ txt2stix-1.0.2.dist-info/RECORD,,