PyPI - txt2detection - Versions diffs - 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

txt2detection 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of txt2detection might be problematic. Click here for more details.

Files changed (18) hide show

txt2detection/__main__.py +219 -68
txt2detection/ai_extractor/base.py +41 -13
txt2detection/ai_extractor/models.py +34 -0
txt2detection/ai_extractor/openai.py +1 -3
txt2detection/ai_extractor/openrouter.py +4 -4
txt2detection/ai_extractor/prompts.py +130 -3
txt2detection/attack_flow.py +233 -0
txt2detection/bundler.py +165 -91
txt2detection/credential_checker.py +11 -9
txt2detection/models.py +11 -0
txt2detection/observables.py +0 -1
txt2detection/utils.py +24 -12
{txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/METADATA +6 -8
txt2detection-1.0.9.dist-info/RECORD +24 -0
txt2detection-1.0.8.dist-info/RECORD +0 -22
{txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/WHEEL +0 -0
{txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/entry_points.txt +0 -0
{txt2detection-1.0.8.dist-info → txt2detection-1.0.9.dist-info}/licenses/LICENSE +0 -0

txt2detection/ai_extractor/prompts.py CHANGED Viewed

@@ -1,10 +1,8 @@
 from llama_index.core import PromptTemplate, ChatPromptTemplate
 import textwrap
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
 SIEMRULES_PROMPT = ChatPromptTemplate([
     ChatMessage.from_str("""
 **Persona:**
@@ -113,4 +111,133 @@ Make sure your response follows this format and adheres to the rules above.
 """),
     ChatMessage.from_str("Taking the entire input of my next message, analyze and return appropriate response", MessageRole.USER),
     ChatMessage.from_str("{document}", MessageRole.USER),
-])
+])
+# Chat prompt used to turn already-extracted ATT&CK techniques into an attack flow.
+#
+# Template variables: `{document}` (the report text) and `{extracted_techniques}`
+# (the techniques with their `possible_tactics`).
+#
+# The conversation is scripted as alternating turns: a SYSTEM persona, ASSISTANT
+# turns that ask for the next input, and USER turns that supply the document, the
+# techniques, and the instructions for PART 1 (tactic selection), PART 2 (attack
+# flow construction) and the final combined output format.
+#
+# NOTE(review): the JSON examples below contain literal `{` / `}` characters; this
+# assumes the template formatter only substitutes the named variables — TODO confirm.
+ATTACK_FLOW_PROMPT_TEMPL = ChatPromptTemplate(
+    [
+        ChatMessage.from_str(
+            """You are a cybersecurity threat intelligence analyst.
+Your task is to analyze structured cybersecurity incident reports (e.g., malware analysis, APTs, data breaches, vulnerabilities) and extract and organize MITRE ATT&CK techniques as part of an attack flow analysis. This analysis helps defenders understand adversary behavior using the MITRE Attack Flow model maintained by the MITRE Center for Threat-Informed Defense.""",
+            MessageRole.SYSTEM,
+        ),
+        ChatMessage.from_str(
+            "Hello. Please provide the document for analysis. Only include the full document text in your response.",
+            MessageRole.ASSISTANT,
+        ),
+        ChatMessage.from_str("{document}", MessageRole.USER),
+        ChatMessage.from_str(
+            "What ATT&CK techniques and related metadata were extracted from this document?",
+            MessageRole.ASSISTANT,
+        ),
+        ChatMessage.from_str(
+            "<extracted_techniques>\n\n{extracted_techniques}\n\n</extracted_techniques>",
+            MessageRole.USER,
+        ),
+        ChatMessage.from_str(
+            "Let's begin with tactic selection. What should I do with the techniques and possible tactics?",
+            MessageRole.ASSISTANT,
+        ),
+        # PART 1: Tactic Selection Phase
+        ChatMessage.from_str(
+            """
+PART 1: TACTIC SELECTION
+For each of the technique in `<extracted_techniques>`, return [technique_id, tactic_name], where
+- technique id = `technique.id`
+- tactic_name = choice from `technique.possible_tactics`, where choice is selected based on the **most contextually appropriate** tactic name for each technique based on how it's used in the document.
+📌 Output only the tactic assignments in this format:
+<code>
+{
+  "tactic_selection": [
+    ["Txxxx", "impact"],
+    ["Tyyyy", "discovery"],
+    ...
+  ]
+}
+</code>
+⚠️ Constraints:
+- Use **only** the `possible_tactics` provided with each technique.
+- Do **not** invent or infer any technique or tactic name beyond what’s given in <extracted_techniques>.
+- Ensure **every** technique in `<extracted_techniques>` appears in `tactic_selection`, even if uncertain — choose the best fit.
+- Technique IDs in `tactic_selection` must match exactly from <extracted_techniques> (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
+- Must include every technique in `<extracted_techniques>`
+""",
+            MessageRole.USER,
+        ),
+        ChatMessage.from_str(
+            "Thanks. Now let's continue with the attack flow. How should I proceed?",
+            MessageRole.ASSISTANT,
+        ),
+        # PART 2: Attack Flow Construction Phase
+        ChatMessage.from_str(
+            """
+PART 2: ATTACK FLOW CONSTRUCTION
+Using the `<extracted_techniques>` and the incident details in the document, construct a sequence of MITRE ATT&CK techniques that represent the adversary’s logical progression through the attack.
+For each technique:
+- Use the `technique.id` exactly as provided
+- Assign:
+  - `name`: a short, context-based phrase describing how the technique is used
+  - `description`: a longer explanation of how the technique operates in this specific incident, based only on the document
+  - `position`: the step in the logical or chronological attack sequence (starting at 0)
+⚠️ Constraints:
+- Use **only** technique IDs provided in `<extracted_techniques>` — do **not** invent or infer new ones
+- Ensure all included technique IDs exactly match `technique.id` from `<extracted_techniques>` (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
+📤 Output Format:
+<code>
+{
+  "items": [
+    {
+      "position": 0,
+      "attack_technique_id": "Txxxx",
+      "name": "Short contextual name",
+      "description": "Detailed contextual explanation"
+    },
+    ...
+  ],
+  "success": true
+}
+</code>
+Your goal is to tell the story of how the adversary moved through the attack using the extracted ATT&CK techniques, in the correct sequence, with clear context for defenders.
+""",
+            MessageRole.USER,
+        ),
+        # PART 3: Combination phase
+        # This USER turn directly follows the PART 2 USER turn (no ASSISTANT turn in
+        # between) and asks for one object holding both `tactic_selection` and `items`.
+        ChatMessage.from_str(
+            """
+📤 Final Output Format:
+<code>
+{
+  "tactic_selection": [...],  // Use your previous output
+  "items": [
+    {
+      "position": 0,
+      "attack_technique_id": "Txxxx",
+      "name": "Short contextual name",
+      "description": "Detailed contextual explanation"
+    },
+    ...
+  ],
+  "success": true
+}
+</code>
+⚠️ Constraints:
+- All `attack_technique_id` values in `items` must come from `<extracted_techniques>`
+- The `position` field should reflect the **chronological or logical** execution order of the attack
+- Do **not** introduce new technique IDs
+✅ Your goal is to build a realistic, document-based attack flow using MITRE ATT&CK technique–tactic pairs.
+""",
+            MessageRole.USER,
+        ),
+    ]
+)

txt2detection/attack_flow.py ADDED Viewed

@@ -0,0 +1,233 @@
+import json
+import logging
+import uuid
+from stix2 import Relationship
+from txt2detection.ai_extractor.models import AttackFlowList
+from .bundler import Bundler
+from .ai_extractor.base import BaseAIExtractor
+from .models import UUID_NAMESPACE
+from stix2extensions.attack_action import AttackAction, AttackFlow
+from stix2extensions._extensions import attack_flow_ExtensionDefinitionSMO
+def parse_flow(report, flow: AttackFlowList, techniques, tactics):
+    """Collect the attack-flow objects for a report across all ATT&CK domains.
+
+    Args:
+        report: the report object; placed first in the returned list and handed
+            to ``parse_domain_flow``.
+        flow: the extracted flow; only ``flow.success`` is read directly here.
+        techniques: technique lookup, passed through to ``parse_domain_flow``.
+        tactics: tactic lookup, passed through to ``parse_domain_flow``.
+
+    Returns:
+        ``[]`` when ``flow.success`` is falsy; otherwise a list that starts with
+        ``report`` and ``attack_flow_ExtensionDefinitionSMO`` followed by whatever
+        ``parse_domain_flow`` returns for each domain, in the order listed below.
+    """
+    logging.info(f"flow.success = {flow.success}")
+    if not flow.success:
+        return []
+    # The report and the extension definition are included whenever the flow
+    # succeeded, even if no domain contributes any objects.
+    objects = [report, attack_flow_ExtensionDefinitionSMO]
+    for domain in ["enterprise-attack", "mobile-attack", "ics-attack"]:
+        flow_objects = parse_domain_flow(report, flow, techniques, tactics, domain)
+        objects.extend(flow_objects)
+    return objects
+def parse_domain_flow(report, flow: AttackFlowList, techniques, tactics, domain):
+    """Build the attack-flow objects for the flow items that belong to one domain.
+
+    Items in ``flow.items`` are visited in order; an item is skipped when its
+    technique's ``"domain"`` differs from ``domain``. The first item that is kept
+    creates the ``AttackFlow`` object and a ``Relationship`` from the report to
+    that flow; every later kept item is chained onto the previous action through
+    ``effect_refs``.
+
+    Args:
+        report: report object; read both by key (``report["id"]``) and by
+            attribute (``report.id``, ``report.name``, ...).
+        flow: provides ``items`` and ``tactic_mapping`` (technique id -> tactic
+            name, used as a key into the technique's ``possible_tactics``).
+        techniques: mapping of technique id to a dict with at least ``"domain"``,
+            ``"possible_tactics"`` and ``"stix_obj"``.
+        tactics: mapping indexed here by tactic id to obtain the tactic object.
+        domain: domain name compared against ``technique["domain"]``.
+
+    Returns:
+        A list of the created/collected objects; empty when no item matches
+        ``domain``.
+
+    Raises:
+        Any exception raised while processing an item is logged at debug level
+        and re-raised (see the review note in the ``except`` block).
+    """
+    flow_objects = []
+    flow_obj = None
+    last_action = None
+    # NOTE(review): the index `i` is not used in the loop body.
+    for i, item in enumerate(flow.items):
+        try:
+            technique = techniques[item.attack_technique_id]
+            if technique["domain"] != domain:
+                continue
+            # Resolve the selected tactic name to its tactic id.
+            tactic_id = technique["possible_tactics"][
+                flow.tactic_mapping[item.attack_technique_id]
+            ]
+            technique_obj = technique["stix_obj"]
+            tactic_obj = tactics[tactic_id]
+            action_obj = AttackAction(
+                **{
+                    "id": flow_id(report["id"], item.attack_technique_id, tactic_id),
+                    # Placeholder reference with a random UUID; it is removed again
+                    # right after construction (see `.clear()` below).
+                    "effect_refs": [f"attack-action--{str(uuid.uuid4())}"],
+                    "technique_id": item.attack_technique_id,
+                    "technique_ref": technique_obj["id"],
+                    "tactic_id": tactic_id,
+                    "tactic_ref": tactic_obj["id"],
+                    "name": item.name,
+                    "description": item.description,
+                },
+                allow_custom=True,
+            )
+            # NOTE(review): presumably the placeholder exists only so the object
+            # ends up with an `effect_refs` list that can be appended to later —
+            # TODO confirm against the AttackAction definition.
+            action_obj.effect_refs.clear()
+            if not flow_obj:
+                # First kept item for this domain: it becomes the flow's start.
+                flow_obj = {
+                    "type": "attack-flow",
+                    # Deterministic id derived from the domain and the report id.
+                    "id": "attack-flow--"
+                    + str(
+                        uuid.uuid5(UUID_NAMESPACE, f"attack-flow+{domain}+{report.id}")
+                    ),
+                    "spec_version": "2.1",
+                    "created": report.created,
+                    "modified": report.modified,
+                    "created_by_ref": report.created_by_ref,
+                    "start_refs": [action_obj["id"]],
+                    # Prefix is the part of the domain before the first "-", upper-cased.
+                    "name": f"[{domain.split('-')[0].upper()}] {report.name}",
+                    "description": report.description,
+                    "scope": "malware",
+                    "external_references": report.external_references,
+                    "object_marking_refs": report.object_marking_refs,
+                }
+                flow_objects.append(AttackFlow(**flow_obj))
+                # Link the report to the newly created flow.
+                flow_objects.append(
+                    Relationship(
+                        type="relationship",
+                        spec_version="2.1",
+                        id="relationship--"
+                        + str(
+                            uuid.uuid5(
+                                UUID_NAMESPACE,
+                                f"attack-flow+{report.id}+{flow_obj['id']}",
+                            )
+                        ),
+                        created_by_ref=report.created_by_ref,
+                        created=report.created,
+                        modified=report.modified,
+                        relationship_type="attack-flow",
+                        description=f"Attack Flow for {report.name}",
+                        source_ref=report.id,
+                        target_ref=flow_obj["id"],
+                        external_references=report.external_references,
+                        object_marking_refs=report.object_marking_refs,
+                    )
+                )
+            else:
+                # Chain: the previous action leads to this one.
+                last_action["effect_refs"].append(action_obj["id"])
+            # Tactic and technique objects are appended for every kept item; no
+            # de-duplication happens here.
+            flow_objects.append(tactic_obj)
+            flow_objects.append(technique_obj)
+            flow_objects.append(action_obj)
+            last_action = action_obj
+        except Exception as e:
+            # NOTE(review): `flow_objects` is a list, so `flow_objects == 2` is
+            # never true and this "FATAL" branch cannot run; every exception falls
+            # through to the debug log and re-raise. Presumably a length check was
+            # intended — TODO confirm the intended condition. `e` is also unused.
+            if flow_objects == 2:
+                logging.exception("FATAL: create attack flow object failed")
+                return []
+            logging.debug("create attack-action failed", exc_info=True)
+            raise
+    return flow_objects
+def flow_id(report_id, technique_id, tactic_id):
+    """Return a deterministic ``attack-action--<uuid>`` id for a report/technique/tactic.
+
+    The UUID is a ``uuid5`` whose namespace is the UUID parsed from the text after
+    the last ``"--"`` in ``report_id`` and whose name is
+    ``"{report_id}+{technique_id}+{tactic_id}"``, so equal inputs always yield the
+    same id.
+
+    Raises:
+        ValueError: from ``uuid.UUID`` when the text after the last ``"--"`` in
+            ``report_id`` is not a valid UUID string.
+    """
+    return "attack-action--" + str(
+        uuid.uuid5(
+            uuid.UUID(report_id.split("--")[-1]),
+            f"{report_id}+{technique_id}+{tactic_id}",
+        )
+    )
+def get_techniques_from_extracted_objects(objects: dict, tactics: dict):
+    """Index the MITRE ATT&CK techniques found in ``objects`` by their external id.
+
+    An object is kept when its ``"type"`` is ``"attack-pattern"`` and the
+    ``source_name`` of its first external reference is ``"mitre-attack"``.
+
+    Args:
+        objects: iterated directly and each element is indexed by key, so in
+            practice a sequence of mapping-like objects.
+            NOTE(review): the ``dict`` annotation does not match that usage.
+        tactics: mapping from tactic phase name to a tactic object whose first
+            external reference carries the tactic's ``external_id``.
+
+    Returns:
+        A dict keyed by technique external id. Each value holds ``domain`` (the
+        first entry of ``x_mitre_domains``), ``name``, ``possible_tactics``
+        (phase name -> tactic external id), ``id``, ``platforms``
+        (``x_mitre_platforms`` without the string ``"None"``) and ``stix_obj``
+        (the original object).
+    """
+    techniques = {}
+    for obj in objects:
+        if (
+            obj["type"] == "attack-pattern"
+            # The default only covers a missing key; an `external_references` list
+            # that is present but empty would raise IndexError on `[0]`.
+            and obj.get("external_references", [{"source_name": None}])[0][
+                "source_name"
+            ]
+            == "mitre-attack"
+        ):
+            domain = obj["x_mitre_domains"][0]
+            technique = dict(
+                domain=domain,
+                name=obj["name"],
+                possible_tactics={},
+                id=obj["external_references"][0]["external_id"],
+                platforms=[
+                    platform
+                    for platform in obj["x_mitre_platforms"]
+                    if platform != "None"
+                ],
+                stix_obj=obj,
+            )
+            for phase in obj["kill_chain_phases"]:
+                # Keep only kill chains whose "-"-separated name contains both
+                # the tokens "mitre" and "attack".
+                if not set(phase["kill_chain_name"].split("-")).issuperset(
+                    ["mitre", "attack"]
+                ):
+                    continue
+                tactic_name = phase["phase_name"]
+                # Raises KeyError when the phase name is not present in `tactics`.
+                tactic_obj = tactics[tactic_name]
+                tactic_id = tactic_obj["external_references"][0]["external_id"]
+                technique["possible_tactics"][tactic_name] = tactic_id
+            techniques[technique["id"]] = technique
+    return techniques
+def create_navigator_layer(report, flow: AttackFlowList, techniques, tactics):
+    """Build one navigator layer dict per domain present in ``techniques``.
+
+    Args:
+        report: supplies ``name`` (layer name) and ``id`` (stored in metadata).
+        flow: supplies ``items`` (for per-technique comments) and
+            ``tactic_mapping`` (technique id -> tactic name).
+        techniques: mapping whose values carry ``"domain"`` and ``"id"``.
+        tactics: only ``tactics["version"]`` is read here.
+            NOTE(review): presumably the ATT&CK version stored alongside the
+            tactic entries — confirm against ``Bundler.get_attack_tactics``.
+
+    Returns:
+        A list of layer dicts, one per domain, each listing the techniques of that
+        domain that have an entry in ``flow.tactic_mapping``.
+    """
+    domains = {}
+    # Technique id -> description; if an id occurs more than once in `flow.items`
+    # the last description wins.
+    comments = {item.attack_technique_id: item.description for item in flow.items}
+    for technique in techniques.values():
+        # Registering the domain before the mapping check means a domain still
+        # gets a layer (with an empty technique list) when none of its
+        # techniques are mapped.
+        domain_techniques = domains.setdefault(technique["domain"], [])
+        technique_id = technique["id"]
+        if technique_id not in flow.tactic_mapping:
+            continue
+        technique_item = dict(
+            techniqueID=technique_id,
+            tactic=flow.tactic_mapping[technique_id],
+            score=100,
+            showSubtechniques=True,
+        )
+        # Attach a comment only when a non-empty description exists.
+        if comment := comments.get(technique_id):
+            technique_item["comment"] = comment
+        domain_techniques.append(technique_item)
+    retval = []
+    for domain, domain_techniques in domains.items():
+        retval.append(
+            {
+                "versions": {
+                    "layer": "4.5",
+                    "attack": tactics["version"],
+                    "navigator": "5.1.0",
+                },
+                "name": report.name,
+                "domain": domain,
+                "techniques": domain_techniques,
+                # Gradient range matches the fixed score of 100 assigned above.
+                "gradient": {
+                    "colors": ["#ffffff", "#ff6666"],
+                    "minValue": 0,
+                    "maxValue": 100,
+                },
+                "legendItems": [],
+                "metadata": [{"name": "report_id", "value": report.id}],
+                "links": [
+                    {
+                        "label": "Generated using txt2detection",
+                        "url": "https://github.com/muchdogesec/txt2detection/",
+                    }
+                ],
+                "layout": {"layout": "side"},
+            }
+        )
+    return retval
+def extract_attack_flow_and_navigator(
+    bundler: Bundler,
+    preprocessed_text,
+    ai_create_attack_flow,
+    ai_create_attack_navigator_layer,
+    ai_settings_relationships,
+):
+    """Extract an attack flow for the bundle and optionally derive flow objects and navigator layers.
+
+    Args:
+        bundler: provides ``get_attack_tactics()``, ``bundle.objects`` and
+            ``report``; receives the result in ``bundler.flow_objects`` when
+            ``ai_create_attack_flow`` is truthy.
+        preprocessed_text: text handed to the extractor.
+        ai_create_attack_flow: when truthy, build the attack-flow objects.
+        ai_create_attack_navigator_layer: when truthy, build navigator layers.
+        ai_settings_relationships: the extractor used for ``extract_attack_flow``.
+
+    Returns:
+        ``(None, None)`` when no techniques are found in the bundle; otherwise
+        ``(flow, navigator)`` where ``navigator`` is ``None`` unless
+        ``ai_create_attack_navigator_layer`` is truthy.
+    """
+    ex: BaseAIExtractor = ai_settings_relationships
+    tactics = bundler.get_attack_tactics()
+    techniques = get_techniques_from_extracted_objects(bundler.bundle.objects, tactics)
+    if not techniques:
+        return None, None
+    # Leave the full `stix_obj` out of the debug log entry.
+    logged_techniques = [
+        {k: v for k, v in t.items() if k != "stix_obj"} for t in techniques.values()
+    ]
+    logging.debug(f"parsed techniques: {logged_techniques}")
+    # The extractor is called regardless of the two flags below.
+    flow = ex.extract_attack_flow(preprocessed_text, techniques)
+    navigator = None
+    if ai_create_attack_flow:
+        logging.info("creating attack-flow bundle")
+        bundler.flow_objects = parse_flow(bundler.report, flow, techniques, tactics)
+    if ai_create_attack_navigator_layer:
+        navigator = create_navigator_layer(bundler.report, flow, techniques, tactics)
+    return flow, navigator

txt2detection 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl

Potentially problematic release.

txt2detection 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl