PyPI - txt2detection - Versions diffs - 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl - Mend

txt2detection: 1.0.10-py3-none-any.whl → 1.0.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of txt2detection might be problematic. Click here for more details.

Files changed (18) hide show

txt2detection/__main__.py +31 -36
txt2detection/ai_extractor/base.py +0 -23
txt2detection/ai_extractor/deepseek.py +5 -4
txt2detection/ai_extractor/prompts.py +7 -129
txt2detection/ai_extractor/utils.py +1 -1
txt2detection/attack_navigator.py +66 -0
txt2detection/bundler.py +66 -36
txt2detection/models.py +6 -7
txt2detection/observables.py +0 -24
txt2detection/utils.py +5 -0
{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/METADATA +3 -5
txt2detection-1.0.12.dist-info/RECORD +23 -0
txt2detection/ai_extractor/models.py +0 -34
txt2detection/attack_flow.py +0 -231
txt2detection-1.0.10.dist-info/RECORD +0 -24
{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/WHEEL +0 -0
{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/entry_points.txt +0 -0
{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/licenses/LICENSE +0 -0

txt2detection/__main__.py CHANGED Viewed

@@ -10,10 +10,11 @@ import logging
 import re
 import sys
 import uuid
+from pydantic import ValidationError
 from stix2 import Identity
 import yaml
-from txt2detection import attack_flow, credential_checker
+from txt2detection import credential_checker
 from txt2detection.ai_extractor.base import BaseAIExtractor
 from txt2detection.models import (
     TAG_PATTERN,
@@ -185,17 +186,11 @@ def parse_args():
             choices=valid_licenses(),
         )
         mode_parser.add_argument(
-            "--ai_create_attack_navigator_layer",
+            "--create_attack_navigator_layer",
             help="Create navigator layer",
             action="store_true",
             default=False,
         )
-        mode_parser.add_argument(
-            "--ai_create_attack_flow",
-            help="Create attack flow",
-            action="store_true",
-            default=False,
-        )
     file.add_argument(
         "--input_file",
@@ -228,11 +223,6 @@ def parse_args():
     if args.mode != "sigma":
         assert args.ai_provider, "--ai_provider is required in file or txt mode"
-    if args.ai_create_attack_navigator_layer or args.ai_create_attack_flow:
-        assert (
-            args.ai_provider
-        ), "--ai_provider is required when --ai_create_attack_navigator_layer/--ai_create_attack_flow is passed"
     if args.mode == "file":
         args.input_text = args.input_file
@@ -253,21 +243,16 @@ def run_txt2detection(
     labels: list[str],
     report_id: str | uuid.UUID,
     ai_provider: BaseAIExtractor,
-    ai_create_attack_flow=False,
-    ai_create_attack_navigator_layer=False,
+    create_attack_navigator_layer=False,
     **kwargs,
 ) -> Bundler:
-    if (
-        kwargs.get("sigma_file") != "sigma_file"
-        or ai_create_attack_flow
-        or ai_create_attack_navigator_layer
-    ):
+    if not kwargs.get("sigma_file"):
         validate_token_count(
             int(os.getenv("INPUT_TOKEN_LIMIT", 0)), input_text, ai_provider
         )
     if sigma := kwargs.get("sigma_file"):
-        detection = get_sigma_detections(sigma)
+        detection = get_sigma_detections(sigma, name=name)
         if not identity and detection.author:
             identity = make_identity(detection.author)
         kwargs.update(
@@ -303,22 +288,19 @@ def run_txt2detection(
         )
         detections = ai_provider.get_detections(input_text)
     bundler.bundle_detections(detections)
-    if ai_create_attack_flow or ai_create_attack_navigator_layer:
-        bundler.data.attack_flow, bundler.data.navigator_layer = (
-            attack_flow.extract_attack_flow_and_navigator(
-                bundler,
-                bundler.report.description,
-                ai_create_attack_flow,
-                ai_create_attack_navigator_layer,
-                ai_provider,
-            )
-        )
+    if create_attack_navigator_layer:
+        bundler.create_attack_navigator()
     return bundler
-def get_sigma_detections(sigma: str) -> SigmaRuleDetection:
+def get_sigma_detections(sigma: str, name=None) -> SigmaRuleDetection:
     obj = yaml.safe_load(io.StringIO(sigma))
+    if not isinstance(obj, dict):
+        raise ValueError(
+            f"bad sigma input file. expected object/dict, got {type(obj)}."
+        )
+    if name:
+        obj["title"] = name
     return SigmaRuleDetection.model_validate(obj)
@@ -328,7 +310,14 @@ def main(args: Args):
     logging.info(f"starting argument: {json.dumps(sys.argv[1:])}")
     kwargs = args.__dict__
     kwargs["identity"] = args.use_identity
-    bundler = run_txt2detection(**kwargs)
+    try:
+        bundler = run_txt2detection(**kwargs)
+    except (ValidationError, ValueError) as e:
+        logging.error(f"Validate sigma file failed: {str(e)}")
+        if isinstance(e, ValidationError):
+            full_error = e.json(indent=4)
+            logging.debug(f"Validate sigma file failed: {full_error}", exc_info=True)
+        sys.exit(19)
     output_dir = Path("./output") / str(bundler.bundle.id)
     shutil.rmtree(output_dir, ignore_errors=True)
@@ -342,6 +331,12 @@ def main(args: Args):
     for obj in bundler.bundle["objects"]:
         if obj["type"] != "indicator" or obj["pattern_type"] != "sigma":
             continue
-        name = obj["id"].replace("indicator", "rule") + ".yml"
-        (rules_dir / name).write_text(obj["pattern"])
+        rule_id: str = obj["id"].replace("indicator--", "")
+        rule_path = rules_dir / ("rule--" + rule_id + ".yml")
+        nav_path = rules_dir / f"attack-enterprise-navigator-layer-rule--{rule_id}.json"
+        rule_path.write_text(obj["pattern"])
+        if rule_nav := (
+            bundler.data.navigator_layer and bundler.data.navigator_layer.get(rule_id)
+        ):
+            nav_path.write_text(json.dumps(rule_nav, indent=4))
     logging.info(f"Writing bundle output to `{output_path}`")

txt2detection/ai_extractor/base.py CHANGED Viewed

@@ -7,7 +7,6 @@ from llama_index.core.llms.llm import LLM
 from txt2detection.ai_extractor import prompts
-from txt2detection.ai_extractor.models import AttackFlowList
 from txt2detection.ai_extractor.utils import ParserWithLogging
 from txt2detection.models import DetectionContainer, DetectionContainer
 from llama_index.core.utils import get_tokenizer
@@ -62,28 +61,6 @@ class BaseAIExtractor:
     def extractor_name(self):
         return f"{self.provider}:{self.llm.model}"
-    def _get_attack_flow_program(self):
-        return LLMTextCompletionProgram.from_defaults(
-            output_parser=ParserWithLogging(AttackFlowList),
-            prompt=prompts.ATTACK_FLOW_PROMPT_TEMPL,
-            verbose=True,
-            llm=self.llm,
-        )
-    def extract_attack_flow(self, input_text, techniques) -> AttackFlowList:
-        extracted_techniques = []
-        for t in techniques.values():
-            extracted_techniques.append(
-                dict(
-                    id=t["id"],
-                    name=t["name"],
-                    possible_tactics=list(t["possible_tactics"].keys()),
-                )
-            )
-        return self._get_attack_flow_program()(
-            document=input_text, extracted_techniques=extracted_techniques
-        )
     def check_credential(self):
         try:
             return "authorized" if self._check_credential() else "unauthorized"

txt2detection/ai_extractor/deepseek.py CHANGED Viewed

@@ -4,10 +4,11 @@ import os
 from .base import BaseAIExtractor
 from llama_index.llms.deepseek import DeepSeek
-class DeepseekExtractor(BaseAIExtractor, provider='deepseek'):
+class DeepseekExtractor(BaseAIExtractor, provider="deepseek"):
     def __init__(self, **kwargs) -> None:
-        kwargs.setdefault('temperature', float(os.environ.get('TEMPERATURE', 0.0)))
-        kwargs.setdefault('model', 'deepseek-chat')
+        kwargs.setdefault("temperature", float(os.environ.get("TEMPERATURE", 0.0)))
+        kwargs.setdefault("model", "deepseek-chat")
         self.llm = DeepSeek(system_prompt=self.system_prompt, **kwargs)
         super().__init__()
@@ -16,4 +17,4 @@ class DeepseekExtractor(BaseAIExtractor, provider='deepseek'):
             return len(self.llm._tokenizer.encode(text))
         except Exception as e:
             logging.warning(e)
-            return super().count_tokens(text)
+            return super().count_tokens(text)

txt2detection/ai_extractor/prompts.py CHANGED Viewed

@@ -3,8 +3,10 @@ import textwrap
 from llama_index.core.base.llms.types import ChatMessage, MessageRole
-SIEMRULES_PROMPT = ChatPromptTemplate([
-    ChatMessage.from_str("""
+SIEMRULES_PROMPT = ChatPromptTemplate(
+    [
+        ChatMessage.from_str(
+            """
 **Persona:**
 You are an expert in cybersecurity threat detection. Given a structured security report, generate a Sigma rule following the Sigma specification.
@@ -108,136 +110,12 @@ Make sure your response follows this format and adheres to the rules above.
 ## **Additional Instructions**
 - Ensure the `"tags"` field includes relevant ATT&CK and CVE references based on the report content.
 - Return a **valid JSON output** without YAML formatting for seamless processing.
-"""),
-    ChatMessage.from_str("Taking the entire input of my next message, analyze and return appropriate response", MessageRole.USER),
-    ChatMessage.from_str("{document}", MessageRole.USER),
-])
-ATTACK_FLOW_PROMPT_TEMPL = ChatPromptTemplate(
-    [
-        ChatMessage.from_str(
-            """You are a cybersecurity threat intelligence analyst.
-Your task is to analyze structured cybersecurity incident reports (e.g., malware analysis, APTs, data breaches, vulnerabilities) and extract and organize MITRE ATT&CK techniques as part of an attack flow analysis. This analysis helps defenders understand adversary behavior using the MITRE Attack Flow model maintained by the MITRE Center for Threat-Informed Defense.""",
-            MessageRole.SYSTEM,
-        ),
-        ChatMessage.from_str(
-            "Hello. Please provide the document for analysis. Only include the full document text in your response.",
-            MessageRole.ASSISTANT,
+"""
         ),
-        ChatMessage.from_str("{document}", MessageRole.USER),
         ChatMessage.from_str(
-            "What ATT&CK techniques and related metadata were extracted from this document?",
-            MessageRole.ASSISTANT,
-        ),
-        ChatMessage.from_str(
-            "<extracted_techniques>\n\n{extracted_techniques}\n\n</extracted_techniques>",
-            MessageRole.USER,
-        ),
-        ChatMessage.from_str(
-            "Let's begin with tactic selection. What should I do with the techniques and possible tactics?",
-            MessageRole.ASSISTANT,
-        ),
-        # PART 1: Tactic Selection Phase
-        ChatMessage.from_str(
-            """
-PART 1: TACTIC SELECTION
-For each of the technique in `<extracted_techniques>`, return [technique_id, tactic_name], where
-- technique id = `technique.id`
-- tactic_name = choice from `technique.possible_tactics`, where choice is selected based on the **most contextually appropriate** tactic name for each technique based on how it's used in the document.
-📌 Output only the tactic assignments in this format:
-<code>
-{
-  "tactic_selection": [
-    ["Txxxx", "impact"],
-    ["Tyyyy", "discovery"],
-    ...
-  ]
-}
-</code>
-⚠️ Constraints:
-- Use **only** the `possible_tactics` provided with each technique.
-- Do **not** invent or infer any technique or tactic name beyond what’s given in <extracted_techniques>.
-- Ensure **every** technique in `<extracted_techniques>` appears in `tactic_selection`, even if uncertain — choose the best fit.
-- Technique IDs in `tactic_selection` must match exactly from <extracted_techniques> (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
-- Must include every technique in `<extracted_techniques>`
-""",
-            MessageRole.USER,
-        ),
-        ChatMessage.from_str(
-            "Thanks. Now let's continue with the attack flow. How should I proceed?",
-            MessageRole.ASSISTANT,
-        ),
-        # PART 2: Attack Flow Construction Phase
-        ChatMessage.from_str(
-            """
-PART 2: ATTACK FLOW CONSTRUCTION
-Using the `<extracted_techniques>` and the incident details in the document, construct a sequence of MITRE ATT&CK techniques that represent the adversary’s logical progression through the attack.
-For each technique:
-- Use the `technique.id` exactly as provided
-- Assign:
-  - `name`: a short, context-based phrase describing how the technique is used
-  - `description`: a longer explanation of how the technique operates in this specific incident, based only on the document
-  - `position`: the step in the logical or chronological attack sequence (starting at 0)
-⚠️ Constraints:
-- Use **only** technique IDs provided in `<extracted_techniques>` — do **not** invent or infer new ones
-- Ensure all included technique IDs exactly match `technique.id` from `<extracted_techniques>` (e.g., `T1059` must match `T1059` and not `T1059.005`, `T1001.001` must match `T1001.001` and not `T1001`).
-📤 Output Format:
-<code>
-{
-  "items": [
-    {
-      "position": 0,
-      "attack_technique_id": "Txxxx",
-      "name": "Short contextual name",
-      "description": "Detailed contextual explanation"
-    },
-    ...
-  ],
-  "success": true
-}
-</code>
-Your goal is to tell the story of how the adversary moved through the attack using the extracted ATT&CK techniques, in the correct sequence, with clear context for defenders.
-""",
-            MessageRole.USER,
-        ),
-        # PART 3: Combination phase
-        ChatMessage.from_str(
-            """
-📤 Final Output Format:
-<code>
-{
-  "tactic_selection": [...],  // Use your previous output
-  "items": [
-    {
-      "position": 0,
-      "attack_technique_id": "Txxxx",
-      "name": "Short contextual name",
-      "description": "Detailed contextual explanation"
-    },
-    ...
-  ],
-  "success": true
-}
-</code>
-⚠️ Constraints:
-- All `attack_technique_id` values in `items` must come from `<extracted_techniques>`
-- The `position` field should reflect the **chronological or logical** execution order of the attack
-- Do **not** introduce new technique IDs
-✅ Your goal is to build a realistic, document-based attack flow using MITRE ATT&CK technique–tactic pairs.
-""",
+            "Taking the entire input of my next message, analyze and return appropriate response",
             MessageRole.USER,
         ),
+        ChatMessage.from_str("{document}", MessageRole.USER),
     ]
 )

txt2detection/ai_extractor/utils.py CHANGED Viewed

@@ -5,7 +5,7 @@ import typing
 from llama_index.core.output_parsers import PydanticOutputParser
-if typing.TYPE_CHECKING:
+if typing.TYPE_CHECKING:
     from txt2detection.bundler import Bundler

txt2detection/attack_navigator.py ADDED Viewed

@@ -0,0 +1,66 @@
+import typing
+if typing.TYPE_CHECKING:
+    from .bundler import Bundler
+def map_technique_tactic(obj, report_tactics, rule_tactics):
+    """
+    Pair an ATT&CK technique with the external id of one of its tactics.
+    Tactics attached to the same rule are preferred; if none of them match,
+    fall back to the tactics collected for the whole report. When neither
+    matches, the returned tactic is None.
+    Args:
+        obj: technique object; reads external_references[0]["external_id"]
+            and the phase_name of every MITRE ATT&CK kill_chain_phases entry.
+        report_tactics: mapping of tactic short name -> tactic object (whole report).
+        rule_tactics: mapping of tactic short name -> tactic object (this rule only).
+    Returns:
+        tuple of (technique external id, tactic external id or None).
+    """
+    technique_name = obj["external_references"][0]["external_id"]
+    # Keep the technique's own phase order so the chosen tactic is deterministic;
+    # popping from a set intersection returned an arbitrary member.
+    tactic_names = [
+        phase["phase_name"]
+        for phase in obj.get("kill_chain_phases", ())
+        if {"mitre", "attack"}.issubset(phase["kill_chain_name"].split("-"))
+    ]
+    tactic_name = None
+    # Rule-level tactics win over report-level ones. The previous fallback popped
+    # from the (empty) rule-level intersection and always raised KeyError.
+    for candidates in (rule_tactics, report_tactics):
+        match = next((name for name in tactic_names if name in candidates), None)
+        if match is not None and candidates[match]:
+            tactic_name = candidates[match]["external_references"][0]["external_id"]
+            break
+    return technique_name, tactic_name
+def create_navigator_layer(report, indicator, technique_mapping, mitre_version):
+    """
+    Build the ATT&CK Navigator layer (a plain dict) for a single rule.
+    Args:
+        report: object whose ``id`` attribute is recorded in the layer metadata.
+        indicator: mapping providing the rule's "name" and "id".
+        technique_mapping: mapping of technique id -> tactic id; a falsy tactic
+            means the entry is emitted without a "tactic" key.
+        mitre_version: value written to ``versions.attack``.
+    Returns:
+        dict describing the layer, with every technique scored 100.
+    """
+    def _technique_entry(technique_id, tactic):
+        # "tactic" is only present when a tactic was resolved for the technique
+        entry = {
+            "techniqueID": technique_id,
+            "score": 100,
+            "showSubtechniques": True,
+        }
+        if tactic:
+            entry["tactic"] = tactic
+        return entry
+    scored = [
+        _technique_entry(technique_id, tactic)
+        for technique_id, tactic in technique_mapping.items()
+    ]
+    versions = {"layer": "4.5", "attack": mitre_version, "navigator": "5.1.0"}
+    gradient = {"colors": ["#ffffff", "#ff6666"], "minValue": 0, "maxValue": 100}
+    provenance = {
+        "label": "Generated using txt2detection",
+        "url": "https://github.com/muchdogesec/txt2detection/",
+    }
+    layer = {}
+    layer["name"] = indicator["name"]
+    layer["domain"] = "enterprise-attack"
+    layer["versions"] = versions
+    layer["techniques"] = scored
+    layer["gradient"] = gradient
+    layer["legendItems"] = []
+    layer["metadata"] = [
+        {"name": "report_id", "value": report.id, "rule_id": indicator["id"]}
+    ]
+    layer["links"] = [provenance]
+    layer["layout"] = {"layout": "side"}
+    return layer

txt2detection/bundler.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import contextlib
 import enum
+import itertools
 import json
 import logging
 import os
@@ -15,7 +16,7 @@ from stix2 import (
 from stix2.serialization import serialize
 import hashlib
-from txt2detection import attack_flow, observables
+from txt2detection import attack_navigator, observables
 from txt2detection.models import (
     AIDetection,
     BaseDetection,
@@ -30,7 +31,11 @@ import uuid
 from stix2 import parse as parse_stix
 from txt2detection.models import TLP_LEVEL
-from txt2detection.utils import STATUSES, remove_rule_specific_tags
+from txt2detection.utils import (
+    STATUSES,
+    load_stix_object_from_url,
+    remove_rule_specific_tags,
+)
 logger = logging.getLogger("txt2detection.bundler")
@@ -42,7 +47,6 @@ class Bundler:
     uuid = None
     id_map = dict()
     data: DataContainer
-    ATTACK_FLOW_SMO_URL = "https://github.com/muchdogesec/stix2extensions/raw/refs/heads/main/remote-definitions/attack-flow.json"
     # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
     default_identity = Identity(
         **{
@@ -82,6 +86,10 @@ class Bundler:
         }
     )
+    extension_definition = load_stix_object_from_url(
+        "https://raw.githubusercontent.com/muchdogesec/stix2extensions/refs/heads/main/extension-definitions/properties/indicator-sigma_rule.json"
+    )
     @classmethod
     def generate_report_id(cls, created_by_ref, created, name):
         if not created_by_ref:
@@ -114,6 +122,7 @@ class Bundler:
         self.labels = labels or []
         self.license = license
+        self.all_objects = set()
         self.job_id = f"report--{self.uuid}"
         self.external_refs = (external_refs or []) + [
             dict(
@@ -124,6 +133,8 @@ class Bundler:
             for url in self.reference_urls
         ]
         self.data = DataContainer.model_construct()
+        self.tactics = {}
+        self.techniques = {}
         self.report = Report(
             created_by_ref=self.identity.id,
@@ -148,7 +159,6 @@ class Bundler:
         )
         self.report.object_refs.clear()  # clear object refs
         self.set_defaults()
-        self.all_objects = set()
         if not description:
             self.report.external_references.pop(0)
@@ -159,6 +169,7 @@ class Bundler:
         self.bundle.objects.extend([self.default_marking, self.identity, self.report])
         # add default STIX 2.1 marking definition for txt2detection
         self.report.object_marking_refs.append(self.default_marking.id)
+        self.add_ref(self.extension_definition)
     def add_ref(self, sdo, append_report=False):
         sdo_id = sdo["id"]
@@ -191,7 +202,19 @@ class Bundler:
             "pattern": detection.make_rule(self),
             "valid_from": self.report.created,
             "object_marking_refs": self.report.object_marking_refs,
-            "external_references": self.external_refs + detection.external_references,
+            "external_references": self.external_refs,
+            "extensions": {
+                self.extension_definition["id"]: {
+                    "extension_type": "toplevel-property-extension"
+                }
+            },
+            "x_sigma_type": "base",
+            "x_sigma_level": detection.level,
+            "x_sigma_status": detection.status,
+            "x_sigma_license": detection.license,
+            "x_sigma_fields": detection.fields,
+            "x_sigma_falsepositives": detection.falsepositives,
+            "x_sigma_scope": detection.scope,
         }
         indicator["external_references"].append(
             {
@@ -205,13 +228,19 @@ class Bundler:
         logger.debug("```yaml\n" + indicator["pattern"] + "\n```")
         logger.debug(f" =================== end of rule =================== ")
-        self.data.attacks = dict.fromkeys(detection.mitre_attack_ids, "Not found")
+        self.data.attacks.update(dict.fromkeys(detection.mitre_attack_ids, "Not found"))
+        tactics = self.tactics[detection.id] = {}
+        techniques = self.techniques[detection.id] = []
         for obj in self.get_attack_objects(detection.mitre_attack_ids):
             self.add_ref(obj)
             self.add_relation(indicator, obj)
             self.data.attacks[obj["external_references"][0]["external_id"]] = obj["id"]
+            if obj["type"] == "x-mitre-tactic":
+                tactics[obj["x_mitre_shortname"]] = obj
+            else:
+                techniques.append(obj)
-        self.data.cves = dict.fromkeys(detection.cve_ids, "Not found")
+        self.data.cves.update(dict.fromkeys(detection.cve_ids, "Not found"))
         for obj in self.get_cve_objects(detection.cve_ids):
             self.add_ref(obj)
             self.add_relation(indicator, obj)
@@ -302,24 +331,13 @@ class Bundler:
         return self._get_objects(endpoint, headers)
     @classmethod
-    def get_attack_tactics(cls):
+    def get_attack_version(cls):
         headers = {}
         api_root = os.environ["CTIBUTLER_BASE_URL"] + "/"
         if api_key := os.environ.get("CTIBUTLER_API_KEY"):
             headers["API-KEY"] = api_key
-        endpoint = urljoin(
-            api_root, f"v1/attack-enterprise/objects/?attack_type=Tactic"
-        )
         version_url = urljoin(api_root, f"v1/attack-enterprise/versions/installed/")
-        tactics = cls._get_objects(endpoint, headers=headers)
-        retval = dict(
-            version=requests.get(version_url, headers=headers).json()["latest"]
-        )
-        for tac in tactics:
-            retval[tac["x_mitre_shortname"]] = tac
-            retval[tac["external_references"][0]["external_id"]] = tac
-        return retval
+        return requests.get(version_url, headers=headers).json()["latest"]
     @classmethod
     def get_cve_objects(cls, cve_ids):
@@ -356,28 +374,40 @@ class Bundler:
         return data
     def bundle_detections(self, container: DetectionContainer):
-        self.data = DataContainer(detections=container)
+        self.data.detections = container
         if not container.success:
             return
         for d in container.detections:
             self.add_rule_indicator(d)
-    @property
-    def flow_objects(self):
-        return self._flow_objects
-    @flow_objects.setter
-    def flow_objects(self, objects):
-        smo_objects = requests.get(self.ATTACK_FLOW_SMO_URL).json()["objects"]
-        objects.extend(smo_objects)
-        for obj in objects:
-            if obj["id"] == self.report.id:
+    def create_attack_navigator(self):
+        self.mitre_version = self.get_attack_version()
+        all_tactics = dict(
+            itertools.chain(*map(lambda x: x.items(), self.tactics.values()))
+        )
+        self.data.navigator_layer = {}
+        for detection_id, techniques in self.techniques.items():
+            if not techniques:
                 continue
-            is_report_object = obj["type"] not in ["extension-definition", "identity"]
-            self.add_ref(obj, append_report=is_report_object)
-        self._flow_objects = objects
+            tactics = self.tactics[detection_id]
+            mapping = dict(
+                [
+                    attack_navigator.map_technique_tactic(
+                        technique, all_tactics, tactics
+                    )
+                    for technique in techniques
+                ]
+            )
+            indicator = [
+                f
+                for f in self.bundle.objects
+                if str(f["id"]).endswith(detection_id) and f["type"] == "indicator"
+            ][0]
+            self.data.navigator_layer[detection_id] = (
+                attack_navigator.create_navigator_layer(
+                    self.report, indicator, mapping, self.mitre_version
+                )
+            )
 def make_logsouce_string(source: dict):

txt2detection/models.py CHANGED Viewed

@@ -19,7 +19,6 @@ from stix2 import (
     MarkingDefinition,
 )
-from txt2detection.ai_extractor.models import AttackFlowList
 if typing.TYPE_CHECKING:
     from txt2detection.bundler import Bundler
@@ -274,7 +273,8 @@ class BaseDetection(BaseModel):
     @property
     def mitre_attack_ids(self):
         retval = []
-        for label in self.tags:
+        for i, label in enumerate(self.tags):
+            label = label.replace("_", "-").lower()
             namespace, _, label_id = label.partition(".")
             if namespace == "attack":
                 retval.append(MITRE_TACTIC_MAP.get(label_id, label_id.upper()))
@@ -336,7 +336,7 @@ class SigmaRuleDetection(BaseDetection):
     fields: Optional[List[str]] = None
     falsepositives: Optional[List[str]] = None
     level: Optional[Level] = None
-    tags: Optional[List[SigmaTag]] = Field(default_factory=[])
+    tags: Optional[List[SigmaTag]] = Field(default_factory=list)
     scope: Optional[List[str]] = None
     _indicator_types: list = None
@@ -402,11 +402,10 @@ class DetectionContainer(BaseModel):
 class DataContainer(BaseModel):
     detections: DetectionContainer
-    attack_flow: AttackFlowList = Field(default=None)
-    navigator_layer: list = Field(default=None)
+    navigator_layer: dict = Field(default=None)
     observables: list[dict] = Field(default=None)
-    cves: dict[str, str] = Field(default=None)
-    attacks: dict[str, str] = Field(default=None)
+    cves: dict[str, str] = Field(default_factory=dict)
+    attacks: dict[str, str] = Field(default_factory=dict)
 def tlp_from_tags(tags: list[SigmaTag]):

txt2detection/observables.py CHANGED Viewed

@@ -159,27 +159,3 @@ def to_stix_object(observable_type: str, value):
                 )
             )
     return None
-# example_detection = {
-#     "selection": {
-#         "source_ip": "192.168.1.10",
-#         "destination_email": "attacker@example.com",
-#         "url_path": "http://malicious.example.com/payload.exe",
-#         "file_hash_md5": "44d88612fea8a8f36de82e1278abb02f",
-#         "mac_address": "00:1A:2B:3C:4D:5E",
-#         "username": "CORP\\jdoe",
-#         "registry_key": "HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run",
-#         "registry_key2": "HK_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run",
-#         "certificate": "-----BEGIN CERTIFICATE-----FAKECERT-----END CERTIFICATE-----",
-#         "ip": " 192.167.1.1",
-#     },
-#     "condition": "selection",
-# }
-# # Usage
-# observables = find_stix_observables(example_detection)
-# print(observables)
-# for a, b in observables:
-#     print(to_stix_object(a, b))

txt2detection/utils.py CHANGED Viewed

@@ -85,6 +85,11 @@ def remove_rule_specific_tags(tags):
         labels.append(tag)
     return labels
+@lru_cache()
+def load_stix_object_from_url(url):
+    """
+    Download the JSON document at ``url`` and return it decoded.
+    The result is memoised per URL, so each URL is fetched at most once per process.
+    Raises:
+        requests.RequestException: on connection failure, timeout or HTTP error status.
+    """
+    # Bounded wait: without a timeout requests can block forever on a stalled connection.
+    resp = requests.get(url, timeout=30)
+    # Fail on 4xx/5xx instead of trying to decode an error page as JSON.
+    resp.raise_for_status()
+    return resp.json()
 def as_date(d: "date|datetime"):
     if isinstance(d, datetime):

{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: txt2detection
-Version: 1.0.10
+Version: 1.0.12
 Summary: A command line tool that takes a txt file containing threat intelligence and turns it into a detection rule.
 Project-URL: Homepage, https://github.com/muchdogesec/txt2detection
 Project-URL: Issues, https://github.com/muchdogesec/txt2detection/issues
@@ -162,8 +162,7 @@ Use this mode to generate a set of rules from an input text file;
     * Provider (env var required `ANTHROPIC_API_KEY`): `anthropic:`, models e.g.: `claude-3-5-sonnet-latest`, `claude-3-5-haiku-latest`, `claude-3-opus-latest` ([More here](https://docs.anthropic.com/en/docs/about-claude/models))
     * Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
     * Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
-* `--ai_create_attack_flow` (boolean): passing this flag will also prompt the AI model (the same entered for `--ai_provider`, default `false`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags to define the logical order in which they are being described. Note, Sigma currently supports ATT&CK Enterprise only.
-* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as this mode relies on this setting being true)
+* `--create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only.
 Note, in this mode, the following values will be automatically assigned to the rule
@@ -190,8 +189,7 @@ Note, in this mode you should be aware of a few things;
 * `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
 * `status` (optional): either `stable`, `test`, `experimental`, `deprecated`, `unsupported`. If passed, will overwrite any existing `status` recorded in the rule
 * `level` (optional): either `informational`, `low`, `medium`, `high`, `critical`. If passed, will overwrite any existing `level` recorded in the rule
-* `--ai_create_attack_flow` (boolean): passing this flag will also prompt the AI model (the same entered for `--ai_provider`, default `false`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags to define the logical order in which they are being described. Note, Sigma currently supports ATT&CK Enterprise only.
-* `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as this mode relies on this setting being true)
+* `--create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags.
 ### A note on observable extraction

txt2detection-1.0.12.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,23 @@
+txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
+txt2detection/__main__.py,sha256=felrmY7q1tLK4wc17Lx0Ip3WJ23GicoIj0k7pelCqLM,11610
+txt2detection/attack_navigator.py,sha256=CEph4Q3N49ASC0b0eXzTgbBU_JBUHUrBWag7dn_TGbg,2135
+txt2detection/bundler.py,sha256=QnuVdyL0J2CC2rIgTLCt3HclDAxXtK7kdLtv8AGMsJ0,15151
+txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
+txt2detection/models.py,sha256=9nEmbyRNIM5ZhM6asJymKlcUA2Bj2mbFEtWX5OPTU2s,12857
+txt2detection/observables.py,sha256=FuOfq7TsQykeHbE5waakx-rh1JacAq3G8mUqi-3Aw_o,5896
+txt2detection/utils.py,sha256=ZLpFbu9AXsEBaqY1Kjy0mEClaq8hLN8axuOc6bPD-3U,2914
+txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
+txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
+txt2detection/ai_extractor/base.py,sha256=g69o_CsabqL2Y3KxwXLOZU6f98NUjBJwhlPhBgjXSks,2358
+txt2detection/ai_extractor/deepseek.py,sha256=uRbPWmbnu4dzXaBxWPOsKt68v5NJeugGyvOoedjCve0,662
+txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
+txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
+txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
+txt2detection/ai_extractor/prompts.py,sha256=u8PyFcyqrr-MTo2uwa4cDOhh7FbvSnmc0sceaKzThsw,5996
+txt2detection/ai_extractor/utils.py,sha256=CHsyVylMIldFATXPcmRNciruO-4nqh68n076lABRaFk,536
+txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
+txt2detection-1.0.12.dist-info/METADATA,sha256=V23xCbPkNx7btdIwmi9VZEeDsfZYUJIIOnx6EmV-YLA,14797
+txt2detection-1.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+txt2detection-1.0.12.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
+txt2detection-1.0.12.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
+txt2detection-1.0.12.dist-info/RECORD,,

txt2detection/ai_extractor/models.py DELETED Viewed

@@ -1,34 +0,0 @@
-import io
-import json
-import logging
-import dotenv
-import textwrap
-from pydantic import BaseModel, Field, RootModel
-from llama_index.core.output_parsers import PydanticOutputParser
-class AttackFlowItem(BaseModel):
-    position: int = Field(description="order of object starting at 0")
-    attack_technique_id: str
-    name: str
-    description: str
-class AttackFlowList(BaseModel):
-    tactic_selection: list[tuple[str, str]] = Field(
-        description="attack technique id to attack tactic id mapping using possible_tactics"
-    )
-    # additional_tactic_mapping: list[tuple[str, str]] = Field(description="the rest of tactic_mapping")
-    items: list[AttackFlowItem]
-    success: bool = Field(
-        description="determines if there's any valid flow in <extractions>"
-    )
-    def model_post_init(self, context):
-        return super().model_post_init(context)
-    @property
-    def tactic_mapping(self):
-        return dict(self.tactic_selection)

txt2detection/attack_flow.py DELETED Viewed

@@ -1,231 +0,0 @@
-import json
-import logging
-import uuid
-from stix2 import Relationship
-from txt2detection.ai_extractor.models import AttackFlowList
-from .bundler import Bundler
-from .ai_extractor.base import BaseAIExtractor
-from .models import UUID_NAMESPACE
-from stix2extensions.attack_action import AttackAction, AttackFlow
-def parse_flow(report, flow: AttackFlowList, techniques, tactics):
-    logging.info(f"flow.success = {flow.success}")
-    if not flow.success:
-        return []
-    objects = [report]
-    for domain in ["enterprise-attack", "mobile-attack", "ics-attack"]:
-        flow_objects = parse_domain_flow(report, flow, techniques, tactics, domain)
-        objects.extend(flow_objects)
-    return objects
-def parse_domain_flow(report, flow: AttackFlowList, techniques, tactics, domain):
-    flow_objects = []
-    flow_obj = None
-    last_action = None
-    for i, item in enumerate(flow.items):
-        try:
-            technique = techniques[item.attack_technique_id]
-            if technique["domain"] != domain:
-                continue
-            tactic_id = technique["possible_tactics"][
-                flow.tactic_mapping[item.attack_technique_id]
-            ]
-            technique_obj = technique["stix_obj"]
-            tactic_obj = tactics[tactic_id]
-            action_obj = AttackAction(
-                **{
-                    "id": flow_id(report["id"], item.attack_technique_id, tactic_id),
-                    "effect_refs": [f"attack-action--{str(uuid.uuid4())}"],
-                    "technique_id": item.attack_technique_id,
-                    "technique_ref": technique_obj["id"],
-                    "tactic_id": tactic_id,
-                    "tactic_ref": tactic_obj["id"],
-                    "name": item.name,
-                    "description": item.description,
-                },
-                allow_custom=True,
-            )
-            action_obj.effect_refs.clear()
-            if not flow_obj:
-                flow_obj = {
-                    "type": "attack-flow",
-                    "id": "attack-flow--"
-                    + str(
-                        uuid.uuid5(UUID_NAMESPACE, f"attack-flow+{domain}+{report.id}")
-                    ),
-                    "spec_version": "2.1",
-                    "created": report.created,
-                    "modified": report.modified,
-                    "created_by_ref": report.created_by_ref,
-                    "start_refs": [action_obj["id"]],
-                    "name": f"[{domain.split('-')[0].upper()}] {report.name}",
-                    "description": report.description,
-                    "scope": "malware",
-                    "external_references": report.external_references,
-                    "object_marking_refs": report.object_marking_refs,
-                }
-                flow_objects.append(AttackFlow(**flow_obj))
-                flow_objects.append(
-                    Relationship(
-                        type="relationship",
-                        spec_version="2.1",
-                        id="relationship--"
-                        + str(
-                            uuid.uuid5(
-                                UUID_NAMESPACE,
-                                f"attack-flow+{report.id}+{flow_obj['id']}",
-                            )
-                        ),
-                        created_by_ref=report.created_by_ref,
-                        created=report.created,
-                        modified=report.modified,
-                        relationship_type="attack-flow",
-                        description=f"Attack Flow for {report.name}",
-                        source_ref=report.id,
-                        target_ref=flow_obj["id"],
-                        external_references=report.external_references,
-                        object_marking_refs=report.object_marking_refs,
-                    )
-                )
-            else:
-                last_action["effect_refs"].append(action_obj["id"])
-            flow_objects.append(tactic_obj)
-            flow_objects.append(technique_obj)
-            flow_objects.append(action_obj)
-            last_action = action_obj
-        except Exception as e:
-            if flow_objects == 2:
-                logging.exception("FATAL: create attack flow object failed")
-                return []
-            logging.debug("create attack-action failed", exc_info=True)
-            raise
-    return flow_objects
-def flow_id(report_id, technique_id, tactic_id):
-    return "attack-action--" + str(
-        uuid.uuid5(
-            uuid.UUID(report_id.split("--")[-1]),
-            f"{report_id}+{technique_id}+{tactic_id}",
-        )
-    )
-def get_techniques_from_extracted_objects(objects: dict, tactics: dict):
-    techniques = {}
-    for obj in objects:
-        if (
-            obj["type"] == "attack-pattern"
-            and obj.get("external_references", [{"source_name": None}])[0][
-                "source_name"
-            ]
-            == "mitre-attack"
-        ):
-            domain = obj["x_mitre_domains"][0]
-            technique = dict(
-                domain=domain,
-                name=obj["name"],
-                possible_tactics={},
-                id=obj["external_references"][0]["external_id"],
-                platforms=[
-                    platform
-                    for platform in obj["x_mitre_platforms"]
-                    if platform != "None"
-                ],
-                stix_obj=obj,
-            )
-            for phase in obj["kill_chain_phases"]:
-                if not set(phase["kill_chain_name"].split("-")).issuperset(
-                    ["mitre", "attack"]
-                ):
-                    continue
-                tactic_name = phase["phase_name"]
-                tactic_obj = tactics[tactic_name]
-                tactic_id = tactic_obj["external_references"][0]["external_id"]
-                technique["possible_tactics"][tactic_name] = tactic_id
-            techniques[technique["id"]] = technique
-    return techniques
-def create_navigator_layer(report, flow: AttackFlowList, techniques, tactics):
-    domains = {}
-    comments = {item.attack_technique_id: item.description for item in flow.items}
-    for technique in techniques.values():
-        domain_techniques = domains.setdefault(technique["domain"], [])
-        technique_id = technique["id"]
-        if technique_id not in flow.tactic_mapping:
-            continue
-        technique_item = dict(
-            techniqueID=technique_id,
-            tactic=flow.tactic_mapping[technique_id],
-            score=100,
-            showSubtechniques=True,
-        )
-        if comment := comments.get(technique_id):
-            technique_item["comment"] = comment
-        domain_techniques.append(technique_item)
-    retval = []
-    for domain, domain_techniques in domains.items():
-        retval.append(
-            {
-                "versions": {
-                    "layer": "4.5",
-                    "attack": tactics["version"],
-                    "navigator": "5.1.0",
-                },
-                "name": report.name,
-                "domain": domain,
-                "techniques": domain_techniques,
-                "gradient": {
-                    "colors": ["#ffffff", "#ff6666"],
-                    "minValue": 0,
-                    "maxValue": 100,
-                },
-                "legendItems": [],
-                "metadata": [{"name": "report_id", "value": report.id}],
-                "links": [
-                    {
-                        "label": "Generated using txt2detection",
-                        "url": "https://github.com/muchdogesec/txt2detection/",
-                    }
-                ],
-                "layout": {"layout": "side"},
-            }
-        )
-    return retval
-def extract_attack_flow_and_navigator(
-    bundler: Bundler,
-    preprocessed_text,
-    ai_create_attack_flow,
-    ai_create_attack_navigator_layer,
-    ai_settings_relationships,
-):
-    ex: BaseAIExtractor = ai_settings_relationships
-    tactics = bundler.get_attack_tactics()
-    techniques = get_techniques_from_extracted_objects(bundler.bundle.objects, tactics)
-    if not techniques:
-        return None, None
-    logged_techniques = [
-        {k: v for k, v in t.items() if k != "stix_obj"} for t in techniques.values()
-    ]
-    logging.debug(f"parsed techniques: {logged_techniques}")
-    flow = ex.extract_attack_flow(preprocessed_text, techniques)
-    navigator = None
-    if ai_create_attack_flow:
-        logging.info("creating attack-flow bundle")
-        bundler.flow_objects = parse_flow(bundler.report, flow, techniques, tactics)
-    if ai_create_attack_navigator_layer:
-        navigator = create_navigator_layer(bundler.report, flow, techniques, tactics)
-    return flow, navigator

txt2detection-1.0.10.dist-info/RECORD DELETED Viewed

@@ -1,24 +0,0 @@
-txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
-txt2detection/__main__.py,sha256=s5XcIctE59ALjys6Y8lRIqS_pQWi1mlNo2gyG8_XS5s,11622
-txt2detection/attack_flow.py,sha256=x6GhDZZ8xOzugfMELvHvrhclcIqozGIt9_mzyr2KKnA,8741
-txt2detection/bundler.py,sha256=eGCIwLY0J_SVyOI_1IFsm_8RgvaE_32t5MIc_UyJwm0,13994
-txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
-txt2detection/models.py,sha256=_-sR03FEWI46OUZdL7U0tibNn909B0NU9LWNzopBtiY,12888
-txt2detection/observables.py,sha256=RxgJchvk6_Z2pBxJ6MAGsx00gj8TyRt9W2BTQTb1F9o,6762
-txt2detection/utils.py,sha256=EJ5lMhnghUgW0JbcRmeiDXYwm5GaB6XrG4cUjru-52g,2812
-txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
-txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
-txt2detection/ai_extractor/base.py,sha256=2C3d4BoH7I4fnvp6cLxbtjiFVPm4WJLFwnS_lAppHr8,3210
-txt2detection/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
-txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
-txt2detection/ai_extractor/models.py,sha256=xMTvUHoxIflbBA4mkGLTjwf657DVEOxd6gqLpEUciQ4,963
-txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
-txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
-txt2detection/ai_extractor/prompts.py,sha256=xI82PelsTidnRzi5wnNbEC4lmkio92YUDd8SZu4CQiE,10961
-txt2detection/ai_extractor/utils.py,sha256=SUxyPhkGp5yDbX_H_E018i93R8IbyLsQ00PIBDecfuc,540
-txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
-txt2detection-1.0.10.dist-info/METADATA,sha256=CHTRZrV_v6gfyAyEW6hfNaQutVpSv5yM7w084u_x7U4,15870
-txt2detection-1.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-txt2detection-1.0.10.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
-txt2detection-1.0.10.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
-txt2detection-1.0.10.dist-info/RECORD,,

{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/WHEEL RENAMED Viewed

File without changes

{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{txt2detection-1.0.10.dist-info → txt2detection-1.0.12.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

txt2detection 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl

Potentially problematic release.

txt2detection 1.0.10__py3-none-any.whl → 1.0.12__py3-none-any.whl