txt2stix 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
txt2stix/__init__.py CHANGED
@@ -1,6 +1,5 @@
  from txt2stix import extractions
  from .bundler import txt2stixBundler
- from .txt2stix import extract_all
  from pathlib import Path

  INCLUDES_PATH = None
txt2stix/ai_extractor/base.py CHANGED
@@ -104,4 +104,7 @@ class BaseAIExtractor():

      def _check_credential(self):
          self.llm.complete("say 'hi'")
-         return True
+         return True
+
+     def __str__(self):
+         return self.extractor_name
txt2stix/ai_extractor/utils.py CHANGED
@@ -33,6 +33,9 @@ class RelationshipList(BaseModel):
      relationships: list[Relationship] = Field(default_factory=list)
      success: bool

+     def get(self, key, default=None):
+         return getattr(self, key, default)
+
  class DescribesIncident(BaseModel):
      describes_incident: bool = Field(description="does the <document> include malware analysis, APT group reports, data breaches and vulnerabilities?")
      explanation: str = Field(description="Two or three sentence summary of the incidents it describes OR summary of what it describes instead of an incident")
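
Note: `RelationshipList.get` mirrors `dict.get`, so downstream code can read relationships the same way whether it holds a live model instance or a plain dict restored from `model_dump()` (which is how `processing_phase` in txt2stix/txt2stix.py now consumes it). A minimal sketch of the duck-typing this enables:

    rels = RelationshipList(relationships=[], success=True)
    for source in (rels, rels.model_dump()):
        # a RelationshipList and its dict dump now answer .get() uniformly
        print(source.get("relationships", []))
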
txt2stix/attack_flow.py CHANGED
@@ -213,6 +213,7 @@ def extract_attack_flow_and_navigator(
      ai_create_attack_flow,
      ai_create_attack_navigator_layer,
      ai_settings_relationships,
+     flow=None
  ):
      ex: BaseAIExtractor = ai_settings_relationships
      tactics = get_all_tactics()
@@ -225,7 +226,7 @@ def extract_attack_flow_and_navigator(
      ]
      logging.debug(f"parsed techniques: {json.dumps(logged_techniques, indent=4)}")

-     flow = ex.extract_attack_flow(preprocessed_text, techniques)
+     flow = flow or ex.extract_attack_flow(preprocessed_text, techniques)
      navigator = None
      if ai_create_attack_flow:
          logging.info("creating attack-flow bundle")
txt2stix/bundler.py CHANGED
@@ -422,10 +422,6 @@ class txt2stixBundler:
      def process_observables(self, extractions, add_standard_relationship=False):
          for ex in extractions:
              try:
-                 if ex.get("id", "").startswith(
-                     "ai"
-                 ):  # so id is distinct across multiple AIExtractors
-                     ex["id"] = f'{ex["id"]}_{self.observables_processed}'
                  ex["id"] = ex.get("id", f"ex_{self.observables_processed}")
                  self.observables_processed += 1
                  self.add_indicator(ex, add_standard_relationship)
@@ -437,6 +433,7 @@ class txt2stixBundler:
                  ex["error"] = str(e)

      def process_relationships(self, observables):
+         print(observables)
          for relationship in observables:
              try:
                  self.add_ai_relationship(relationship)
txt2stix/indicator.py CHANGED
@@ -1,6 +1,8 @@
  from __future__ import annotations
+ from datetime import UTC, datetime
  import os
  import re
+ import uuid
  from stix2.parsing import dict_to_stix2
  from stix2 import HashConstant, File
  from stix2.v21.vocab import HASHING_ALGORITHM
@@ -24,7 +26,7 @@ if TYPE_CHECKING:

  # from schwifty import IBAN

- from .common import MinorException
+ from .common import UUID_NAMESPACE, MinorException

  from .retriever import retrieve_stix_objects

@@ -675,11 +677,19 @@ def _build_observables(
          )
      )

+     _id_part = str(
+         uuid.uuid5(
+             UUID_NAMESPACE,
+             f"txt2stix+{extracted_value}",
+         )
+     )
+
      if stix_mapping == "attack-pattern":
          stix_objects = [
              dict_to_stix2(
                  {
                      "type": "attack-pattern",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -695,6 +705,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "campaign",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -711,6 +722,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "course-of-action",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -727,6 +739,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "infrastructure",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -744,6 +757,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "intrusion-set",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -760,6 +774,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "malware",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -778,6 +793,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "threat-actor",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -795,6 +811,7 @@ def _build_observables(
              dict_to_stix2(
                  {
                      "type": "tool",
+                     # "id": stix_mapping + "--" + _id_part,
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
                      "created": indicator["created"],
@@ -814,8 +831,9 @@ def _build_observables(
                      "type": "identity",
                      "spec_version": "2.1",
                      "created_by_ref": indicator["created_by_ref"],
-                     "created": indicator["created"],
-                     "modified": indicator["modified"],
+                     "created": datetime(2020, 1, 1, tzinfo=UTC),
+                     "modified": datetime(2020, 1, 1, tzinfo=UTC),
+                     "id": "identity--" + _id_part,
                      "name": extracted_value,
                      "identity_class": "unspecified",
                      "object_marking_refs": indicator["object_marking_refs"],
txt2stix/txt2stix.py CHANGED
@@ -21,7 +21,7 @@ from .utils import RELATIONSHIP_TYPES, Txt2StixData, remove_links
  from .common import UUID_NAMESPACE, FatalException

  from .bundler import txt2stixBundler, parse_stix, TLP_LEVEL
- from .import extractions, lookups, pattern
+ from . import extractions, lookups, pattern
  from types import SimpleNamespace
  import functools
  from fnmatch import filter
@@ -40,41 +40,51 @@ def newLogger(name: str) -> logging.Logger:
          level=logging.DEBUG,  # Set the desired logging level
          format=f"%(asctime)s [{name}] [%(levelname)s] %(message)s",
          handlers=[stream_handler],
-         datefmt='%d-%b-%y %H:%M:%S',
+         datefmt="%d-%b-%y %H:%M:%S",
      )

      return logging.root

+
  def setLogFile(logger, file: Path):
      file.parent.mkdir(parents=True, exist_ok=True)
      logger.info(f"Saving log to `{file.absolute()}`")
      handler = logging.FileHandler(file, "w")
-     handler.formatter = logging.Formatter(fmt='%(levelname)s %(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')
+     handler.formatter = logging.Formatter(
+         fmt="%(levelname)s %(asctime)s - %(message)s", datefmt="%d-%b-%y %H:%M:%S"
+     )
      handler.setLevel(logging.DEBUG)
      logger.addHandler(handler)
      logger.info("=====================txt2stix======================")


  MODULE_PATH = Path(__file__).parent.parent
- INCLUDES_PATH = MODULE_PATH/"includes"
+ INCLUDES_PATH = MODULE_PATH / "includes"
  try:
      from . import includes
+
      INCLUDES_PATH = Path(includes.__file__).parent
  except:
      pass

+
  def split_comma(s: str) -> list[str]:
      return [ss for ss in s.split(",") if ss]

+
  def range_type(min, max):
      def fn(astr):
          value = int(astr)
-         if min<= value <= max:
+         if min <= value <= max:
              return value
          else:
-             raise argparse.ArgumentTypeError(f'value {value} not in range [{min}-{max}]')
+             raise argparse.ArgumentTypeError(
+                 f"value {value} not in range [{min}-{max}]"
+             )
+
      return fn

+
  def parse_labels(labels: str) -> list[str]:
      labels = labels.split(",")
      for label in labels:
@@ -83,39 +93,44 @@ def parse_labels(labels: str) -> list[str]:

      return labels

+
  def parse_extractors_globbed(type, all_extractors, names):
      globbed_names = set()
      for name in names.split(","):
          matches = fnmatch.filter(all_extractors.keys(), name)
          if not matches:
-             raise argparse.ArgumentTypeError(f'`{name}` has 0 matches')
+             raise argparse.ArgumentTypeError(f"`{name}` has 0 matches")
          globbed_names.update(matches)
-     filtered_extractors = {}
+     filtered_extractors = {}
      for extractor_name in globbed_names:
          try:
              extractor = all_extractors[extractor_name]
-             extraction_processor = filtered_extractors.get(extractor.type, {})
+             extraction_processor = filtered_extractors.get(extractor.type, {})
              if extractor.type in ["lookup"]:
                  lookups.load_lookup(extractor)
              if extractor.type == "pattern":
                  pattern.load_extractor(extractor)
-             filtered_extractors[extractor.type] = extraction_processor
+             filtered_extractors[extractor.type] = extraction_processor
              extraction_processor[extractor_name] = extractor
          except BaseException as e:
              raise argparse.ArgumentTypeError(f"{type} `{extractor_name}`: {e}")
      return filtered_extractors

+
  def parse_ref(value):
-     m = re.compile(r'(.+?)=(.+)').match(value)
+     m = re.compile(r"(.+?)=(.+)").match(value)
      if not m:
          raise argparse.ArgumentTypeError("must be in format key=value")
      return dict(source_name=m.group(1), external_id=m.group(2))

+
  def parse_model(value: str):
-     splits = value.split(':', 1)
+     splits = value.split(":", 1)
      provider = splits[0]
      if provider not in ALL_AI_EXTRACTORS:
-         raise argparse.ArgumentTypeError(f"invalid AI provider in `{value}`, must be one of {list(ALL_AI_EXTRACTORS)}")
+         raise argparse.ArgumentTypeError(
+             f"invalid AI provider in `{value}`, must be one of {list(ALL_AI_EXTRACTORS)}"
+         )
      provider = ALL_AI_EXTRACTORS[provider]

      try:
@@ -125,6 +140,7 @@ def parse_model(value: str):
      except Exception as e:
          raise ModelError(f"Unable to initialize model `{value}`") from e

+
  def parse_bool(value: str):
      value = value.lower()
      return value in ["yes", "y", "true", "1"]
@@ -135,7 +151,12 @@ def parse_args():
      all_extractors = extractions.parse_extraction_config(INCLUDES_PATH)

      parser = argparse.ArgumentParser(description="File Conversion Tool")
-     parser.add_argument('--check_credentials', "--check-credentials", action="store_true", help="Print the validity of the credentials and exit")
+     parser.add_argument(
+         "--check_credentials",
+         "--check-credentials",
+         action="store_true",
+         help="Print the validity of the credentials and exit",
+     )
      args, _ = parser.parse_known_args()
      if args.check_credentials:
          statuses = credential_checker.check_statuses(test_llms=True)
@@ -259,7 +280,6 @@ def parse_args():
          help="create attack flow for attack objects in report/bundle",
      )

-
      args = parser.parse_args()
      if not args.input_file.exists():
          raise argparse.ArgumentError(inf_arg, "cannot open file")
@@ -296,6 +316,8 @@ REQUIRED_ENV_VARIABLES = [
      "CTIBUTLER_BASE_URL",
      "VULMATCH_BASE_URL",
  ]
+
+
  def load_env():
      for env in REQUIRED_ENV_VARIABLES:
          if not os.getenv(env):
@@ -304,19 +326,34 @@ def load_env():

  def log_notes(content, type):
      logging.debug(f" ========================= {type} ========================= ")
-     logging.debug(f" ========================= {'+'*len(type)} ========================= ")
+     logging.debug(
+         f" ========================= {'+'*len(type)} ========================= "
+     )
      logging.debug(json.dumps(content, sort_keys=True, indent=4))
-     logging.debug(f" ========================= {'-'*len(type)} ========================= ")
+     logging.debug(
+         f" ========================= {'-'*len(type)} ========================= "
+     )
+

- def extract_all(bundler: txt2stixBundler, extractors_map, text_content, ai_extractors: list[BaseAIExtractor]=[], **kwargs):
-     assert ai_extractors or not extractors_map.get("ai"), "There should be at least one AI extractor in ai_extractors"
+ def run_extractors(
+     extractors_map, text_content, ai_extractors: list[BaseAIExtractor] = [], **kwargs
+ ):
+     """Run extraction calls (lookup, pattern, AI) and return a dict of all extracts.

-     text_content = "\n"+text_content+"\n"
+     This function does NOT modify the bundler. Use `process_extracts` to
+     feed the returned extracts into a bundler (or replay saved extracts).
+     """
+     assert ai_extractors or not extractors_map.get(
+         "ai"
+     ), "There should be at least one AI extractor in ai_extractors"
+
+     text_content = "\n" + text_content + "\n"
      all_extracts = dict()
      if extractors_map.get("lookup"):
          try:
-             lookup_extracts = lookups.extract_all(extractors_map["lookup"].values(), text_content)
-             bundler.process_observables(lookup_extracts)
+             lookup_extracts = lookups.extract_all(
+                 extractors_map["lookup"].values(), text_content
+             )
              all_extracts["lookup"] = lookup_extracts
          except BaseException as e:
              logging.exception("lookup extraction failed", exc_info=True)
@@ -324,95 +361,239 @@ def extract_all(bundler: txt2stixBundler, extractors_map, text_content, ai_extra
      if extractors_map.get("pattern"):
          try:
              logging.info("using pattern extractors")
-             pattern_extracts = pattern.extract_all(extractors_map["pattern"].values(), text_content, ignore_extraction_boundary=kwargs.get('ignore_extraction_boundary', False))
-             bundler.process_observables(pattern_extracts)
+             pattern_extracts = pattern.extract_all(
+                 extractors_map["pattern"].values(),
+                 text_content,
+                 ignore_extraction_boundary=kwargs.get(
+                     "ignore_extraction_boundary", False
+                 ),
+             )
              all_extracts["pattern"] = pattern_extracts
          except BaseException as e:
              logging.exception("pattern extraction failed", exc_info=True)

      if extractors_map.get("ai"):
          logging.info("using ai extractors")
-
          for extractor in ai_extractors:
              logging.info("running extractor: %s", extractor.extractor_name)
              try:
-                 ai_extracts = extractor.extract_objects(text_content, extractors_map["ai"].values())
-                 bundler.process_observables(ai_extracts)
+                 ai_extracts = extractor.extract_objects(
+                     text_content, extractors_map["ai"].values()
+                 )
                  all_extracts[f"ai-{extractor.extractor_name}"] = ai_extracts
              except BaseException as e:
-                 logging.exception("AI extraction failed for %s", extractor.extractor_name, exc_info=True)
+                 logging.exception(
+                     "AI extraction failed for %s",
+                     extractor.extractor_name,
+                     exc_info=True,
+                 )
+
+     for i, ex in enumerate(itertools.chain(*all_extracts.values())):
+         ex["id"] = "ex-" + str(i)
+     return all_extracts
+
+
+ def process_extracts(bundler: txt2stixBundler, all_extracts: dict):
+     """Process a previously-created `all_extracts` dict into the given bundler.
+
+     This allows replaying saved extracts without invoking extractors again.
+     """
+     for key, extracts in (all_extracts or {}).items():
+         try:
+             bundler.process_observables(extracts)
+         except BaseException:
+             logging.exception("processing extracts failed for %s", key, exc_info=True)

      log_notes(all_extracts, "Extractions")
-     return all_extracts

- def extract_relationships_with_ai(bundler: txt2stixBundler, text_content, all_extracts, ai_extractor_session: BaseAIExtractor):
+
+ def extract_relationships(
+     text_content, all_extracts, ai_extractor_session: BaseAIExtractor
+ ):
      relationships = None
      try:
-         all_extracts = list(itertools.chain(*all_extracts.values()))
-         relationships = ai_extractor_session.extract_relationships(text_content, all_extracts, RELATIONSHIP_TYPES)
-         relationships = relationships.model_dump()
+         # flatten extracts into a single list
+         flattened = list(itertools.chain(*all_extracts.values()))
+         rel = ai_extractor_session.extract_relationships(
+             text_content, flattened, RELATIONSHIP_TYPES
+         )
+         relationships = rel.model_dump()
          log_notes(relationships, "Relationships")
-         bundler.process_relationships(relationships['relationships'])
      except BaseException as e:
-         logging.exception("Relationship processing failed: %s", e)
+         logging.exception("Relationship extraction failed: %s", e)
      return relationships

+
  def validate_token_count(max_tokens, input, extractors: list[BaseAIExtractor]):
-     logging.info('INPUT_TOKEN_LIMIT = %d', max_tokens)
+     logging.info("INPUT_TOKEN_LIMIT = %d", max_tokens)
      for extractor in extractors:
          token_count = _count_token(extractor, input)
-         logging.info(f"{extractor.extractor_name}: input_file token count = {token_count}")
-         if token_count > max_tokens:
-             raise FatalException(f"{extractor.extractor_name}: input_file token count ({token_count}) exceeds INPUT_TOKEN_LIMIT ({max_tokens})")
+         logging.info(
+             f"{extractor.extractor_name}: input_file token count = {token_count}"
+         )
+         if token_count > max_tokens:
+             raise FatalException(
+                 f"{extractor.extractor_name}: input_file token count ({token_count}) exceeds INPUT_TOKEN_LIMIT ({max_tokens})"
+             )


  @functools.lru_cache
  def _count_token(extractor: BaseAIExtractor, input: str):
      return extractor.count_tokens(input)

- def run_txt2stix(bundler: txt2stixBundler, preprocessed_text: str, extractors_map: dict,
-     ai_content_check_provider=None,
-     ai_create_attack_flow=None,
-     ai_create_attack_navigator_layer=None,
-     input_token_limit=10,
-     ai_settings_extractions=None,
-     ai_settings_relationships=None,
-     relationship_mode="standard",
-     ignore_extraction_boundary=False,
-     ai_extract_if_no_incidence=True, # continue even if ai_content_check fails
-
-     **kwargs
- ) -> Txt2StixData:
+
+ def run_txt2stix(
+     bundler: txt2stixBundler,
+     preprocessed_text: str,
+     extractors_map: dict,
+     ai_content_check_provider=None,
+     ai_create_attack_flow=None,
+     ai_create_attack_navigator_layer=None,
+     input_token_limit=10,
+     ai_settings_extractions=None,
+     ai_settings_relationships=None,
+     relationship_mode="standard",
+     ignore_extraction_boundary=False,
+     ai_extract_if_no_incidence=True,  # continue even if ai_content_check fails
+     txt2stix_data: Txt2StixData = None,
+     **kwargs,
+ ) -> Txt2StixData:
+     # First, perform extraction-phase (LLM and extractor calls). This does not
+     # modify the provided bundler so the results can be saved and replayed.
+     # skip extraction phase if txt2stix_data is passed
+     txt2stix_data = txt2stix_data or extraction_phase(
+         preprocessed_text,
+         extractors_map,
+         ai_content_check_provider=ai_content_check_provider,
+         input_token_limit=input_token_limit,
+         ai_settings_extractions=ai_settings_extractions,
+         ai_settings_relationships=ai_settings_relationships,
+         relationship_mode=relationship_mode,
+         ignore_extraction_boundary=ignore_extraction_boundary,
+         ai_extract_if_no_incidence=ai_extract_if_no_incidence,
+     )
+
+     # Then, process the extracted data into the bundler (no LLM calls).
+     processing_phase(
+         bundler,
+         preprocessed_text,
+         txt2stix_data,
+         ai_create_attack_flow=ai_create_attack_flow,
+         ai_create_attack_navigator_layer=ai_create_attack_navigator_layer,
+         ai_settings_relationships=ai_settings_relationships,
+         ai_content_check_provider=ai_content_check_provider,
+     )
+     return txt2stix_data
+
+
+ def extraction_phase(
+     preprocessed_text: str,
+     extractors_map: dict,
+     ai_content_check_provider=None,
+     input_token_limit=10,
+     ai_settings_extractions=None,
+     ai_settings_relationships=None,
+     relationship_mode="standard",
+     ignore_extraction_boundary=False,
+     ai_extract_if_no_incidence=True,
+     **kwargs,
+ ) -> Txt2StixData:
+     """Perform token validation and run extractors/AI models. Does NOT modify a bundler."""
      should_extract = True
-     retval = Txt2StixData.model_construct()
-     retval.extractions = retval.attack_flow = retval.relationships = None
+     txt2stix_data = Txt2StixData.model_construct()
+     txt2stix_data.extractions = txt2stix_data.attack_flow = (
+         txt2stix_data.relationships
+     ) = None
+
      if ai_content_check_provider:
          logging.info("checking content")
-         model : BaseAIExtractor = ai_content_check_provider
+         model: BaseAIExtractor = ai_content_check_provider
          validate_token_count(input_token_limit, preprocessed_text, [model])
-         retval.content_check = model.check_content(preprocessed_text)
-         should_extract = retval.content_check.describes_incident
+         txt2stix_data.content_check = model.check_content(preprocessed_text)
+         should_extract = txt2stix_data.content_check.describes_incident
          logging.info("=== ai-check-content output ====")
-         logging.info(retval.content_check.model_dump_json())
-         bundler.report.external_references.append(dict(source_name='txt2stix_describes_incident', description=str(should_extract).lower(), external_id=model.extractor_name))
-         for classification in retval.content_check.incident_classification:
-             bundler.report.labels.append(f'classification.{classification}'.lower())
-         bundler.add_summary(retval.content_check.summary, model.extractor_name)
+         logging.info(txt2stix_data.content_check.model_dump_json())

      if should_extract or ai_extract_if_no_incidence:
          if extractors_map.get("ai"):
-             validate_token_count(input_token_limit, preprocessed_text, ai_settings_extractions)
+             validate_token_count(
+                 input_token_limit, preprocessed_text, ai_settings_extractions
+             )
          if relationship_mode == "ai":
-             validate_token_count(input_token_limit, preprocessed_text, [ai_settings_relationships])
+             validate_token_count(
+                 input_token_limit, preprocessed_text, [ai_settings_relationships]
+             )
+
+         txt2stix_data.extractions = run_extractors(
+             extractors_map,
+             preprocessed_text,
+             ai_extractors=ai_settings_extractions,
+             ignore_extraction_boundary=ignore_extraction_boundary,
+         )

-         retval.extractions = extract_all(bundler, extractors_map, preprocessed_text, ai_extractors=ai_settings_extractions, ignore_extraction_boundary=ignore_extraction_boundary)
-         if relationship_mode == "ai" and sum(map(lambda x: len(x), retval.extractions.values())):
-             retval.relationships = extract_relationships_with_ai(bundler, preprocessed_text, retval.extractions, ai_settings_relationships)
-
-         if ai_create_attack_flow or ai_create_attack_navigator_layer:
-             retval.attack_flow, retval.navigator_layer = attack_flow.extract_attack_flow_and_navigator(bundler, preprocessed_text, ai_create_attack_flow, ai_create_attack_navigator_layer, ai_settings_relationships)
-     return retval
+         if (
+             relationship_mode == "ai"
+             and txt2stix_data.extractions
+             and sum(map(lambda x: len(x), txt2stix_data.extractions.values()))
+         ):
+             txt2stix_data.relationships = extract_relationships(
+                 preprocessed_text, txt2stix_data.extractions, ai_settings_relationships
+             )
+     return txt2stix_data
+
+
+ def processing_phase(
+     bundler: txt2stixBundler,
+     preprocessed_text: str,
+     data: Txt2StixData,
+     ai_create_attack_flow=False,
+     ai_create_attack_navigator_layer=False,
+     ai_settings_relationships=None,
+     ai_content_check_provider=None,
+ ):
+     """Process extracted `data` into the given `bundler` without invoking LLMs."""
+     try:
+         if data.content_check:
+             cc = data.content_check
+             provider_name = str(ai_content_check_provider)
+             bundler.report.external_references.append(
+                 dict(
+                     source_name="txt2stix_describes_incident",
+                     description=str(cc.describes_incident).lower(),
+                     external_id=provider_name,
+                 )
+             )
+             for classification in cc.incident_classification:
+                 bundler.report.labels.append(f"classification.{classification}".lower())
+             bundler.add_summary(cc.summary, provider_name)
+     except BaseException:
+         logging.exception("applying content_check to bundler failed", exc_info=True)
+
+     # process extracts into bundler
+     process_extracts(bundler, data.extractions)
+
+     # process relationships into bundler
+     try:
+         if data.relationships:
+             bundler.process_relationships(data.relationships.get("relationships", []))
+     except BaseException:
+         logging.exception("processing relationships failed", exc_info=True)

+     # generate attack flow / navigator layer now that bundler has been populated
+     try:
+         if ai_create_attack_flow or ai_create_attack_navigator_layer:
+             data.attack_flow, data.navigator_layer = (
+                 attack_flow.extract_attack_flow_and_navigator(
+                     bundler,
+                     preprocessed_text,
+                     ai_create_attack_flow,
+                     ai_create_attack_navigator_layer,
+                     ai_settings_relationships,
+                     flow=data.attack_flow,
+                 )
+             )
+     except BaseException:
+         logging.exception("attack flow / navigator generation failed", exc_info=True)


  def main():
@@ -424,35 +605,50 @@ def main():
          setLogFile(logger, Path(f"logs/logs-{job_id}.log"))
          logger.info(f"Arguments: {json.dumps(sys.argv[1:])}")

-
          input_text = args.input_file.read_text()
-         preprocessed_text = remove_links(input_text, args.ignore_image_refs, args.ignore_link_refs)
+         preprocessed_text = remove_links(
+             input_text, args.ignore_image_refs, args.ignore_link_refs
+         )
          load_env()

-
-         bundler = txt2stixBundler(args.name, args.use_identity, args.tlp_level, input_text, args.confidence, args.all_extractors, args.labels, created=args.created, report_id=args.report_id, external_references=args.external_refs)
+         bundler = txt2stixBundler(
+             args.name,
+             args.use_identity,
+             args.tlp_level,
+             input_text,
+             args.confidence,
+             args.all_extractors,
+             args.labels,
+             created=args.created,
+             report_id=args.report_id,
+             external_references=args.external_refs,
+         )
          log_notes(sys.argv, "Config")

          data = run_txt2stix(
-             bundler, preprocessed_text, args.use_extractions,
-             input_token_limit=int(os.environ['INPUT_TOKEN_LIMIT']),
+             bundler,
+             preprocessed_text,
+             args.use_extractions,
+             input_token_limit=int(os.environ["INPUT_TOKEN_LIMIT"]),
              **args.__dict__,
          )

          ## write outputs
          out = bundler.to_json()
-         output_dir = Path("./output")/str(bundler.uuid)
+         output_dir = Path("./output") / str(bundler.uuid)
          with contextlib.suppress(BaseException):
              shutil.rmtree(output_dir)
          output_dir.mkdir(exist_ok=True, parents=True)
-         output_path = output_dir/f"{bundler.bundle.id}.json"
+         output_path = output_dir / f"{bundler.bundle.id}.json"
          output_path.write_text(out)
          logger.info(f"Wrote bundle output to `{output_path}`")
-         data_path = output_dir/f"data--{bundler.uuid}.json"
+         data_path = output_dir / f"data--{bundler.uuid}.json"
          data_path.write_text(data.model_dump_json(indent=4))
          logger.info(f"Wrote data output to `{data_path}`")
          for nav_layer in data.navigator_layer or []:
-             nav_path = output_dir/f"navigator-{nav_layer['domain']}----{bundler.uuid}.json"
+             nav_path = (
+                 output_dir / f"navigator-{nav_layer['domain']}----{bundler.uuid}.json"
+             )
              nav_path.write_text(json.dumps(nav_layer, indent=4))
              logger.info(f"Wrote navigator output to `{nav_path}`")
      except argparse.ArgumentError as e:
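
Note: `run_txt2stix` is now split into a side-effect-free `extraction_phase` (extractor and LLM calls) and a `processing_phase` that populates the bundler, with a new `txt2stix_data` argument to skip extraction entirely. A hedged sketch of the replay workflow this enables, assuming the saved JSON round-trips through pydantic validation (bundler construction and extractor setup elided; names follow the diff above):

    # run the expensive extractor/LLM phase once and persist the result
    data = extraction_phase(preprocessed_text, extractors_map,
                            ai_settings_extractions=ai_settings_extractions)
    Path("data.json").write_text(data.model_dump_json())

    # later: rebuild a bundle with no further extractor or LLM calls
    saved = Txt2StixData.model_validate_json(Path("data.json").read_text())
    run_txt2stix(bundler, preprocessed_text, extractors_map, txt2stix_data=saved)
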
txt2stix/utils.py CHANGED
@@ -8,7 +8,7 @@ import mistune
  from mistune.renderers.markdown import MarkdownRenderer
  from mistune.util import unescape

- from txt2stix.ai_extractor.utils import AttackFlowList, DescribesIncident
+ from txt2stix.ai_extractor.utils import AttackFlowList, DescribesIncident, RelationshipList
  class ImageLinkRemover(MarkdownRenderer):
      def __init__(self, remove_links: bool=False, remove_images: bool=False):
          self.remove_links = remove_links
@@ -49,7 +49,7 @@ class ImageLinkRemover(MarkdownRenderer):
  class Txt2StixData(BaseModel):
      content_check: DescribesIncident = Field(default=None)
      extractions: dict = Field(default=None)
-     relationships: list[dict] = Field(default_factory=list)
+     relationships: dict|RelationshipList = Field(default_factory=dict)
      attack_flow: AttackFlowList = Field(default=None)
      navigator_layer: list = Field(default=None)

txt2stix-1.1.12.dist-info/METADATA → txt2stix-1.1.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: txt2stix
- Version: 1.1.12
+ Version: 1.1.14
  Summary: txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle.
  Project-URL: Homepage, https://github.com/muchdogesec/txt2stix
  Project-URL: Issues, https://github.com/muchdogesec/txt2stix/issues
txt2stix-1.1.12.dist-info/RECORD → txt2stix-1.1.14.dist-info/RECORD CHANGED
@@ -1,24 +1,24 @@
- txt2stix/__init__.py,sha256=Sm_VT913IFuAZ6dJEdVz3baPwC5VYtHySVfBAOUG92w,803
- txt2stix/attack_flow.py,sha256=qlzI7TdYwPOXegx0hTdvVuZ_He2yQVg9eFPOpEt3huE,9038
- txt2stix/bundler.py,sha256=5E6IptaAyHXdMA7JUw8yG5J2hLZ9kqQuDsWCQAC3xlY,16937
+ txt2stix/__init__.py,sha256=kHCnJtzi37ivXx2STT5zT7-cUL16i86o7ywtSd3iXd4,769
+ txt2stix/attack_flow.py,sha256=VAsgNKZvPa-llUsGvbv0tYNc2Kif5pNeMoxH88-6CWc,9060
+ txt2stix/bundler.py,sha256=GmpWW9ek4iFZdEIyjVSpd9RnmyeNsZJOpnax5Tt0uT0,16748
  txt2stix/common.py,sha256=ISnGNKqJPE1EcfhL-x_4G18mcwt1urmorkW-ru9kV-0,585
  txt2stix/credential_checker.py,sha256=eWDP-jY3-jm8zI0JMoUcyoQZ_JqPNfCIr_HAO8nVYz0,3044
  txt2stix/extractions.py,sha256=_tlsqYHhfAoV-PJzxRHysrX47uxCsMlSg7PQWxww1u0,2171
- txt2stix/indicator.py,sha256=dyf4wbvVrZRitZpm6t7UusSM98bVW1qc5UkdGpVm3ls,30025
+ txt2stix/indicator.py,sha256=EYh3mmgdq-8_5uQrHX5OkQG1sgiO1jQjRCqJaHqyF8k,30770
  txt2stix/lookups.py,sha256=h42YVtYUkWZm6ZPv2h5hHDHDzDs3yBqrT_T7pj2MDZI,2301
  txt2stix/retriever.py,sha256=sMNhnEYk3l5W44qZsWaDQtJYoHXA1oYIPM6wDqiUHSg,6642
  txt2stix/stix.py,sha256=9nXD9a2dCY4uaatl-mlIA1k3srwQBhGW-tUSho3iYe0,30
- txt2stix/txt2stix.py,sha256=CaK2YmkMjBvC8FXZmvkThZfb9_K94sV31Uvns3gPx20,18862
- txt2stix/utils.py,sha256=n6mh4t9ZRJ7iT4Jvp9ai_dfCXjgXNcRtF_zXO7nkpnk,3304
+ txt2stix/txt2stix.py,sha256=eUL0pynQXruJRDvqs-LQ-dspDITx5tFDnTPEgCRQApk,23348
+ txt2stix/utils.py,sha256=Le0VYx8n8UNpcjqwpx7Avb06qIS9_hId8yP8_PquBUs,3333
  txt2stix/ai_extractor/__init__.py,sha256=5Tf6Co9THzytBdFEVhD-7vvT05TT3nSpltnAV1sfdoM,349
  txt2stix/ai_extractor/anthropic.py,sha256=B5Z3nm2-w5KBhLcVJGkhNF0dn4lUo-fW_DnbOeJKA5Q,481
- txt2stix/ai_extractor/base.py,sha256=t0SCh24FeDEDzXsrGFada6ux9F6m0ILwXtPSaleDiv8,4172
+ txt2stix/ai_extractor/base.py,sha256=I_UwX4mOAVa8HrjSkI3KqKKImIBtQ29RdprDOu2NK6A,4235
  txt2stix/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
  txt2stix/ai_extractor/gemini.py,sha256=rhhYrCa1zZTjadVk2QFhguD8_Yr03gl-D4Yb2nVBMI4,633
  txt2stix/ai_extractor/openai.py,sha256=1RxaLy0TJ4GjNKmcJoi6ZiBrCS_gt5ql9jpeE-SOy8g,642
  txt2stix/ai_extractor/openrouter.py,sha256=hAA6mTOMcpA28XYsOCvuJH7WMJqXCxfqZGJf_VrDsIk,628
  txt2stix/ai_extractor/prompts.py,sha256=NtqtVyPPtShPlVZ5SrFmo-LCkfpANIIi4H9rjqaxqDo,10559
- txt2stix/ai_extractor/utils.py,sha256=K3qglBRWYAuRU806-ahbz_PK1qQFfJ7ueWybVxYZYlQ,4425
+ txt2stix/ai_extractor/utils.py,sha256=7iB2qm-oUSFaYidsNi74EACwLV5skCcecCw3F9eIJx4,4507
  txt2stix/pattern/__init__.py,sha256=K9ofaP2AOikvzb48VSBpJZijckdqufZxSzr_kbRypLY,491
  txt2stix/pattern/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  txt2stix/pattern/extractors/base_extractor.py,sha256=ly80rp-L40g7DbhrGiCvhPWI95-ZFMtAQUEC-fH6Y-o,6130
@@ -114,8 +114,8 @@ txt2stix/includes/lookups/threat_actor.txt,sha256=QfDO9maQuqKBgW_Sdd7VGv1SHZ9Ra-
  txt2stix/includes/lookups/tld.txt,sha256=-MEgJea2NMG_KDsnc4BVvI8eRk5Dm93L-t8SGYx5wMo,8598
  txt2stix/includes/lookups/tool.txt,sha256=HGKG6JpUE26w6ezzSxOjBkp15UpSaB7N-mZ_NU_3G7A,6
  txt2stix/includes/tests/test_cases.yaml,sha256=vErA3c5fySeWvJ5yJ8dCTEo3ufRATASAjaF4gj4Az1M,22424
- txt2stix-1.1.12.dist-info/METADATA,sha256=H_-Z_rIZrd0_yLobzdL9Ftthm400x05vLmSThIRDcVQ,15032
- txt2stix-1.1.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- txt2stix-1.1.12.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
- txt2stix-1.1.12.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
- txt2stix-1.1.12.dist-info/RECORD,,
+ txt2stix-1.1.14.dist-info/METADATA,sha256=NaUvm8KFwWFYKiug2PDVWGXRd_W1E75y5j3LiouGW7c,15032
+ txt2stix-1.1.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ txt2stix-1.1.14.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
+ txt2stix-1.1.14.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
+ txt2stix-1.1.14.dist-info/RECORD,,
txt2stix-1.1.12.dist-info/WHEEL → txt2stix-1.1.14.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.27.0
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any