txt2stix 1.0.11__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,8 @@ from txt2stix.ai_extractor.prompts import DEFAULT_CONTENT_CHECKER_WITH_SUMMARY_T
10
10
  from txt2stix.ai_extractor.utils import AttackFlowList, DescribesIncident, ExtractionList, ParserWithLogging, RelationshipList, get_extractors_str
11
11
  from llama_index.core.utils import get_tokenizer
12
12
 
13
+ from txt2stix.lookups import find_get_indexes
14
+
13
15
 
14
16
  _ai_extractor_registry: dict[str, 'Type[BaseAIExtractor]'] = {}
15
17
  class BaseAIExtractor():
@@ -67,8 +69,11 @@ class BaseAIExtractor():
67
69
  def extract_relationships(self, input_text, extractions, relationship_types: list[str]) -> RelationshipList:
68
70
  return self._get_relationship_program()(relationship_types=relationship_types, input_file=input_text, extractions=extractions)
69
71
 
70
- def extract_objects(self, input_text, extractors) -> ExtractionList:
71
- extraction_list = self._get_extraction_program()(extractors=get_extractors_str(extractors), input_file=input_text)
72
+ def extract_objects(self, input_text: str, extractors) -> ExtractionList:
73
+ extraction_list: ExtractionList = self._get_extraction_program()(extractors=get_extractors_str(extractors), input_file=input_text)
74
+ for extract in extraction_list.extractions:
75
+ extract.start_index = list(find_get_indexes(extract.original_text, input_text))
76
+
72
77
  return extraction_list.model_dump().get('extractions', [])
73
78
 
74
79
  def __init__(self, *args, **kwargs) -> None:
@@ -15,7 +15,8 @@ class Extraction(BaseModel):
15
15
  id: str = Field(description='is the id of the extraction of the format `"ai-%d" %(position in list)`, it should start from 1 (e.g `"ai-1", "ai-2", ..., "ai-n"`)')
16
16
  value: str = Field(description='is the value extracted from the text')
17
17
  original_text: str = Field(description='is the original text the extraction was made from')
18
- # start_index: list[str|int] = Field(description='a list of the index positions of the first character for each matching extraction. Some documents might capture many extractions where `key` and `value` are the same for many entries. This property allows the user to identify how many extractions happened, and where they are in the document.')
18
+ start_index: list[str|int] = Field(default_factory=list, description='no result expected')
19
+
19
20
 
20
21
  class Relationship(BaseModel):
21
22
  source_ref: str = Field(description='is the id for the source extraction for the relationship (e.g. extraction_1).')
@@ -85,9 +86,3 @@ def get_extractors_str(extractors):
85
86
  logging.debug(buffer.getvalue())
86
87
  logging.debug("======== extractors end ======")
87
88
  return buffer.getvalue()
88
-
89
-
90
-
91
- if __name__ == '__main__':
92
- a = ExtractionList(extractions=[Extraction(type="yes", id="1", value="2", original_text="3")], success=True)
93
- print(a.model_dump())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2stix
3
- Version: 1.0.11
3
+ Version: 1.0.12
4
4
  Summary: txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/txt2stix
6
6
  Project-URL: Issues, https://github.com/muchdogesec/txt2stix/issues
@@ -12,13 +12,13 @@ txt2stix/txt2stix.py,sha256=Y7vr4zzh8PvFCD-pX8-qm8kxuintjkhnqQ-OYfq7CRs,18589
12
12
  txt2stix/utils.py,sha256=n6mh4t9ZRJ7iT4Jvp9ai_dfCXjgXNcRtF_zXO7nkpnk,3304
13
13
  txt2stix/ai_extractor/__init__.py,sha256=5Tf6Co9THzytBdFEVhD-7vvT05TT3nSpltnAV1sfdoM,349
14
14
  txt2stix/ai_extractor/anthropic.py,sha256=mdz-8CB-BSCEqnK5l35DRZURVPUf508ef2b48XMxmuk,441
15
- txt2stix/ai_extractor/base.py,sha256=w8FFceCtOZ4_uAaVMTZCzUdKnC3_3nDBafBzHlfHCn0,3959
15
+ txt2stix/ai_extractor/base.py,sha256=t0SCh24FeDEDzXsrGFada6ux9F6m0ILwXtPSaleDiv8,4172
16
16
  txt2stix/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
17
17
  txt2stix/ai_extractor/gemini.py,sha256=yJC7knYzl-TScyCBd-MTpUf-NT6znC25E7vXxNMqjLU,578
18
18
  txt2stix/ai_extractor/openai.py,sha256=FK3UlKozwoBVoBYS_CDGa9lSOae5AC3rMcOH_v0y5_Q,629
19
19
  txt2stix/ai_extractor/openrouter.py,sha256=hAA6mTOMcpA28XYsOCvuJH7WMJqXCxfqZGJf_VrDsIk,628
20
20
  txt2stix/ai_extractor/prompts.py,sha256=NtqtVyPPtShPlVZ5SrFmo-LCkfpANIIi4H9rjqaxqDo,10559
21
- txt2stix/ai_extractor/utils.py,sha256=xPVtp_lI7254MvkXPt9YY_Vter0uiPLKMGcv5poXVKs,4763
21
+ txt2stix/ai_extractor/utils.py,sha256=jG5tPuS2xfiH7xCxlaEkAOboNOOMDah9JpGDXrUUJBA,4342
22
22
  txt2stix/pattern/__init__.py,sha256=K9ofaP2AOikvzb48VSBpJZijckdqufZxSzr_kbRypLY,491
23
23
  txt2stix/pattern/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  txt2stix/pattern/extractors/base_extractor.py,sha256=ly80rp-L40g7DbhrGiCvhPWI95-ZFMtAQUEC-fH6Y-o,6130
@@ -113,8 +113,8 @@ txt2stix/includes/lookups/threat_actor.txt,sha256=QfDO9maQuqKBgW_Sdd7VGv1SHZ9Ra-
113
113
  txt2stix/includes/lookups/tld.txt,sha256=-MEgJea2NMG_KDsnc4BVvI8eRk5Dm93L-t8SGYx5wMo,8598
114
114
  txt2stix/includes/lookups/tool.txt,sha256=HGKG6JpUE26w6ezzSxOjBkp15UpSaB7N-mZ_NU_3G7A,6
115
115
  txt2stix/includes/tests/test_cases.yaml,sha256=QD1FdIunpPkOpsn6wJRqs2vil_hv8OSVaqUp4a96aZg,22247
116
- txt2stix-1.0.11.dist-info/METADATA,sha256=RMfOHLhIbQI650ut4rDZlAreolrdjk6YZ5VPpTA6rbw,15483
117
- txt2stix-1.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
118
- txt2stix-1.0.11.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
119
- txt2stix-1.0.11.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
120
- txt2stix-1.0.11.dist-info/RECORD,,
116
+ txt2stix-1.0.12.dist-info/METADATA,sha256=IwWfdsyujlh62Rc5jpXLRBNO5ySWxbqFkC8Kw_gnCMo,15483
117
+ txt2stix-1.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
118
+ txt2stix-1.0.12.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
119
+ txt2stix-1.0.12.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
120
+ txt2stix-1.0.12.dist-info/RECORD,,