txt2stix 1.0.11__py3-none-any.whl → 1.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,8 @@ from txt2stix.ai_extractor.prompts import DEFAULT_CONTENT_CHECKER_WITH_SUMMARY_T
10
10
  from txt2stix.ai_extractor.utils import AttackFlowList, DescribesIncident, ExtractionList, ParserWithLogging, RelationshipList, get_extractors_str
11
11
  from llama_index.core.utils import get_tokenizer
12
12
 
13
+ from txt2stix.lookups import find_get_indexes
14
+
13
15
 
14
16
  _ai_extractor_registry: dict[str, 'Type[BaseAIExtractor]'] = {}
15
17
  class BaseAIExtractor():
@@ -67,8 +69,11 @@ class BaseAIExtractor():
67
69
  def extract_relationships(self, input_text, extractions, relationship_types: list[str]) -> RelationshipList:
68
70
  return self._get_relationship_program()(relationship_types=relationship_types, input_file=input_text, extractions=extractions)
69
71
 
70
- def extract_objects(self, input_text, extractors) -> ExtractionList:
71
- extraction_list = self._get_extraction_program()(extractors=get_extractors_str(extractors), input_file=input_text)
72
+ def extract_objects(self, input_text: str, extractors) -> ExtractionList:
73
+ extraction_list: ExtractionList = self._get_extraction_program()(extractors=get_extractors_str(extractors), input_file=input_text)
74
+ for extract in extraction_list.extractions:
75
+ extract.start_index = list(find_get_indexes(extract.original_text, input_text))
76
+
72
77
  return extraction_list.model_dump().get('extractions', [])
73
78
 
74
79
  def __init__(self, *args, **kwargs) -> None:
@@ -15,7 +15,8 @@ class Extraction(BaseModel):
15
15
  id: str = Field(description='is the id of the extraction of the format `"ai-%d" %(position in list)`, it should start from 1 (e.g `"ai-1", "ai-2", ..., "ai-n"`)')
16
16
  value: str = Field(description='is the value extracted from the text')
17
17
  original_text: str = Field(description='is the original text the extraction was made from')
18
- # start_index: list[str|int] = Field(description='a list of the index positions of the first character for each matching extraction. Some documents might capture many extractions where `key` and `value` are the same for many entries. This property allows the user to identify how many extractions happened, and where they are in the document.')
18
+ start_index: list[str|int] = Field(default_factory=list, description='no result expected')
19
+
19
20
 
20
21
  class Relationship(BaseModel):
21
22
  source_ref: str = Field(description='is the id for the source extraction for the relationship (e.g. extraction_1).')
@@ -85,9 +86,3 @@ def get_extractors_str(extractors):
85
86
  logging.debug(buffer.getvalue())
86
87
  logging.debug("======== extractors end ======")
87
88
  return buffer.getvalue()
88
-
89
-
90
-
91
- if __name__ == '__main__':
92
- a = ExtractionList(extractions=[Extraction(type="yes", id="1", value="2", original_text="3")], success=True)
93
- print(a.model_dump())
@@ -28,7 +28,7 @@ ai_ipv4_address_cidr:
28
28
  modified: 2020-01-01
29
29
  created_by: DOGESEC
30
30
  version: 1.0.0
31
- prompt_base: 'Extract all IPv4 addresses with a CIDR from the text.'
31
+ prompt_base: 'Extract all IPv4 addresses with a CIDR from the text. CIDR part must be >=0 <=32.'
32
32
  prompt_helper: 'Do not include any IPs that do not have a CIDR.'
33
33
  prompt_conversion: ''
34
34
  test_cases: ipv4_address_cidr
@@ -44,8 +44,8 @@ ai_ipv4_address_port:
44
44
  modified: 2020-01-01
45
45
  created_by: DOGESEC
46
46
  version: 1.0.0
47
- prompt_base: 'Extract all IPv4 addresses with a port from the text.'
48
- prompt_helper: 'Do not include any IPv4s that do not contain a port number'
47
+ prompt_base: 'Extract all IPv4 addresses with a port from the text. Port number part must be >=0 <=65535.'
48
+ prompt_helper: 'Do not include any IPv4s that do not contain a port number.'
49
49
  prompt_conversion: ''
50
50
  test_cases: ipv4_address_port
51
51
  stix_mapping: ipv4-addr-port
@@ -72,13 +72,13 @@ ai_ipv6_address_cidr:
72
72
  type: ai
73
73
  dogesec_web: true
74
74
  name: 'IPv6 Address with CIDR'
75
- description: 'Extracts IPv6 addresses with CIDRs'
75
+ description: 'Extracts IPv6 addresses with CIDRs.'
76
76
  notes: 'pattern_ipv6_address_cidr legacy extraction also exists if you cannot use AI'
77
77
  created: 2020-01-01
78
78
  modified: 2020-01-01
79
79
  created_by: DOGESEC
80
80
  version: 1.0.0
81
- prompt_base: 'Extract all IPv6 addresses with a CIDR from the text.'
81
+ prompt_base: 'Extract all IPv6 addresses with a CIDR from the text. CIDR part must be >=0 <=128.'
82
82
  prompt_helper: 'Do not include any IPv6s that do not contain a CIDR'
83
83
  prompt_conversion: ''
84
84
  test_cases: generic_ipv6_address_cidr
@@ -94,7 +94,7 @@ ai_ipv6_address_port:
94
94
  modified: 2020-01-01
95
95
  created_by: DOGESEC
96
96
  version: 1.0.0
97
- prompt_base: 'Extract all IPv6 addresses with a CIDR from the text.'
97
+ prompt_base: 'Extract all IPv6 addresses with a port from the text. Port number part must be >=0 <=65535.'
98
98
  prompt_helper: 'Do not include any IPv6s that do not contain a port number'
99
99
  prompt_conversion: ''
100
100
  test_cases: generic_ipv6_address_port
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2stix
3
- Version: 1.0.11
3
+ Version: 1.0.13
4
4
  Summary: txt2stix is a Python script that is designed to identify and extract IoCs and TTPs from text files, identify the relationships between them, convert them to STIX 2.1 objects, and output as a STIX 2.1 bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/txt2stix
6
6
  Project-URL: Issues, https://github.com/muchdogesec/txt2stix/issues
@@ -12,13 +12,13 @@ txt2stix/txt2stix.py,sha256=Y7vr4zzh8PvFCD-pX8-qm8kxuintjkhnqQ-OYfq7CRs,18589
12
12
  txt2stix/utils.py,sha256=n6mh4t9ZRJ7iT4Jvp9ai_dfCXjgXNcRtF_zXO7nkpnk,3304
13
13
  txt2stix/ai_extractor/__init__.py,sha256=5Tf6Co9THzytBdFEVhD-7vvT05TT3nSpltnAV1sfdoM,349
14
14
  txt2stix/ai_extractor/anthropic.py,sha256=mdz-8CB-BSCEqnK5l35DRZURVPUf508ef2b48XMxmuk,441
15
- txt2stix/ai_extractor/base.py,sha256=w8FFceCtOZ4_uAaVMTZCzUdKnC3_3nDBafBzHlfHCn0,3959
15
+ txt2stix/ai_extractor/base.py,sha256=t0SCh24FeDEDzXsrGFada6ux9F6m0ILwXtPSaleDiv8,4172
16
16
  txt2stix/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
17
17
  txt2stix/ai_extractor/gemini.py,sha256=yJC7knYzl-TScyCBd-MTpUf-NT6znC25E7vXxNMqjLU,578
18
18
  txt2stix/ai_extractor/openai.py,sha256=FK3UlKozwoBVoBYS_CDGa9lSOae5AC3rMcOH_v0y5_Q,629
19
19
  txt2stix/ai_extractor/openrouter.py,sha256=hAA6mTOMcpA28XYsOCvuJH7WMJqXCxfqZGJf_VrDsIk,628
20
20
  txt2stix/ai_extractor/prompts.py,sha256=NtqtVyPPtShPlVZ5SrFmo-LCkfpANIIi4H9rjqaxqDo,10559
21
- txt2stix/ai_extractor/utils.py,sha256=xPVtp_lI7254MvkXPt9YY_Vter0uiPLKMGcv5poXVKs,4763
21
+ txt2stix/ai_extractor/utils.py,sha256=jG5tPuS2xfiH7xCxlaEkAOboNOOMDah9JpGDXrUUJBA,4342
22
22
  txt2stix/pattern/__init__.py,sha256=K9ofaP2AOikvzb48VSBpJZijckdqufZxSzr_kbRypLY,491
23
23
  txt2stix/pattern/extractors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  txt2stix/pattern/extractors/base_extractor.py,sha256=ly80rp-L40g7DbhrGiCvhPWI95-ZFMtAQUEC-fH6Y-o,6130
@@ -74,7 +74,7 @@ txt2stix/pattern/extractors/url/url_extractor.py,sha256=-SH1WvxbViaRZ1on8lRlzNAc
74
74
  txt2stix/pattern/extractors/url/url_file_extractor.py,sha256=_VDu_BX3Ys9SKhZlscZPp9xSOKCxNKKvJ2gbe7Nvuv0,881
75
75
  txt2stix/pattern/extractors/url/url_path_extractor.py,sha256=FNKcMZRnJpcZZF44T8DHsDWzhBqPq5a23h7T7l2osac,2459
76
76
  txt2stix/includes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
- txt2stix/includes/extractions/ai/config.yaml,sha256=xt5SOqrc4F5Qr2KgWv-j_bqKRqxoqvATSGGq5YtMvRg,41405
77
+ txt2stix/includes/extractions/ai/config.yaml,sha256=8tFAA4cCrXHbf2ewJNo5yTrLUB7qhM0jOu2XsA1hUYs,41540
78
78
  txt2stix/includes/extractions/lookup/config.yaml,sha256=lZoJ-vHig30TpfiwNEl4fiT-AwdOlhm7h0pE8b_G6jg,12059
79
79
  txt2stix/includes/extractions/pattern/config.yaml,sha256=BTfFc69hTR1TMMuu86UVg0K6aFQAxAF55R8l2-PBeOM,20154
80
80
  txt2stix/includes/helpers/mimetype_filename_extension_list.csv,sha256=kgozjMyp7y87CqRcoedfDwNXSLKrDgC9r9YKDYK0EbY,27593
@@ -113,8 +113,8 @@ txt2stix/includes/lookups/threat_actor.txt,sha256=QfDO9maQuqKBgW_Sdd7VGv1SHZ9Ra-
113
113
  txt2stix/includes/lookups/tld.txt,sha256=-MEgJea2NMG_KDsnc4BVvI8eRk5Dm93L-t8SGYx5wMo,8598
114
114
  txt2stix/includes/lookups/tool.txt,sha256=HGKG6JpUE26w6ezzSxOjBkp15UpSaB7N-mZ_NU_3G7A,6
115
115
  txt2stix/includes/tests/test_cases.yaml,sha256=QD1FdIunpPkOpsn6wJRqs2vil_hv8OSVaqUp4a96aZg,22247
116
- txt2stix-1.0.11.dist-info/METADATA,sha256=RMfOHLhIbQI650ut4rDZlAreolrdjk6YZ5VPpTA6rbw,15483
117
- txt2stix-1.0.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
118
- txt2stix-1.0.11.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
119
- txt2stix-1.0.11.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
120
- txt2stix-1.0.11.dist-info/RECORD,,
116
+ txt2stix-1.0.13.dist-info/METADATA,sha256=pzYsUzHCP45Ab_8MWy0slJzmFiXzNeiVg49Z53lMDG8,15483
117
+ txt2stix-1.0.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
118
+ txt2stix-1.0.13.dist-info/entry_points.txt,sha256=x6QPtt65hWeomw4IpJ_wQUesBl1M4WOLODbhOKyWMFg,55
119
+ txt2stix-1.0.13.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
120
+ txt2stix-1.0.13.dist-info/RECORD,,