txt2stix 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. txt2stix/__init__.py +33 -0
  2. txt2stix/ai_extractor/__init__.py +15 -0
  3. txt2stix/ai_extractor/anthropic.py +12 -0
  4. txt2stix/ai_extractor/base.py +87 -0
  5. txt2stix/ai_extractor/deepseek.py +19 -0
  6. txt2stix/ai_extractor/gemini.py +18 -0
  7. txt2stix/ai_extractor/openai.py +15 -0
  8. txt2stix/ai_extractor/openrouter.py +20 -0
  9. txt2stix/ai_extractor/prompts.py +164 -0
  10. txt2stix/ai_extractor/utils.py +85 -0
  11. txt2stix/attack_flow.py +101 -0
  12. txt2stix/bundler.py +428 -0
  13. txt2stix/common.py +23 -0
  14. txt2stix/extractions.py +59 -0
  15. txt2stix/includes/__init__.py +0 -0
  16. txt2stix/includes/extractions/ai/config.yaml +1023 -0
  17. txt2stix/includes/extractions/lookup/config.yaml +393 -0
  18. txt2stix/includes/extractions/pattern/config.yaml +609 -0
  19. txt2stix/includes/helpers/mimetype_filename_extension_list.csv +936 -0
  20. txt2stix/includes/helpers/stix_relationship_types.txt +41 -0
  21. txt2stix/includes/helpers/tlds.txt +1446 -0
  22. txt2stix/includes/helpers/windows_registry_key_prefix.txt +12 -0
  23. txt2stix/includes/lookups/_README.md +11 -0
  24. txt2stix/includes/lookups/_generate_lookups.py +247 -0
  25. txt2stix/includes/lookups/attack_pattern.txt +1 -0
  26. txt2stix/includes/lookups/campaign.txt +1 -0
  27. txt2stix/includes/lookups/country_iso3166_alpha2.txt +249 -0
  28. txt2stix/includes/lookups/course_of_action.txt +1 -0
  29. txt2stix/includes/lookups/disarm_id_v1_5.txt +345 -0
  30. txt2stix/includes/lookups/disarm_name_v1_5.txt +347 -0
  31. txt2stix/includes/lookups/extensions.txt +78 -0
  32. txt2stix/includes/lookups/identity.txt +1 -0
  33. txt2stix/includes/lookups/infrastructure.txt +1 -0
  34. txt2stix/includes/lookups/intrusion_set.txt +1 -0
  35. txt2stix/includes/lookups/malware.txt +2 -0
  36. txt2stix/includes/lookups/mitre_atlas_id_v4_5_2.txt +116 -0
  37. txt2stix/includes/lookups/mitre_atlas_name_v4_5_2.txt +117 -0
  38. txt2stix/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +1502 -0
  39. txt2stix/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +1656 -0
  40. txt2stix/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +1765 -0
  41. txt2stix/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +141 -0
  42. txt2stix/includes/lookups/mitre_attack_ics_id_v16_0.txt +254 -0
  43. txt2stix/includes/lookups/mitre_attack_ics_name_v16_0.txt +293 -0
  44. txt2stix/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +159 -0
  45. txt2stix/includes/lookups/mitre_attack_mobile_id_v16_0.txt +277 -0
  46. txt2stix/includes/lookups/mitre_attack_mobile_name_v16_0.txt +296 -0
  47. txt2stix/includes/lookups/mitre_capec_id_v3_9.txt +559 -0
  48. txt2stix/includes/lookups/mitre_capec_name_v3_9.txt +560 -0
  49. txt2stix/includes/lookups/mitre_cwe_id_v4_15.txt +939 -0
  50. txt2stix/includes/lookups/mitre_cwe_name_v4_15.txt +939 -0
  51. txt2stix/includes/lookups/threat_actor.txt +1 -0
  52. txt2stix/includes/lookups/tld.txt +1422 -0
  53. txt2stix/includes/lookups/tool.txt +1 -0
  54. txt2stix/includes/tests/test_cases.yaml +695 -0
  55. txt2stix/indicator.py +860 -0
  56. txt2stix/lookups.py +68 -0
  57. txt2stix/pattern/__init__.py +13 -0
  58. txt2stix/pattern/extractors/__init__.py +0 -0
  59. txt2stix/pattern/extractors/base_extractor.py +167 -0
  60. txt2stix/pattern/extractors/card/README.md +34 -0
  61. txt2stix/pattern/extractors/card/__init__.py +15 -0
  62. txt2stix/pattern/extractors/card/amex_card_extractor.py +52 -0
  63. txt2stix/pattern/extractors/card/diners_card_extractor.py +47 -0
  64. txt2stix/pattern/extractors/card/discover_card_extractor.py +48 -0
  65. txt2stix/pattern/extractors/card/jcb_card_extractor.py +43 -0
  66. txt2stix/pattern/extractors/card/master_card_extractor.py +63 -0
  67. txt2stix/pattern/extractors/card/union_card_extractor.py +38 -0
  68. txt2stix/pattern/extractors/card/visa_card_extractor.py +46 -0
  69. txt2stix/pattern/extractors/crypto/__init__.py +3 -0
  70. txt2stix/pattern/extractors/crypto/btc_extractor.py +38 -0
  71. txt2stix/pattern/extractors/directory/__init__.py +10 -0
  72. txt2stix/pattern/extractors/directory/unix_directory_extractor.py +40 -0
  73. txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +42 -0
  74. txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +47 -0
  75. txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +42 -0
  76. txt2stix/pattern/extractors/domain/__init__.py +8 -0
  77. txt2stix/pattern/extractors/domain/domain_extractor.py +39 -0
  78. txt2stix/pattern/extractors/domain/hostname_extractor.py +36 -0
  79. txt2stix/pattern/extractors/domain/sub_domain_extractor.py +49 -0
  80. txt2stix/pattern/extractors/hashes/__init__.py +16 -0
  81. txt2stix/pattern/extractors/hashes/md5_extractor.py +16 -0
  82. txt2stix/pattern/extractors/hashes/sha1_extractor.py +14 -0
  83. txt2stix/pattern/extractors/hashes/sha224_extractor.py +18 -0
  84. txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +14 -0
  85. txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +13 -0
  86. txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +15 -0
  87. txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +16 -0
  88. txt2stix/pattern/extractors/helper.py +64 -0
  89. txt2stix/pattern/extractors/ip/__init__.py +14 -0
  90. txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +49 -0
  91. txt2stix/pattern/extractors/ip/ipv4_extractor.py +18 -0
  92. txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +42 -0
  93. txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +18 -0
  94. txt2stix/pattern/extractors/ip/ipv6_extractor.py +16 -0
  95. txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +46 -0
  96. txt2stix/pattern/extractors/others/__init__.py +22 -0
  97. txt2stix/pattern/extractors/others/asn_extractor.py +14 -0
  98. txt2stix/pattern/extractors/others/cpe_extractor.py +29 -0
  99. txt2stix/pattern/extractors/others/cve_extractor.py +14 -0
  100. txt2stix/pattern/extractors/others/email_extractor.py +21 -0
  101. txt2stix/pattern/extractors/others/filename_extractor.py +17 -0
  102. txt2stix/pattern/extractors/others/iban_extractor.py +15 -0
  103. txt2stix/pattern/extractors/others/mac_address_extractor.py +13 -0
  104. txt2stix/pattern/extractors/others/phonenumber_extractor.py +41 -0
  105. txt2stix/pattern/extractors/others/user_agent_extractor.py +20 -0
  106. txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +18 -0
  107. txt2stix/pattern/extractors/url/__init__.py +7 -0
  108. txt2stix/pattern/extractors/url/url_extractor.py +22 -0
  109. txt2stix/pattern/extractors/url/url_file_extractor.py +21 -0
  110. txt2stix/pattern/extractors/url/url_path_extractor.py +74 -0
  111. txt2stix/retriever.py +126 -0
  112. txt2stix/stix.py +1 -0
  113. txt2stix/txt2stix.py +336 -0
  114. txt2stix/utils.py +86 -0
  115. txt2stix-0.0.4.dist-info/METADATA +190 -0
  116. txt2stix-0.0.4.dist-info/RECORD +119 -0
  117. txt2stix-0.0.4.dist-info/WHEEL +4 -0
  118. txt2stix-0.0.4.dist-info/entry_points.txt +2 -0
  119. txt2stix-0.0.4.dist-info/licenses/LICENSE +202 -0
txt2stix/bundler.py ADDED
@@ -0,0 +1,428 @@
1
+ import enum
2
+ import logging
3
+ from stix2 import (
4
+ Report,
5
+ Identity,
6
+ MarkingDefinition,
7
+ Relationship,
8
+ Bundle,
9
+ )
10
+ from stix2.parsing import dict_to_stix2, parse as parse_stix
11
+ from stix2.serialization import serialize
12
+ import hashlib
13
+ from stix2 import (
14
+ v21,
15
+ )
16
+ import requests
17
+
18
+
19
+ from .common import UUID_NAMESPACE, MinorException
20
+ from datetime import UTC, datetime as dt
21
+ import uuid
22
+ import json
23
+ from .indicator import build_observables
24
+
25
+
26
+ logger = logging.getLogger("txt2stix.stix")
27
+
28
+
29
def _tlp_marking(marking_id, definition_type, tlp_2_0):
    """Build the marking-definition object used as the value of one TLP level.

    All levels share the same spec version, creation timestamp and extension
    definition id; only the object id, the ``definition_type`` and the
    ``tlp_2_0`` extension property differ.
    """
    return MarkingDefinition(
        spec_version="2.1",
        id=marking_id,
        created="2022-10-01T00:00:00.000Z",
        definition_type=definition_type,
        extensions={
            "extension-definition--60a3c5c5-0d10-413e-aab3-9e08dde9e88d": {
                "extension_type": "property-extension",
                "tlp_2_0": tlp_2_0,
            }
        },
    )


class TLP_LEVEL(enum.Enum):
    """TLP levels, each member's value being its ``MarkingDefinition`` object."""

    CLEAR = _tlp_marking(
        "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
        "TLP:CLEAR",
        "clear",
    )
    GREEN = _tlp_marking(
        "marking-definition--bab4a63c-aed9-4cf5-a766-dfca5abac2bb",
        "TLP:GREEN",
        "green",
    )
    AMBER = _tlp_marking(
        "marking-definition--55d920b0-5e8b-4f79-9ee9-91f868d9b421",
        "TLP:AMBER",
        "amber",
    )
    AMBER_STRICT = _tlp_marking(
        "marking-definition--939a9414-2ddd-4d32-a0cd-375ea402b003",
        "TLP:AMBER+STRICT",
        "amber+strict",
    )
    RED = _tlp_marking(
        "marking-definition--e828b379-4e03-4974-9ac4-e53a884c97c1",
        "TLP:RED",
        "red",
    )

    @classmethod
    def levels(cls):
        """Return a mapping of lower-case level key -> enum member."""
        return {
            "clear": cls.CLEAR,
            "green": cls.GREEN,
            "amber": cls.AMBER,
            "amber_strict": cls.AMBER_STRICT,
            "red": cls.RED,
        }

    @classmethod
    def values(cls):
        """Return the marking-definition objects of all levels, CLEAR first."""
        ordered = (cls.CLEAR, cls.GREEN, cls.AMBER, cls.AMBER_STRICT, cls.RED)
        return [member.value for member in ordered]

    @classmethod
    def get(cls, level):
        """Resolve ``level`` to a member.

        An existing member is returned unchanged; anything else is looked up
        as a key of :meth:`levels` (an unknown key raises ``KeyError``).
        """
        return level if isinstance(level, cls) else cls.levels()[level]

    @property
    def name(self):
        """The member name in lower case (e.g. ``"amber_strict"``)."""
        return super().name.lower()
120
+
121
+
122
class txt2stixBundler:
    """Collect extracted observables and relationships into one STIX bundle.

    An instance owns a ``Report`` object plus a ``Bundle`` that initially holds
    the selected TLP marking, the txt2stix statement marking, the identity and
    the report. Objects added through :meth:`add_ref` are appended to the
    bundle and (optionally) referenced from the report.
    """

    # Custom object types that need an extension definition in the bundle.
    # ``None`` means "not downloaded yet"; the downloaded definition is cached
    # here on the class so it is fetched at most once per process.
    EXTENSION_MAPPING = {
        "user-agent": None,
        "cryptocurrency-wallet": None,
        "cryptocurrency-transaction": None,
        "bank-card": None,
        "bank-account": None,
        "phone-number": None,
        "weakness": None,
    }
    EXTENSION_DEFINITION_BASE_URL = "https://raw.githubusercontent.com/muchdogesec/stix2extensions/main/extension-definitions"
    # Class-level placeholders kept for backward compatibility; the mutable
    # ones are replaced with fresh per-instance containers in ``__init__`` so
    # that state never leaks from one bundler to the next.
    report = None
    identity = None
    object_marking_refs = []
    uuid = None
    id_map = dict()
    id_value_map = dict()
    _flow_objects = []
    # this identity is https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/identity/txt2stix.json
    default_identity = Identity(
        type="identity",
        spec_version="2.1",
        id="identity--f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5",
        created_by_ref="identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        created="2020-01-01T00:00:00.000Z",
        modified="2020-01-01T00:00:00.000Z",
        name="txt2stix",
        description="https://github.com/muchdogsec/txt2stix",
        identity_class="system",
        sectors=["technology"],
        contact_information="https://www.dogesec.com/contact/",
        object_marking_refs=[
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
        ],
    )
    # this marking-definition is https://raw.githubusercontent.com/muchdogesec/stix4doge/main/objects/marking-definition/txt2stix.json
    default_marking = MarkingDefinition(
        type="marking-definition",
        spec_version="2.1",
        id="marking-definition--f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5",
        created_by_ref="identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
        created="2020-01-01T00:00:00.000Z",
        definition_type="statement",
        definition={
            "statement": "This object was created using: https://github.com/muchdogesec/txt2stix"
        },
        object_marking_refs=[
            "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
            "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
        ],
    )

    def __init__(
        self,
        name,
        identity,
        tlp_level,
        description,
        confidence,
        extractors,
        labels,
        report_id=None,
        created=None,
        external_references=None,
        modified=None,
    ) -> None:
        """Create the report object and the bundle that will contain it.

        Args:
            name: report name.
            identity: identity object used as ``created_by_ref``; falls back
                to :attr:`default_identity` when falsy.
            tlp_level: a ``TLP_LEVEL`` member or a key accepted by
                ``TLP_LEVEL.get``.
            description: report description; its MD5 is stored as an external
                reference and feeds the indicator ids.
            confidence: passed through to the report.
            extractors: mapping of extraction type -> extractor, consulted by
                :meth:`add_indicator`.
            labels: optional list of report labels (not modified).
            report_id: UUID part of the report/bundle id; derived from the
                identity id, creation time and name when omitted.
            created: creation time; defaults to the current UTC time.
            external_references: extra external references for the report.
            modified: modification time; defaults to ``created``.
        """
        self.observables_processed = 0
        self.created = created or dt.now(tz=UTC)
        self.all_extractors = extractors
        self.identity = identity or self.default_identity
        self.tlp_level = TLP_LEVEL.get(tlp_level)

        # Fresh containers per instance (the class-level ones would otherwise
        # be shared, and mutated, by every bundler in the process).
        self.object_marking_refs = []
        self.id_map = {}
        self.id_value_map = {}
        self._flow_objects = []

        if report_id:
            self.uuid = report_id
        else:
            self.uuid = str(
                uuid.uuid5(UUID_NAMESPACE, f"{self.identity.id}+{self.created}+{name}")
            )
        external_references = external_references or []
        # Build a new list so the caller's ``labels`` is left untouched; the
        # placeholder is popped from the report again right after creation.
        labels = [*(labels or []), 'placeholder_label']

        self.job_id = f"report--{self.uuid}"
        self.report_md5 = hashlib.md5(description.encode()).hexdigest()
        self.report = Report(
            created_by_ref=self.identity.id,
            name=name,
            id=self.job_id,
            description=description,
            object_refs=[
                f"note--{self.uuid}"
            ],  # won't allow creation with empty object_refs
            created=self.created,
            modified=modified or self.created,
            object_marking_refs=[self.tlp_level.value.id],
            labels=labels,
            published=self.created,
            external_references=[
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                },
                {
                    "source_name": "txt2stix Report MD5",
                    "description": self.report_md5,
                },
            ] + external_references,
            confidence=confidence,
        )
        self.report.object_refs.clear()  # clear object refs
        self.report.labels.pop(-1)  # remove txt2stix placeholder
        self.added_objects = set()
        self.set_defaults()

    def set_defaults(self):
        """Create the bundle and seed it with the marking, identity and report."""
        # self.value.extend(TLP_LEVEL.values()) # adds all tlp levels
        self.bundle = Bundle(objects=[self.tlp_level.value], id=f"bundle--{self.uuid}")

        self.bundle.objects.extend([self.default_marking, self.identity, self.report])
        # add default STIX 2.1 marking definition for txt2stix
        self.report.object_marking_refs.append(self.default_marking.id)

    def add_extension(self, object):
        """Make sure the extension definition for ``object``'s type is bundled.

        Types not listed in :attr:`EXTENSION_MAPPING` need no extension and
        are ignored. For listed types the definition is downloaded on first
        use, cached on the class, and added to *this* bundle (without being
        referenced from the report). Adding is repeated for every call but
        :meth:`add_ref` de-duplicates by id, so each bundle gets the definition
        exactly once -- including bundles created after the cache was filled.

        Raises:
            Exception: if the object is neither an observable nor a domain
                object, so no download location can be derived.
        """
        _type = object["type"]
        if _type not in self.EXTENSION_MAPPING:
            return
        if self.EXTENSION_MAPPING[_type] is None:
            if isinstance(object, v21._Observable):
                url = self.EXTENSION_DEFINITION_BASE_URL + f"/scos/{_type}.json"
            elif isinstance(object, v21._DomainObject):
                url = self.EXTENSION_DEFINITION_BASE_URL + f"/sdos/{_type}.json"
            else:
                raise Exception(
                    f"Unknown custom object object.type = {_type}, {type(object)=}"
                )
            logger.info(f'getting extension definition for "{_type}" from `{url}`')
            self.EXTENSION_MAPPING[_type] = self.load_stix_object_from_url(url)
        extension = self.EXTENSION_MAPPING[_type]
        self.add_ref(extension, is_report_object=False)

    @staticmethod
    def load_stix_object_from_url(url):
        """Download the JSON document at ``url`` and parse it as a STIX object.

        Raises:
            requests.RequestException: on connection problems, timeout or a
                non-success HTTP status (instead of trying to parse an error
                page as STIX).
        """
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        return dict_to_stix2(resp.json())

    @staticmethod
    def _display_value(sdo):
        """Pick a human-readable value for ``sdo`` (used in relationship text).

        The first truthy property among a fixed list of candidates wins; next
        comes the ``external_id`` of the first external reference; otherwise
        the literal ``"{NOTEXTRACTED}"`` is returned.
        """
        for key in ('name', 'value', 'path', 'key', 'string', 'number',
                    'iban_number', 'address', 'hashes'):
            if value := sdo.get(key):
                return value
        if refs := sdo.get('external_references', []):
            # Not every external reference carries an ``external_id``.
            return refs[0].get('external_id', "{NOTEXTRACTED}")
        return "{NOTEXTRACTED}"

    def add_ref(self, sdo, is_report_object=True):
        """Add ``sdo`` to the bundle once, keyed by its id.

        Args:
            sdo: STIX object (or mapping) with at least ``type`` and ``id``.
            is_report_object: when true the id is also appended to the
                report's ``object_refs``.
        """
        self.add_extension(sdo)
        sdo_id = sdo["id"]
        if sdo_id in self.added_objects:
            return
        self.added_objects.add(sdo_id)
        if is_report_object:
            self.report.object_refs.append(sdo_id)
        self.bundle.objects.append(sdo)
        self.id_value_map[sdo_id] = self._display_value(sdo)

    def add_indicator(self, extracted_dict, add_standard_relationship):
        """Turn one extraction into STIX objects and add them to the bundle.

        Args:
            extracted_dict: extraction with ``type``, ``value`` and ``id``
                keys and optionally ``indexes``.
            add_standard_relationship: accepted for interface compatibility;
                not used by this method.

        Raises:
            MinorException: if ``build_observables`` produced no objects.
        """
        extractor = self.all_extractors[extracted_dict["type"]]
        stix_mapping = extractor.stix_mapping
        extracted_value = extracted_dict["value"]
        extracted_id = extracted_dict["id"]

        indicator = self.new_indicator(extractor, stix_mapping, extracted_value)
        # set id so it doesn't need to be created in build_observables
        if extracted_dict.get("indexes"):
            indicator["external_references"].append(
                dict(
                    source_name="indexes",
                    description=json.dumps(extracted_dict["indexes"]),
                )
            )
        objects, related_refs = build_observables(
            self, stix_mapping, indicator, extracted_dict['value'], extractor
        )
        if not objects:
            raise MinorException(
                f"build observable returns {objects} from extraction: {extracted_dict}"
            )
        # Remember which object ids this extraction produced so that
        # relationships can later be resolved from extraction ids.
        self.id_map[extracted_id] = related_refs

        for sdo in objects:
            sdo = parse_stix(sdo, allow_custom=True)
            self.add_ref(sdo)

    def new_indicator(self, extractor, stix_mapping, extracted_value):
        """Return a plain-dict indicator for ``extracted_value``.

        Timestamps, creator and markings are copied from the report; the id
        is derived deterministically via :meth:`indicator_id_from_value`.
        """
        indicator = {
            "type": "indicator",
            "id": self.indicator_id_from_value(extracted_value, stix_mapping),
            "spec_version": "2.1",
            "created_by_ref": self.report.created_by_ref,
            "created": self.report.created,
            "modified": self.report.modified,
            "indicator_types": ["unknown"],
            "name": extracted_value,
            "pattern_type": "stix",
            "pattern": f"[ {stix_mapping}:value = { repr(extracted_value) } ]",
            "valid_from": self.report.created,
            "object_marking_refs": self.report.object_marking_refs,
            "external_references": [
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                },
                {
                    "source_name": "txt2stix_extraction_type",
                    "description": f"{extractor.slug}_{extractor.version}",
                },
            ],
        }

        return indicator

    def add_ai_relationship(self, gpt_out):
        """Add a relationship for every source/target pair of an AI result.

        ``gpt_out`` carries extraction ids in ``source_ref``/``target_ref``;
        each is expanded through ``id_map`` to the object ids it produced.
        Unknown extraction ids simply yield no relationships.
        """
        for source_ref in self.id_map.get(gpt_out["source_ref"], []):
            for target_ref in self.id_map.get(gpt_out["target_ref"], []):
                self.add_standard_relationship(
                    source_ref, target_ref, gpt_out["relationship_type"],
                )

    def add_standard_relationship(self, source_ref, target_ref, relationship_type):
        """Add a relationship whose description reads "<source> <type> <target>".

        Hyphens in ``relationship_type`` become spaces in the description;
        object ids are replaced by their display values where known.
        """
        descriptor = ' '.join(relationship_type.split('-'))
        self.add_ref(self.new_relationship(
            source_ref, target_ref, relationship_type,
            description=f"{self.id_value_map.get(source_ref, source_ref)} {descriptor} {self.id_value_map.get(target_ref, target_ref)}"
        ))

    def new_relationship(self, source_ref, target_ref, relationship_type, description=None, external_references=None):
        """Build a ``Relationship`` with a deterministic id.

        The id is a UUIDv5 of ``relationship_type``, ``source_ref`` and
        ``target_ref``; creator, timestamps and markings come from the report.
        When no external references are given, one pointing at this report's
        uuid is used.
        """
        return Relationship(
            id="relationship--"
            + str(
                uuid.uuid5(
                    UUID_NAMESPACE, f"{relationship_type}+{source_ref}+{target_ref}"
                )
            ),
            source_ref=source_ref,
            target_ref=target_ref,
            relationship_type=relationship_type,
            created_by_ref=self.report.created_by_ref,
            created=self.report.created,
            description=description,
            modified=self.report.modified,
            object_marking_refs=self.report.object_marking_refs,
            allow_custom=True,
            external_references=external_references or [
                {
                    "source_name": "txt2stix_report_id",
                    "external_id": self.uuid,
                }
            ],
        )

    def to_json(self):
        """Serialize the bundle to a JSON string indented by four spaces."""
        return serialize(self.bundle, indent=4)

    def process_observables(self, extractions, add_standard_relationship=False):
        """Add every extraction to the bundle, skipping the ones that fail.

        Each extraction gets an ``id``: ids starting with ``ai`` are suffixed
        with the running counter, missing ids become ``ex_<counter>``.
        Failures are logged at debug level (with traceback) and do not stop
        the loop; ``KeyboardInterrupt``/``SystemExit`` are not swallowed.
        """
        for ex in extractions:
            try:
                if ex.get('id', '').startswith('ai'):  # so id is distinct across multiple AIExtractors
                    ex["id"] = f'{ex["id"]}_{self.observables_processed}'
                ex.setdefault("id", f"ex_{self.observables_processed}")
                self.observables_processed += 1
                self.add_indicator(ex, add_standard_relationship)
            except Exception:
                logger.debug(
                    f"ran into exception while processing observable `{ex}`",
                    exc_info=True,
                )

    def process_relationships(self, observables):
        """Add every AI relationship, skipping the ones that fail.

        Failures are logged at debug level (with traceback) and do not stop
        the loop; ``KeyboardInterrupt``/``SystemExit`` are not swallowed.
        """
        for relationship in observables:
            try:
                self.add_ai_relationship(relationship)
            except Exception:
                logger.debug(
                    f"ran into exception while processing relationship `{relationship}`",
                    exc_info=True,
                )

    def indicator_id_from_value(self, value, stix_mapping):
        """Return a deterministic indicator id for ``value``.

        The id is a UUIDv5 over the identity id, the report MD5, the STIX
        mapping and the value.
        """
        return "indicator--" + str(
            uuid.uuid5(UUID_NAMESPACE, f"txt2stix+{self.identity['id']}+{self.report_md5}+{stix_mapping}+{value}")
        )

    @property
    def flow_objects(self):
        """The objects last assigned through the ``flow_objects`` setter."""
        return self._flow_objects

    @flow_objects.setter
    def flow_objects(self, objects):
        """Add ``objects`` to the bundle and remember them.

        The report itself is skipped; extension definitions are bundled but
        not referenced from the report.
        """
        for obj in objects:
            if obj['id'] == self.report.id:
                continue
            is_report_object = obj['type'] != "extension-definition"
            self.add_ref(obj, is_report_object=is_report_object)
        self._flow_objects = objects
txt2stix/common.py ADDED
@@ -0,0 +1,23 @@
1
+ from typing import Any
2
+ from uuid import UUID
3
+
4
+ UUID_NAMESPACE = UUID("f92e15d9-6afc-5ae2-bb3e-85a1fd83a3b5")
5
+
6
class NamedDict(dict):
    """A ``dict`` whose items can also be read and written as attributes.

    Reading ``obj.attr`` first tries normal attribute lookup (so dict methods
    and class attributes keep working). When that lookup fails, or yields
    ``None``, the dict item ``attr`` is returned instead, with ``""`` as the
    default for a missing key. Assigning ``obj.attr = value`` always stores a
    dict item.
    """

    def __getattribute__(self, attr: str):
        try:
            value = super().__getattribute__(attr)
        except AttributeError:
            # Only a genuinely missing attribute falls back to the dict;
            # any other error raised during lookup is a real bug and must
            # not be hidden.
            value = None
        if value is not None:
            return value
        # A ``None`` attribute (e.g. a class-level placeholder) defers to the
        # dict item of the same name.
        return super().get(attr, "")

    def __setattr__(self, __name: str, __value: Any) -> None:
        return super().__setitem__(__name, __value)
19
+
20
class FatalException(Exception):
    """Project-specific exception type; adds nothing to ``Exception``.

    NOTE(review): judging by the name this marks unrecoverable failures --
    the raise sites are not visible here, confirm against callers.
    """
22
class MinorException(Exception):
    """Project-specific exception type; adds nothing to ``Exception``.

    NOTE(review): judging by the name this marks recoverable, per-item
    failures -- confirm against the call sites.
    """
@@ -0,0 +1,59 @@
1
+ from typing import Any, Type
2
+ import yaml
3
+ from pathlib import Path
4
+
5
+ from typing import TYPE_CHECKING
6
+ if TYPE_CHECKING:
7
+ import txt2stix.pattern.extractors.base_extractor
8
+ from .common import NamedDict
9
+
10
class Extractor(NamedDict):
    """One extraction definition, backed by its configuration mapping.

    The class-level ``None`` placeholders declare the expected fields; since
    ``NamedDict`` falls back to the dict item whenever an attribute is
    ``None``, reading e.g. ``extractor.name`` yields the configured value.
    """

    extraction_key = None
    name = None
    type: str = None
    description = None
    created = None
    modified = None
    created_by = None
    version = None
    prompt_base = None
    prompt_helper = None
    prompt_extraction_processing = None
    prompt_positive_examples = None
    prompt_negative_examples = None
    stix_mapping = None
    prompt_extraction_extra = None
    pattern_extractor : 'Type[txt2stix.pattern.extractors.base_extractor.BaseExtractor]' = None

    def __init__(self, key, dct, include_path=None, test_cases: dict[str, list[str]]=None):
        """Populate the extractor from its config entry.

        Args:
            key: config key; stored as both ``extraction_key`` and ``slug``.
            dct: the configuration mapping for this extractor.
            include_path: base directory used to resolve a relative ``file``.
            test_cases: optional mapping that may hold the
                ``test_negative_examples`` / ``test_positive_examples`` lists.
        """
        super().__init__(dct)
        self.extraction_key = key
        self.slug = key

        examples = test_cases or dict()
        self.prompt_negative_examples = examples.get('test_negative_examples') or []
        self.prompt_positive_examples = examples.get('test_positive_examples') or []

        # A relative ``file`` entry is anchored at the include directory.
        needs_anchor = self.file and not Path(self.file).is_absolute() and include_path
        if needs_anchor:
            self.file = Path(include_path) / self.file

    def load(self):
        """For ``lookup`` extractors, read ``file`` into the ``lookups`` set.

        Every line of the file is stripped of surrounding whitespace and
        added to the set. Extractors of any other type are left untouched.
        """
        if self.type != "lookup":
            return
        entries = set()
        self.lookups = entries
        source = Path(self.file)
        entries.update(raw.strip() for raw in source.read_text().splitlines())
46
+
47
def parse_extraction_config(include_path: Path):
    """Load every ``extractions/*/config.yaml`` below ``include_path``.

    The YAML mappings are merged into one dict (later files override earlier
    ones on key clashes) and each entry is wrapped in an ``Extractor``. The
    entry's ``test_cases`` value selects which group from the test-case
    config is handed to the extractor.

    Returns:
        dict mapping extraction key -> ``Extractor``.
    """
    config = {}
    # ``or {}`` keeps this function working even if the loader yields nothing.
    test_cases = load_test_cases_config(include_path) or {}
    for config_path in include_path.glob("extractions/*/config.yaml"):
        # Close the file deterministically; an empty YAML file parses to
        # ``None`` and contributes nothing.
        with config_path.open() as stream:
            config.update(yaml.safe_load(stream) or {})

    return {
        key: Extractor(key, spec, include_path, test_cases=test_cases.get(spec.get('test_cases')))
        for key, spec in config.items()
    }
54
+
55
def load_test_cases_config(include_path: Path) -> dict[str, dict[str, list[str]]]:
    """Load ``tests/test_cases.yaml`` from below ``include_path``.

    Returns:
        The parsed mapping, or an empty dict when the file does not exist or
        is empty (an empty YAML document parses to ``None``).
    """
    config_file = include_path / 'tests/test_cases.yaml'
    if not config_file.exists():
        return {}
    # Use a context manager so the file handle is closed right away.
    with config_file.open() as stream:
        return yaml.safe_load(stream) or {}
File without changes