txt2detection 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of txt2detection might be problematic. Click here for more details.

txt2detection/bundler.py CHANGED
@@ -15,8 +15,15 @@ from stix2 import (
15
15
  from stix2.serialization import serialize
16
16
  import hashlib
17
17
 
18
- from txt2detection import observables
19
- from txt2detection.models import AIDetection, BaseDetection, DetectionContainer, UUID_NAMESPACE, SigmaRuleDetection
18
+ from txt2detection import attack_flow, observables
19
+ from txt2detection.models import (
20
+ AIDetection,
21
+ BaseDetection,
22
+ DataContainer,
23
+ DetectionContainer,
24
+ UUID_NAMESPACE,
25
+ SigmaRuleDetection,
26
+ )
20
27
 
21
28
  from datetime import UTC, datetime as dt
22
29
  import uuid
@@ -28,56 +35,58 @@ from txt2detection.utils import STATUSES, remove_rule_specific_tags
28
35
 
29
36
  logger = logging.getLogger("txt2detection.bundler")
30
37
 
38
+
31
39
  class Bundler:
32
40
  identity = None
33
41
  object_marking_refs = []
34
42
  uuid = None
35
43
  id_map = dict()
36
- detections: DetectionContainer
44
+ data: DataContainer
45
+ ATTACK_FLOW_SMO_URL = "https://github.com/muchdogesec/stix2extensions/raw/refs/heads/main/remote-definitions/attack-flow.json"
37
46
  # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/identity/txt2detection.json
38
- default_identity = Identity(**{
39
- "type": "identity",
40
- "spec_version": "2.1",
41
- "id": "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
42
- "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
43
- "created": "2020-01-01T00:00:00.000Z",
44
- "modified": "2020-01-01T00:00:00.000Z",
45
- "name": "txt2detection",
46
- "description": "https://github.com/muchdogesec/txt2detection",
47
- "identity_class": "system",
48
- "sectors": [
49
- "technology"
50
- ],
51
- "contact_information": "https://www.dogesec.com/contact/",
52
- "object_marking_refs": [
53
- "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
54
- "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb"
55
- ]
56
- })
47
+ default_identity = Identity(
48
+ **{
49
+ "type": "identity",
50
+ "spec_version": "2.1",
51
+ "id": "identity--a4d70b75-6f4a-5d19-9137-da863edd33d7",
52
+ "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
53
+ "created": "2020-01-01T00:00:00.000Z",
54
+ "modified": "2020-01-01T00:00:00.000Z",
55
+ "name": "txt2detection",
56
+ "description": "https://github.com/muchdogesec/txt2detection",
57
+ "identity_class": "system",
58
+ "sectors": ["technology"],
59
+ "contact_information": "https://www.dogesec.com/contact/",
60
+ "object_marking_refs": [
61
+ "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
62
+ "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
63
+ ],
64
+ }
65
+ )
57
66
  # https://raw.githubusercontent.com/muchdogesec/stix4doge/refs/heads/main/objects/marking-definition/txt2detection.json
58
- default_marking = MarkingDefinition(**{
59
- "type": "marking-definition",
60
- "spec_version": "2.1",
61
- "id": "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
62
- "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
63
- "created": "2020-01-01T00:00:00.000Z",
64
- "definition_type": "statement",
65
- "definition": {
66
- "statement": "This object was created using: https://github.com/muchdogesec/txt2detection"
67
- },
68
- "object_marking_refs": [
69
- "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
70
- "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb"
71
- ]
72
- })
67
+ default_marking = MarkingDefinition(
68
+ **{
69
+ "type": "marking-definition",
70
+ "spec_version": "2.1",
71
+ "id": "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
72
+ "created_by_ref": "identity--9779a2db-f98c-5f4b-8d08-8ee04e02dbb5",
73
+ "created": "2020-01-01T00:00:00.000Z",
74
+ "definition_type": "statement",
75
+ "definition": {
76
+ "statement": "This object was created using: https://github.com/muchdogesec/txt2detection"
77
+ },
78
+ "object_marking_refs": [
79
+ "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
80
+ "marking-definition--97ba4e8b-04f6-57e8-8f6e-3a0f0a7dc0fb",
81
+ ],
82
+ }
83
+ )
73
84
 
74
85
  @classmethod
75
86
  def generate_report_id(cls, created_by_ref, created, name):
76
87
  if not created_by_ref:
77
- created_by_ref = cls.default_identity['id']
78
- return str(
79
- uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}")
80
- )
88
+ created_by_ref = cls.default_identity["id"]
89
+ return str(uuid.uuid5(UUID_NAMESPACE, f"{created_by_ref}+{created}+{name}"))
81
90
 
82
91
  def __init__(
83
92
  self,
@@ -89,7 +98,7 @@ class Bundler:
89
98
  created=None,
90
99
  modified=None,
91
100
  report_id=None,
92
- external_refs: list=None,
101
+ external_refs: list = None,
93
102
  reference_urls=None,
94
103
  license=None,
95
104
  **kwargs,
@@ -97,14 +106,24 @@ class Bundler:
97
106
  self.created = created or dt.now(UTC)
98
107
  self.modified = modified or self.created
99
108
  self.identity = identity or self.default_identity
100
- self.tlp_level = TLP_LEVEL.get(tlp_level or 'clear')
101
- self.uuid = report_id or self.generate_report_id(self.identity.id, self.created, name)
109
+ self.tlp_level = TLP_LEVEL.get(tlp_level or "clear")
110
+ self.uuid = report_id or self.generate_report_id(
111
+ self.identity.id, self.created, name
112
+ )
102
113
  self.reference_urls = reference_urls or []
103
114
  self.labels = labels or []
104
115
  self.license = license
105
116
 
106
117
  self.job_id = f"report--{self.uuid}"
107
- self.external_refs = (external_refs or []) + [dict(source_name='txt2detection', url=url, description='txt2detection-reference') for url in self.reference_urls]
118
+ self.external_refs = (external_refs or []) + [
119
+ dict(
120
+ source_name="txt2detection",
121
+ url=url,
122
+ description="txt2detection-reference",
123
+ )
124
+ for url in self.reference_urls
125
+ ]
126
+ self.data = DataContainer.model_construct()
108
127
 
109
128
  self.report = Report(
110
129
  created_by_ref=self.identity.id,
@@ -124,7 +143,8 @@ class Bundler:
124
143
  source_name="description_md5_hash",
125
144
  external_id=hashlib.md5((description or "").encode()).hexdigest(),
126
145
  )
127
- ] + self.external_refs,
146
+ ]
147
+ + self.external_refs,
128
148
  )
129
149
  self.report.object_refs.clear() # clear object refs
130
150
  self.set_defaults()
@@ -150,13 +170,15 @@ class Bundler:
150
170
  self.all_objects.add(sdo_id)
151
171
 
152
172
  def add_rule_indicator(self, detection: SigmaRuleDetection):
153
- indicator_types = getattr(detection, 'indicator_types', None)
173
+ indicator_types = getattr(detection, "indicator_types", None)
154
174
  if isinstance(detection, AIDetection):
155
175
  detection = detection.to_sigma_rule_detection(self)
156
- assert isinstance(detection, SigmaRuleDetection), f"detection of type {type(detection)} not supported"
176
+ assert isinstance(
177
+ detection, SigmaRuleDetection
178
+ ), f"detection of type {type(detection)} not supported"
157
179
  indicator = {
158
180
  "type": "indicator",
159
- "id": "indicator--"+str(detection.detection_id),
181
+ "id": "indicator--" + str(detection.detection_id),
160
182
  "spec_version": "2.1",
161
183
  "created_by_ref": self.report.created_by_ref,
162
184
  "created": self.report.created,
@@ -165,66 +187,87 @@ class Bundler:
165
187
  "name": detection.title,
166
188
  "description": detection.description,
167
189
  "labels": remove_rule_specific_tags(self.labels),
168
- "pattern_type": 'sigma',
190
+ "pattern_type": "sigma",
169
191
  "pattern": detection.make_rule(self),
170
192
  "valid_from": self.report.created,
171
193
  "object_marking_refs": self.report.object_marking_refs,
172
194
  "external_references": self.external_refs + detection.external_references,
173
195
  }
174
- indicator['external_references'].append(
196
+ indicator["external_references"].append(
175
197
  {
176
- "source_name": "rule_md5_hash",
177
- "external_id": hashlib.md5(indicator['pattern'].encode()).hexdigest()
198
+ "source_name": "rule_md5_hash",
199
+ "external_id": hashlib.md5(indicator["pattern"].encode()).hexdigest(),
178
200
  }
179
201
  )
180
202
  logsource = detection.make_data_source()
181
203
 
182
204
  logger.debug(f"===== rule {detection.detection_id} =====")
183
- logger.debug("```yaml\n"+indicator['pattern']+"\n```")
205
+ logger.debug("```yaml\n" + indicator["pattern"] + "\n```")
184
206
  logger.debug(f" =================== end of rule =================== ")
185
207
 
208
+ self.data.attacks = dict.fromkeys(detection.mitre_attack_ids, "Not found")
186
209
  for obj in self.get_attack_objects(detection.mitre_attack_ids):
187
210
  self.add_ref(obj)
188
211
  self.add_relation(indicator, obj)
212
+ self.data.attacks[obj["external_references"][0]["external_id"]] = obj["id"]
189
213
 
214
+ self.data.cves = dict.fromkeys(detection.cve_ids, "Not found")
190
215
  for obj in self.get_cve_objects(detection.cve_ids):
191
216
  self.add_ref(obj)
192
217
  self.add_relation(indicator, obj)
218
+ self.data.cves[obj["name"]] = obj["id"]
193
219
 
194
220
  self.add_ref(parse_stix(indicator, allow_custom=True), append_report=True)
195
- print('everywhere')
196
221
  self.add_ref(logsource, append_report=True)
197
- print('here')
198
- self.add_relation(indicator, logsource, description=f'{indicator["name"]} is created from {make_logsouce_string(logsource)}')
199
- print('there')
222
+ self.add_relation(
223
+ indicator,
224
+ logsource,
225
+ description=f'{indicator["name"]} is created from {make_logsouce_string(logsource)}',
226
+ )
200
227
 
201
- for ob_type, ob_value in set(observables.find_stix_observables(detection.detection)):
228
+ self.data.observables = []
229
+ for ob_type, ob_value in set(
230
+ observables.find_stix_observables(detection.detection)
231
+ ):
232
+ self.data.observables.append(dict(type=ob_type, value=ob_value))
202
233
  try:
203
234
  obj = observables.to_stix_object(ob_type, ob_value)
204
235
  self.add_ref(obj)
205
- self.add_relation(indicator, obj, 'related-to', target_name=ob_value)
206
- except:
236
+ self.add_relation(indicator, obj, "related-to", target_name=ob_value)
237
+ except Exception as e:
238
+ self.data.observables[-1]["error"] = str(e)
207
239
  logger.exception(f"failed to process observable {ob_type}/{ob_value}")
208
240
 
209
- def add_relation(self, indicator, target_object, relationship_type='related-to', target_name=None, description=None):
241
+ def add_relation(
242
+ self,
243
+ indicator,
244
+ target_object,
245
+ relationship_type="related-to",
246
+ target_name=None,
247
+ description=None,
248
+ ):
210
249
  ext_refs = []
211
250
 
212
251
  with contextlib.suppress(Exception):
213
- indicator['external_references'].append(target_object['external_references'][0])
214
- ext_refs = [target_object['external_references'][0]]
252
+ indicator["external_references"].append(
253
+ target_object["external_references"][0]
254
+ )
255
+ ext_refs = [target_object["external_references"][0]]
215
256
 
216
257
  if not description:
217
- target_name = target_name or f"{target_object['external_references'][0]['external_id']} ({target_object['name']})"
258
+ target_name = (
259
+ target_name
260
+ or f"{target_object['external_references'][0]['external_id']} ({target_object['name']})"
261
+ )
218
262
  description = f"{indicator['name']} {relationship_type} {target_name}"
219
263
 
220
- rel = Relationship(
221
- id="relationship--" + str(
222
- uuid.uuid5(
223
- UUID_NAMESPACE, f"{indicator['id']}+{target_object['id']}"
224
- )
264
+ rel = Relationship(
265
+ id="relationship--"
266
+ + str(
267
+ uuid.uuid5(UUID_NAMESPACE, f"{indicator['id']}+{target_object['id']}")
225
268
  ),
226
- source_ref=indicator['id'],
227
- target_ref=target_object['id'],
269
+ source_ref=indicator["id"],
270
+ target_ref=target_object["id"],
228
271
  relationship_type=relationship_type,
229
272
  created_by_ref=self.report.created_by_ref,
230
273
  description=description,
@@ -247,50 +290,99 @@ class Bundler:
247
290
  if not attack_ids:
248
291
  return []
249
292
  logger.debug(f"retrieving attack objects: {attack_ids}")
250
- endpoint = urljoin(os.environ['CTIBUTLER_BASE_URL'] + '/', f"v1/attack-enterprise/objects/?attack_id="+','.join(attack_ids))
293
+ endpoint = urljoin(
294
+ os.environ["CTIBUTLER_BASE_URL"] + "/",
295
+ f"v1/attack-enterprise/objects/?attack_id=" + ",".join(attack_ids),
296
+ )
251
297
 
252
298
  headers = {}
253
- if api_key := os.environ.get('CTIBUTLER_API_KEY'):
254
- headers['API-KEY'] = api_key
299
+ if api_key := os.environ.get("CTIBUTLER_API_KEY"):
300
+ headers["API-KEY"] = api_key
255
301
 
256
302
  return self._get_objects(endpoint, headers)
257
303
 
258
- def get_cve_objects(self, cve_ids):
304
+ @classmethod
305
+ def get_attack_tactics(cls):
306
+ headers = {}
307
+ api_root = os.environ["CTIBUTLER_BASE_URL"] + "/"
308
+ if api_key := os.environ.get("CTIBUTLER_API_KEY"):
309
+ headers["API-KEY"] = api_key
310
+
311
+ endpoint = urljoin(
312
+ api_root, f"v1/attack-enterprise/objects/?attack_type=Tactic"
313
+ )
314
+ version_url = urljoin(api_root, f"v1/attack-enterprise/versions/installed/")
315
+ tactics = cls._get_objects(endpoint, headers=headers)
316
+ retval = dict(
317
+ version=requests.get(version_url, headers=headers).json()["latest"]
318
+ )
319
+ for tac in tactics:
320
+ retval[tac["x_mitre_shortname"]] = tac
321
+ retval[tac["external_references"][0]["external_id"]] = tac
322
+ return retval
323
+
324
+ @classmethod
325
+ def get_cve_objects(cls, cve_ids):
259
326
  if not cve_ids:
260
327
  return []
261
328
  logger.debug(f"retrieving cve objects: {cve_ids}")
262
- endpoint = urljoin(os.environ['VULMATCH_BASE_URL'] + '/', f"v1/cve/objects/?cve_id="+','.join(cve_ids))
329
+ endpoint = urljoin(
330
+ os.environ["VULMATCH_BASE_URL"] + "/",
331
+ f"v1/cve/objects/?cve_id=" + ",".join(cve_ids),
332
+ )
263
333
  headers = {}
264
- if api_key := os.environ.get('VULMATCH_API_KEY'):
265
- headers['API-KEY'] = api_key
334
+ if api_key := os.environ.get("VULMATCH_API_KEY"):
335
+ headers["API-KEY"] = api_key
266
336
 
267
- return self._get_objects(endpoint, headers)
337
+ return cls._get_objects(endpoint, headers)
268
338
 
269
- def _get_objects(self, endpoint, headers):
339
+ @classmethod
340
+ def _get_objects(cls, endpoint, headers):
270
341
  data = []
271
342
  page = 1
272
343
  while True:
273
- resp = requests.get(endpoint, params=dict(page=page, page_size=1000), headers=headers)
344
+ resp = requests.get(
345
+ endpoint, params=dict(page=page, page_size=1000), headers=headers
346
+ )
274
347
  if resp.status_code != 200:
275
348
  break
276
349
  d = resp.json()
277
- if len(d['objects']) == 0:
350
+ if len(d["objects"]) == 0:
278
351
  break
279
- data.extend(d['objects'])
280
- page+=1
281
- if d['page_results_count'] < d['page_size']:
352
+ data.extend(d["objects"])
353
+ page += 1
354
+ if d["page_results_count"] < d["page_size"]:
282
355
  break
283
356
  return data
284
357
 
285
358
  def bundle_detections(self, container: DetectionContainer):
286
- self.detections = container
359
+ self.data = DataContainer(detections=container)
287
360
  if not container.success:
288
361
  return
289
362
  for d in container.detections:
290
363
  self.add_rule_indicator(d)
291
364
 
365
+
366
+ @property
367
+ def flow_objects(self):
368
+ return self._flow_objects
369
+
370
+ @flow_objects.setter
371
+ def flow_objects(self, objects):
372
+ smo_objects = requests.get(self.ATTACK_FLOW_SMO_URL).json()["objects"]
373
+ objects.extend(smo_objects)
374
+ for obj in objects:
375
+ if obj["id"] == self.report.id:
376
+ continue
377
+ is_report_object = obj["type"] not in ["extension-definition", "identity"]
378
+ self.add_ref(obj, append_report=is_report_object)
379
+ self._flow_objects = objects
380
+
381
+
382
+
292
383
  def make_logsouce_string(source: dict):
293
- d = [f'{k}={v}' for k, v in source.items()
294
- if k in ['product', 'service', 'category']]
295
- d_str = ', '.join(d)
296
- return 'log-source {'+d_str+'}'
384
+ d = [
385
+ f"{k}={v}" for k, v in source.items() if k in ["product", "service", "category"]
386
+ ]
387
+ d_str = ", ".join(d)
388
+ return "log-source {" + d_str + "}"
@@ -5,7 +5,6 @@ from urllib.parse import urljoin
5
5
  import requests
6
6
 
7
7
 
8
-
9
8
  def check_llms():
10
9
  from txt2detection.__main__ import parse_model
11
10
 
@@ -23,14 +22,17 @@ def check_llms():
23
22
 
24
23
  def check_ctibutler_vulmatch(service):
25
24
  session = requests.Session()
26
- if service == 'vulmatch':
27
- base_url = os.getenv('VULMATCH_BASE_URL')
28
- url = urljoin(base_url, 'v1/cve/objects/vulnerability--f552f6f4-39da-48dc-8717-323772c99588/')
29
- session.headers['API-KEY'] = os.environ.get('VULMATCH_API_KEY')
30
- elif service == 'ctibutler':
31
- base_url = os.getenv('CTIBUTLER_BASE_URL')
32
- url = urljoin(base_url, 'v1/location/versions/available/')
33
- session.headers['API-KEY'] = os.environ.get('CTIBUTLER_API_KEY')
25
+ if service == "vulmatch":
26
+ base_url = os.getenv("VULMATCH_BASE_URL")
27
+ url = urljoin(
28
+ base_url,
29
+ "v1/cve/objects/vulnerability--f552f6f4-39da-48dc-8717-323772c99588/",
30
+ )
31
+ session.headers["API-KEY"] = os.environ.get("VULMATCH_API_KEY")
32
+ elif service == "ctibutler":
33
+ base_url = os.getenv("CTIBUTLER_BASE_URL")
34
+ url = urljoin(base_url, "v1/location/versions/available/")
35
+ session.headers["API-KEY"] = os.environ.get("CTIBUTLER_API_KEY")
34
36
 
35
37
  try:
36
38
  resp = session.get(url)
txt2detection/models.py CHANGED
@@ -19,6 +19,8 @@ from stix2 import (
19
19
  MarkingDefinition,
20
20
  )
21
21
 
22
+ from txt2detection.ai_extractor.models import AttackFlowList
23
+
22
24
  if typing.TYPE_CHECKING:
23
25
  from txt2detection.bundler import Bundler
24
26
 
@@ -398,6 +400,15 @@ class DetectionContainer(BaseModel):
398
400
  detections: list[Union[BaseDetection, AIDetection, SigmaRuleDetection]]
399
401
 
400
402
 
403
+ class DataContainer(BaseModel):
404
+ detections: DetectionContainer
405
+ attack_flow: AttackFlowList = Field(default=None)
406
+ navigator_layer: list = Field(default=None)
407
+ observables: list[dict] = Field(default=None)
408
+ cves: dict[str, str] = Field(default=None)
409
+ attacks: dict[str, str] = Field(default=None)
410
+
411
+
401
412
  def tlp_from_tags(tags: list[SigmaTag]):
402
413
  for tag in tags:
403
414
  ns, _, level = tag.partition(".")
@@ -183,4 +183,3 @@ def to_stix_object(observable_type: str, value):
183
183
 
184
184
  # for a, b in observables:
185
185
  # print(to_stix_object(a, b))
186
-
txt2detection/utils.py CHANGED
@@ -17,11 +17,14 @@ from .models import UUID_NAMESPACE
17
17
  class DetectionLanguage(SimpleNamespace):
18
18
  pass
19
19
 
20
+
20
21
  def parse_model(value: str):
21
- splits = value.split(':', 1)
22
+ splits = value.split(":", 1)
22
23
  provider = splits[0]
23
24
  if provider not in ALL_AI_EXTRACTORS:
24
- raise NotImplementedError(f"invalid AI provider in `{value}`, must be one of {list(ALL_AI_EXTRACTORS)}")
25
+ raise NotImplementedError(
26
+ f"invalid AI provider in `{value}`, must be one of {list(ALL_AI_EXTRACTORS)}"
27
+ )
25
28
  provider = ALL_AI_EXTRACTORS[provider]
26
29
  try:
27
30
  if len(splits) == 2:
@@ -30,8 +33,10 @@ def parse_model(value: str):
30
33
  except Exception as e:
31
34
  raise ModelError(f"Unable to initialize model `{value}`") from e
32
35
 
36
+
33
37
  def make_identity(name, namespace=None, created_by_ref=None, object_marking_refs=None):
34
38
  from .bundler import Bundler
39
+
35
40
  if isinstance(namespace, str):
36
41
  namespace = uuid.UUID(namespace)
37
42
  namespace = namespace or UUID_NAMESPACE
@@ -41,25 +46,31 @@ def make_identity(name, namespace=None, created_by_ref=None, object_marking_refs
41
46
  created_by_ref=created_by_ref or Bundler.default_identity.id,
42
47
  created=datetime(2020, 1, 1),
43
48
  modified=datetime(2020, 1, 1),
44
- object_marking_refs=object_marking_refs or [
49
+ object_marking_refs=object_marking_refs
50
+ or [
45
51
  "marking-definition--94868c89-83c2-464b-929b-a1a8aa3c8487",
46
- "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7"
52
+ "marking-definition--a4d70b75-6f4a-5d19-9137-da863edd33d7",
47
53
  ],
48
54
  )
49
55
 
50
56
 
51
57
  def validate_token_count(max_tokens, input, extractor: BaseAIExtractor):
52
- logging.info('INPUT_TOKEN_LIMIT = %d', max_tokens)
58
+ logging.info("INPUT_TOKEN_LIMIT = %d", max_tokens)
53
59
  token_count = extractor.count_tokens(input)
54
- logging.info('TOKEN COUNT FOR %s: %d', extractor.extractor_name, token_count)
55
- if token_count > max_tokens:
56
- raise Exception(f"{extractor.extractor_name}: input_file token count ({token_count}) exceeds INPUT_TOKEN_LIMIT ({max_tokens})")
60
+ logging.info("TOKEN COUNT FOR %s: %d", extractor.extractor_name, token_count)
61
+ if token_count > max_tokens:
62
+ raise Exception(
63
+ f"{extractor.extractor_name}: input_file token count ({token_count}) exceeds INPUT_TOKEN_LIMIT ({max_tokens})"
64
+ )
57
65
 
58
66
 
59
67
  @lru_cache(maxsize=5)
60
68
  def get_licenses(date):
61
- resp = requests.get("https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json")
62
- return {l['licenseId']: l['name'] for l in resp.json()['licenses']}
69
+ resp = requests.get(
70
+ "https://github.com/spdx/license-list-data/raw/refs/heads/main/json/licenses.json"
71
+ )
72
+ return {l["licenseId"]: l["name"] for l in resp.json()["licenses"]}
73
+
63
74
 
64
75
  def valid_licenses():
65
76
  return get_licenses(datetime.now().date().isoformat())
@@ -75,9 +86,10 @@ def remove_rule_specific_tags(tags):
75
86
  return labels
76
87
 
77
88
 
78
- def as_date(d: 'date|datetime'):
89
+ def as_date(d: "date|datetime"):
79
90
  if isinstance(d, datetime):
80
91
  return d.date()
81
92
  return d
82
93
 
83
- STATUSES = ['stable', 'test', 'experimental', 'deprecated', 'unsupported']
94
+
95
+ STATUSES = ["stable", "test", "experimental", "deprecated", "unsupported"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: txt2detection
3
- Version: 1.0.8
3
+ Version: 1.0.10
4
4
  Summary: A command line tool that takes a txt file containing threat intelligence and turns it into a detection rule.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/txt2detection
6
6
  Project-URL: Issues, https://github.com/muchdogesec/txt2detection/issues
@@ -72,12 +72,6 @@ txt2detection allows a user to enter some threat intelligence as a file to consi
72
72
  2. Based on the user input, AI prompts are structured and sent to produce an intelligence rule
73
73
  3. Rules converted into STIX objects
74
74
 
75
- ## tl;dr
76
-
77
- [![txt2detection](https://img.youtube.com/vi/uJWXYKyu3Xg/0.jpg)](https://www.youtube.com/watch?v=uJWXYKyu3Xg)
78
-
79
- [Watch the demo](https://www.youtube.com/watch?v=uJWXYKyu3Xg).
80
-
81
75
  ## Usage
82
76
 
83
77
  ### Setup
@@ -162,12 +156,14 @@ Use this mode to generate a set of rules from an input text file;
162
156
  * `--license` (optional): [License of the rule according to the SPDX ID specification](https://spdx.org/licenses/). Will be added to the rule.
163
157
  * `--reference_urls` (optional): A list of URLs to be added as `references` in the Sigma Rule property and in the `external_references` property of the Indicator and Report STIX object created. e.g `"https://www.google.com/" "https://www.facebook.com/"`
164
158
  * `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
165
- * `ai_provider` (required): defines the `provider:model` to be used to generate the rule. Select one option. Currently supports:
159
+ * `--ai_provider` (required): defines the `provider:model` to be used to generate the rule. Select one option. Currently supports:
166
160
  * Provider (env var required `OPENROUTER_API_KEY`): `openrouter:`, providers/models `openai/gpt-4o`, `deepseek/deepseek-chat` ([More here](https://openrouter.ai/models))
167
161
  * Provider (env var required `OPENAI_API_KEY`): `openai:`, models e.g.: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` ([More here](https://platform.openai.com/docs/models))
168
162
  * Provider (env var required `ANTHROPIC_API_KEY`): `anthropic:`, models e.g.: `claude-3-5-sonnet-latest`, `claude-3-5-haiku-latest`, `claude-3-opus-latest` ([More here](https://docs.anthropic.com/en/docs/about-claude/models))
169
163
  * Provider (env var required `GOOGLE_API_KEY`): `gemini:models/`, models: `gemini-1.5-pro-latest`, `gemini-1.5-flash-latest` ([More here](https://ai.google.dev/gemini-api/docs/models/gemini))
170
164
  * Provider (env var required `DEEPSEEK_API_KEY`): `deepseek:`, models `deepseek-chat` ([More here](https://api-docs.deepseek.com/quick_start/pricing))
165
+ * `--ai_create_attack_flow` (boolean, default `false`): passing this flag will also prompt the AI model (the same one entered for `--ai_provider`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags to define the logical order in which they are being described. Note, Sigma currently supports ATT&CK Enterprise only.
166
+ * `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as Attack Flow generation relies on the Navigator layer setting being true)
171
167
 
172
168
  Note, in this mode, the following values will be automatically assigned to the rule
173
169
 
@@ -194,6 +190,8 @@ Note, in this mode you should be aware of a few things;
194
190
  * `--external_refs` (optional): txt2detection will automatically populate the `external_references` of the report object it creates for the input. You can use this value to add additional objects to `external_references`. Note, you can only add `source_name` and `external_id` values currently. Pass as `source_name=external_id`. e.g. `--external_refs txt2stix=demo1 source=id` would create the following objects under the `external_references` property: `{"source_name":"txt2stix","external_id":"demo1"},{"source_name":"source","external_id":"id"}`
195
191
  * `status` (optional): either `stable`, `test`, `experimental`, `deprecated`, `unsupported`. If passed, will overwrite any existing `status` recorded in the rule
196
192
  * `level` (optional): either `informational`, `low`, `medium`, `high`, `critical`. If passed, will overwrite any existing `level` recorded in the rule
193
+ * `--ai_create_attack_flow` (boolean, default `false`): passing this flag will also prompt the AI model (the same one entered for `--ai_provider`) to generate an [Attack Flow](https://center-for-threat-informed-defense.github.io/attack-flow/) for the MITRE ATT&CK tags to define the logical order in which they are being described. Note, Sigma currently supports ATT&CK Enterprise only.
194
+ * `--ai_create_attack_navigator_layer` (boolean, default `false`): passing this flag will generate a [MITRE ATT&CK Navigator layer](https://mitre-attack.github.io/attack-navigator/) for MITRE ATT&CK tags. Note, Sigma currently supports ATT&CK Enterprise only. You don't need to pass this if `--ai_create_attack_flow` is set to `true` (as Attack Flow generation relies on the Navigator layer setting being true)
197
195
 
198
196
  ### A note on observable extraction
199
197
 
@@ -0,0 +1,24 @@
1
+ txt2detection/__init__.py,sha256=Fc460P0q_eb2u3Xc89z-fwl-4ai3jrPqPNVwJQYNkNQ,89
2
+ txt2detection/__main__.py,sha256=s5XcIctE59ALjys6Y8lRIqS_pQWi1mlNo2gyG8_XS5s,11622
3
+ txt2detection/attack_flow.py,sha256=x6GhDZZ8xOzugfMELvHvrhclcIqozGIt9_mzyr2KKnA,8741
4
+ txt2detection/bundler.py,sha256=eGCIwLY0J_SVyOI_1IFsm_8RgvaE_32t5MIc_UyJwm0,13994
5
+ txt2detection/credential_checker.py,sha256=NuKk7WlDshtdpGecxY1exoi4fUHCygunPH2lZ20oEA8,2598
6
+ txt2detection/models.py,sha256=_-sR03FEWI46OUZdL7U0tibNn909B0NU9LWNzopBtiY,12888
7
+ txt2detection/observables.py,sha256=RxgJchvk6_Z2pBxJ6MAGsx00gj8TyRt9W2BTQTb1F9o,6762
8
+ txt2detection/utils.py,sha256=EJ5lMhnghUgW0JbcRmeiDXYwm5GaB6XrG4cUjru-52g,2812
9
+ txt2detection/ai_extractor/__init__.py,sha256=itcwTF0-S80mx-SuSvfrKazvcwsojR-QsBN-UvnSDwE,418
10
+ txt2detection/ai_extractor/anthropic.py,sha256=YOi2rHUeeoRMS4CFG6mX7xUU4q4rw9qNl72R74UN6ZM,420
11
+ txt2detection/ai_extractor/base.py,sha256=2C3d4BoH7I4fnvp6cLxbtjiFVPm4WJLFwnS_lAppHr8,3210
12
+ txt2detection/ai_extractor/deepseek.py,sha256=2XehIYbWXG6Odq68nQX4CNtl5GdmBlAmjLP_lG2eEFo,660
13
+ txt2detection/ai_extractor/gemini.py,sha256=hlcKkiHGzQJ0dQECfIhjx2LfdhZoquAF9POwz61RAhw,557
14
+ txt2detection/ai_extractor/models.py,sha256=xMTvUHoxIflbBA4mkGLTjwf657DVEOxd6gqLpEUciQ4,963
15
+ txt2detection/ai_extractor/openai.py,sha256=ggonpHtckNz9GEJIR0ADMzZWDKi6EWuicP0fsxvkP3A,616
16
+ txt2detection/ai_extractor/openrouter.py,sha256=rL-SnzRhzrCnPJGLxbTlRyxU0NAw42RmSq3ouuo3Iag,658
17
+ txt2detection/ai_extractor/prompts.py,sha256=xI82PelsTidnRzi5wnNbEC4lmkio92YUDd8SZu4CQiE,10961
18
+ txt2detection/ai_extractor/utils.py,sha256=SUxyPhkGp5yDbX_H_E018i93R8IbyLsQ00PIBDecfuc,540
19
+ txt2detection/config/detection_languages.yaml,sha256=dgQUJPxhDRJ_IiFEFOiH0yhEer3SkFSIhY4pS3BsX2c,287
20
+ txt2detection-1.0.10.dist-info/METADATA,sha256=CHTRZrV_v6gfyAyEW6hfNaQutVpSv5yM7w084u_x7U4,15870
21
+ txt2detection-1.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
+ txt2detection-1.0.10.dist-info/entry_points.txt,sha256=ep_rLlS2r1-kKE7S3iKf3SVwbCU9-FZhU9zUebitw7A,62
23
+ txt2detection-1.0.10.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
24
+ txt2detection-1.0.10.dist-info/RECORD,,