stix2arango 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stix2arango might be problematic. Click here for more details.

@@ -16,12 +16,14 @@ from tqdm import tqdm
16
16
  from ..services.arangodb_service import ArangoDBService
17
17
  from jsonschema import validate
18
18
  from arango.collection import StandardCollection
19
+ import arango.exceptions
19
20
 
20
21
 
21
22
  from .. import utils
22
23
 
23
24
  module_logger = logging.getLogger("data_ingestion_service")
24
25
  SMO_TYPES = ["marking-definition", "extension-definition", "language-content"]
26
+ LARGE_FILE_SIZE = 80 * 1024 * 1024
25
27
 
26
28
 
27
29
  class Stix2Arango:
@@ -52,9 +54,9 @@ class Stix2Arango:
52
54
  """
53
55
  `modify_fn` should modify in-place, returned value is discarded
54
56
  """
55
-
57
+
56
58
  self.alter_functions = []
57
-
59
+
58
60
  self.core_collection_vertex, self.core_collection_edge = (
59
61
  utils.get_vertex_and_edge_collection_names(collection)
60
62
  )
@@ -76,7 +78,7 @@ class Stix2Arango:
76
78
  self.arangodb_extra_data = {}
77
79
 
78
80
  self.file = file
79
- self.is_large_file = is_large_file
81
+ self._is_large_file = is_large_file
80
82
  self.note = stix2arango_note or ""
81
83
  self.identity_ref = utils.load_file_from_url(config.STIX2ARANGO_IDENTITY)
82
84
  self.default_ref_objects = [
@@ -98,6 +100,10 @@ class Stix2Arango:
98
100
  if self.file:
99
101
  self.filename = Path(self.file).name
100
102
 
103
+ @property
104
+ def is_large_file(self):
105
+ return self._is_large_file or os.path.getsize(self.file) > LARGE_FILE_SIZE
106
+
101
107
  def alter_objects(self, objects: list[dict]):
102
108
  for obj in objects:
103
109
  obj.update(self.arangodb_extra_data)
@@ -106,7 +112,9 @@ class Stix2Arango:
106
112
  fn(obj)
107
113
  except Exception as e:
108
114
  logging.warning(f"alter function {fn} failed on {obj}")
109
- logging.warning(f"alter function {fn} failed on {obj}", exc_info=True)
115
+ logging.warning(
116
+ f"alter function {fn} failed on {obj}", exc_info=True
117
+ )
110
118
 
111
119
  def add_object_alter_fn(self, modify_fn):
112
120
  if not callable(modify_fn):
@@ -157,16 +165,37 @@ class Stix2Arango:
157
165
  )
158
166
  )
159
167
 
168
+ def create_analyzer(self, *args, **kwargs):
169
+ try:
170
+ return self.arango.db.create_analyzer(*args, **kwargs)
171
+ except arango.exceptions.AnalyzerCreateError as e:
172
+ if e.error_code != 10:
173
+ raise
174
+
160
175
  def create_taxii_views(self):
161
176
  views = set()
177
+ self.create_analyzer(
178
+ name="date_transform",
179
+ analyzer_type="aql",
180
+ features=[],
181
+ properties={
182
+ "queryString": "RETURN DATE_TIMESTAMP(@param)*1000 + TO_NUMBER(LAST(REGEX_MATCHES(@param, '.+\\\\.(\\\\d{6}).*')))%1000",
183
+ "collapsePositions": False,
184
+ "keepNull": True,
185
+ "batchSize": 1000,
186
+ "memoryLimit": 10485760,
187
+ "returnType": "number",
188
+ },
189
+ )
162
190
  for name, collection in self.arango.collections.items():
191
+ logging.info(f'creating taxii index for {name}')
163
192
  collection.add_index(
164
193
  dict(
165
194
  type="inverted",
166
195
  name="taxii_search",
167
196
  sparse=True,
168
197
  fields=[
169
- "_record_created",
198
+ dict(name="_record_created", analyzer="date_transform"),
170
199
  "modified",
171
200
  "id",
172
201
  "_taxii.visible",
@@ -182,7 +211,12 @@ class Stix2Arango:
182
211
  },
183
212
  )
184
213
  )
185
- views.add('ats__' + name.removesuffix('_vertex_collection').removesuffix('_edge_collection'))
214
+ views.add(
215
+ "ats__"
216
+ + name.removesuffix("_vertex_collection").removesuffix(
217
+ "_edge_collection"
218
+ )
219
+ )
186
220
 
187
221
  def create_default_indexes(self):
188
222
  for name, collection in self.arango.collections.items():
@@ -354,17 +388,23 @@ class Stix2Arango:
354
388
  module_logger.info(
355
389
  f"Inserting objects into database. Total objects: {len(objects)}"
356
390
  )
357
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
391
+ with self.arango.transactional(
392
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
393
+ ):
358
394
  inserted_object_ids, existing_objects = (
359
395
  self.arango.insert_several_objects_chunked(
360
396
  objects, self.core_collection_vertex
361
397
  )
362
398
  )
363
399
  deprecated_key_ids = self.arango.update_is_latest_several_chunked(
364
- inserted_object_ids, self.core_collection_vertex, self.core_collection_edge
400
+ inserted_object_ids,
401
+ self.core_collection_vertex,
402
+ self.core_collection_edge,
365
403
  )
366
404
 
367
- self.update_object_key_mapping(self.core_collection_vertex, objects, existing_objects)
405
+ self.update_object_key_mapping(
406
+ self.core_collection_vertex, objects, existing_objects
407
+ )
368
408
  return inserted_object_ids, existing_objects, deprecated_key_ids
369
409
 
370
410
  def update_object_key_mapping(self, collection, objects, existing_objects={}):
@@ -409,16 +449,22 @@ class Stix2Arango:
409
449
  module_logger.info(
410
450
  f"Inserting relationship into database. Total objects: {len(objects)}"
411
451
  )
412
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
452
+ with self.arango.transactional(
453
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
454
+ ):
413
455
  inserted_object_ids, existing_objects = (
414
456
  self.arango.insert_relationships_chunked(
415
457
  objects, self.object_key_mapping, self.core_collection_edge
416
458
  )
417
459
  )
418
460
  deprecated_key_ids = self.arango.update_is_latest_several_chunked(
419
- inserted_object_ids, self.core_collection_edge, self.core_collection_edge
461
+ inserted_object_ids,
462
+ self.core_collection_edge,
463
+ self.core_collection_edge,
420
464
  )
421
- self.update_object_key_mapping(self.core_collection_edge, objects, existing_objects)
465
+ self.update_object_key_mapping(
466
+ self.core_collection_edge, objects, existing_objects
467
+ )
422
468
  return inserted_object_ids, deprecated_key_ids
423
469
 
424
470
  def map_embedded_relationships(self, bundle_objects, inserted_object_ids):
@@ -441,7 +487,7 @@ class Stix2Arango:
441
487
  targets=targets,
442
488
  relationship=ref_type,
443
489
  arango_obj=self,
444
- bundle_id=self.bundle_id or '',
490
+ bundle_id=self.bundle_id or "",
445
491
  insert_statement=objects,
446
492
  extra_data=self.arangodb_extra_data,
447
493
  )
@@ -451,15 +497,20 @@ class Stix2Arango:
451
497
  )
452
498
 
453
499
  self.alter_objects(objects)
454
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
455
- inserted_object_ids, existing_objects = (
456
- self.arango.insert_relationships_chunked(
457
- objects, self.object_key_mapping, self.core_collection_edge
500
+ inserted_object_ids = []
501
+ existing_objects = {}
502
+ for chunk in utils.chunked(objects, 20_000):
503
+ with self.arango.transactional(
504
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
505
+ ):
506
+ inserted, existing = self.arango.insert_relationships_chunked(
507
+ chunk, self.object_key_mapping, self.core_collection_edge
458
508
  )
459
- )
460
- self.arango.update_is_latest_several_chunked(
461
- inserted_object_ids, self.core_collection_edge, self.core_collection_edge
462
- )
509
+ inserted_object_ids.extend(inserted)
510
+ existing_objects.update(existing)
511
+ self.arango.update_is_latest_several_chunked(
512
+ inserted_object_ids, self.core_collection_edge, self.core_collection_edge
513
+ )
463
514
  return inserted_object_ids, existing_objects
464
515
 
465
516
  def import_default_objects(self):
@@ -536,8 +587,9 @@ class Stix2Arango:
536
587
  all_objects, inserted_object_ids + inserted_relationship_ids
537
588
  )
538
589
 
539
-
540
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
590
+ with self.arango.transactional(
591
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
592
+ ):
541
593
  self.arango.deprecate_relationships(
542
594
  deprecated_key_ids1, self.core_collection_edge
543
595
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stix2arango
3
- Version: 1.0.1
3
+ Version: 1.0.3
4
4
  Summary: stix2arango is a command line tool that takes a group of STIX 2.1 objects in a bundle and inserts them into ArangoDB. It can also handle updates to existing objects in ArangoDB imported in a bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/stix2arango
6
6
  Project-URL: Issues, https://github.com/muchdogesec/stix2arango/issues
@@ -7,10 +7,10 @@ stix2arango/services/arangodb_service.py,sha256=mXBQUeKR9IHErmvuZ73AiASZKawZOpuR
7
7
  stix2arango/services/version_annotator.py,sha256=Sd1MIaXzK0fpNopNxRoB_3etodzAjX5D_p3uGQSWzOI,2946
8
8
  stix2arango/stix2arango/__init__.py,sha256=OqxWEEsHqR1QQpznM5DbFJ5bO5numKYtoYhjXYJMEyg,36
9
9
  stix2arango/stix2arango/bundle_loader.py,sha256=qi-0E_bMIMPZXzISvjhrWX8K-f7iFv9vOekldOGVczU,4603
10
- stix2arango/stix2arango/stix2arango.py,sha256=AWObgUYS1rc1XpR1qgH1PhBMO_kQg2JcqlVzw08_fzY,20559
10
+ stix2arango/stix2arango/stix2arango.py,sha256=PyrDzW8YuKzibpMA0UImwpDBf1BeiP5pUz5tE_nVwG4,22073
11
11
  stix2arango/templates/marking-definition.json,sha256=0q9y35mUmiF6xIWSLpkATL4JTHGSCNyLbejqZiQ0AuE,3113
12
- stix2arango-1.0.1.dist-info/METADATA,sha256=JWUiWOqhJXzl6bOaLtR2LZYHVOR52eYnTx1o9Z-uAIQ,6873
13
- stix2arango-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- stix2arango-1.0.1.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
- stix2arango-1.0.1.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
- stix2arango-1.0.1.dist-info/RECORD,,
12
+ stix2arango-1.0.3.dist-info/METADATA,sha256=Y_g0HMF6JKVOSw1JJz0d9n8sSH6Vtkbm9kwoh_NSTjA,6873
13
+ stix2arango-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ stix2arango-1.0.3.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
+ stix2arango-1.0.3.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
+ stix2arango-1.0.3.dist-info/RECORD,,