stix2arango 1.0.2__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stix2arango might be problematic. Click here for more details.

@@ -220,7 +220,8 @@ class ArangoDBService:
220
220
  )
221
221
  out = [dict(zip(('id', '_key', 'modified', '_record_modified', '_is_latest', '_id'), obj_tuple)) for obj_tuple in out]
222
222
  annotated, deprecated = annotate_versions(out)
223
- self.db.collection(collection_name).update_many(annotated, sync=True, keep_none=False)
223
+ for chunk in utils.chunked(annotated, 10_000):
224
+ self.db.collection(collection_name).update_many(chunk, sync=True, keep_none=False)
224
225
  return deprecated
225
226
 
226
227
 
@@ -297,8 +298,9 @@ class ArangoDBService:
297
298
  @contextlib.contextmanager
298
299
  def transactional(self, write=None, exclusive=None, sync=True):
299
300
  original_db = self.db
300
- transactional_db = self.db.begin_transaction(allow_implicit=True, write=write, exclusive=exclusive, sync=sync)
301
+ transactional_db = self.db.begin_transaction(allow_implicit=True, write=write, exclusive=exclusive, sync=sync, lock_timeout=300)
301
302
  try:
303
+ logging.info(f"entering transaction: {transactional_db.transaction_status()}")
302
304
  self.db = transactional_db
303
305
  yield self
304
306
  transactional_db.commit_transaction()
@@ -306,4 +308,5 @@ class ArangoDBService:
306
308
  transactional_db.abort_transaction()
307
309
  raise
308
310
  finally:
311
+ logging.info(f"exiting transaction: {transactional_db.transaction_status()}")
309
312
  self.db = original_db
@@ -16,12 +16,14 @@ from tqdm import tqdm
16
16
  from ..services.arangodb_service import ArangoDBService
17
17
  from jsonschema import validate
18
18
  from arango.collection import StandardCollection
19
+ import arango.exceptions
19
20
 
20
21
 
21
22
  from .. import utils
22
23
 
23
24
  module_logger = logging.getLogger("data_ingestion_service")
24
25
  SMO_TYPES = ["marking-definition", "extension-definition", "language-content"]
26
+ LARGE_FILE_SIZE = 80 * 1024 * 1024
25
27
 
26
28
 
27
29
  class Stix2Arango:
@@ -52,9 +54,9 @@ class Stix2Arango:
52
54
  """
53
55
  `modify_fn` should modify in-place, returned value is discarded
54
56
  """
55
-
57
+
56
58
  self.alter_functions = []
57
-
59
+
58
60
  self.core_collection_vertex, self.core_collection_edge = (
59
61
  utils.get_vertex_and_edge_collection_names(collection)
60
62
  )
@@ -76,7 +78,7 @@ class Stix2Arango:
76
78
  self.arangodb_extra_data = {}
77
79
 
78
80
  self.file = file
79
- self.is_large_file = is_large_file
81
+ self._is_large_file = is_large_file
80
82
  self.note = stix2arango_note or ""
81
83
  self.identity_ref = utils.load_file_from_url(config.STIX2ARANGO_IDENTITY)
82
84
  self.default_ref_objects = [
@@ -98,6 +100,10 @@ class Stix2Arango:
98
100
  if self.file:
99
101
  self.filename = Path(self.file).name
100
102
 
103
+ @property
104
+ def is_large_file(self):
105
+ return self._is_large_file or os.path.getsize(self.file) > LARGE_FILE_SIZE
106
+
101
107
  def alter_objects(self, objects: list[dict]):
102
108
  for obj in objects:
103
109
  obj.update(self.arangodb_extra_data)
@@ -106,7 +112,9 @@ class Stix2Arango:
106
112
  fn(obj)
107
113
  except Exception as e:
108
114
  logging.warning(f"alter function {fn} failed on {obj}")
109
- logging.warning(f"alter function {fn} failed on {obj}", exc_info=True)
115
+ logging.warning(
116
+ f"alter function {fn} failed on {obj}", exc_info=True
117
+ )
110
118
 
111
119
  def add_object_alter_fn(self, modify_fn):
112
120
  if not callable(modify_fn):
@@ -157,16 +165,37 @@ class Stix2Arango:
157
165
  )
158
166
  )
159
167
 
168
+ def create_analyzer(self, *args, **kwargs):
169
+ try:
170
+ return self.arango.db.create_analyzer(*args, **kwargs)
171
+ except arango.exceptions.AnalyzerCreateError as e:
172
+ if e.error_code != 10:
173
+ raise
174
+
160
175
  def create_taxii_views(self):
161
176
  views = set()
177
+ self.create_analyzer(
178
+ name="date_transform",
179
+ analyzer_type="aql",
180
+ features=[],
181
+ properties={
182
+ "queryString": "RETURN DATE_TIMESTAMP(@param)*1000 + TO_NUMBER(LAST(REGEX_MATCHES(@param, '.+\\\\.(\\\\d{6}).*')))%1000",
183
+ "collapsePositions": False,
184
+ "keepNull": True,
185
+ "batchSize": 1000,
186
+ "memoryLimit": 10485760,
187
+ "returnType": "number",
188
+ },
189
+ )
162
190
  for name, collection in self.arango.collections.items():
191
+ logging.info(f'creating taxii index for {name}')
163
192
  collection.add_index(
164
193
  dict(
165
194
  type="inverted",
166
195
  name="taxii_search",
167
196
  sparse=True,
168
197
  fields=[
169
- "_record_created",
198
+ dict(name="_record_created", analyzer="date_transform"),
170
199
  "modified",
171
200
  "id",
172
201
  "_taxii.visible",
@@ -182,7 +211,12 @@ class Stix2Arango:
182
211
  },
183
212
  )
184
213
  )
185
- views.add('ats__' + name.removesuffix('_vertex_collection').removesuffix('_edge_collection'))
214
+ views.add(
215
+ "ats__"
216
+ + name.removesuffix("_vertex_collection").removesuffix(
217
+ "_edge_collection"
218
+ )
219
+ )
186
220
 
187
221
  def create_default_indexes(self):
188
222
  for name, collection in self.arango.collections.items():
@@ -354,17 +388,23 @@ class Stix2Arango:
354
388
  module_logger.info(
355
389
  f"Inserting objects into database. Total objects: {len(objects)}"
356
390
  )
357
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
391
+ with self.arango.transactional(
392
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
393
+ ):
358
394
  inserted_object_ids, existing_objects = (
359
395
  self.arango.insert_several_objects_chunked(
360
396
  objects, self.core_collection_vertex
361
397
  )
362
398
  )
363
399
  deprecated_key_ids = self.arango.update_is_latest_several_chunked(
364
- inserted_object_ids, self.core_collection_vertex, self.core_collection_edge
400
+ inserted_object_ids,
401
+ self.core_collection_vertex,
402
+ self.core_collection_edge,
365
403
  )
366
404
 
367
- self.update_object_key_mapping(self.core_collection_vertex, objects, existing_objects)
405
+ self.update_object_key_mapping(
406
+ self.core_collection_vertex, objects, existing_objects
407
+ )
368
408
  return inserted_object_ids, existing_objects, deprecated_key_ids
369
409
 
370
410
  def update_object_key_mapping(self, collection, objects, existing_objects={}):
@@ -409,16 +449,22 @@ class Stix2Arango:
409
449
  module_logger.info(
410
450
  f"Inserting relationship into database. Total objects: {len(objects)}"
411
451
  )
412
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
452
+ with self.arango.transactional(
453
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
454
+ ):
413
455
  inserted_object_ids, existing_objects = (
414
456
  self.arango.insert_relationships_chunked(
415
457
  objects, self.object_key_mapping, self.core_collection_edge
416
458
  )
417
459
  )
418
460
  deprecated_key_ids = self.arango.update_is_latest_several_chunked(
419
- inserted_object_ids, self.core_collection_edge, self.core_collection_edge
461
+ inserted_object_ids,
462
+ self.core_collection_edge,
463
+ self.core_collection_edge,
420
464
  )
421
- self.update_object_key_mapping(self.core_collection_edge, objects, existing_objects)
465
+ self.update_object_key_mapping(
466
+ self.core_collection_edge, objects, existing_objects
467
+ )
422
468
  return inserted_object_ids, deprecated_key_ids
423
469
 
424
470
  def map_embedded_relationships(self, bundle_objects, inserted_object_ids):
@@ -441,7 +487,7 @@ class Stix2Arango:
441
487
  targets=targets,
442
488
  relationship=ref_type,
443
489
  arango_obj=self,
444
- bundle_id=self.bundle_id or '',
490
+ bundle_id=self.bundle_id or "",
445
491
  insert_statement=objects,
446
492
  extra_data=self.arangodb_extra_data,
447
493
  )
@@ -454,11 +500,11 @@ class Stix2Arango:
454
500
  inserted_object_ids = []
455
501
  existing_objects = {}
456
502
  for chunk in utils.chunked(objects, 20_000):
457
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
458
- inserted, existing = (
459
- self.arango.insert_relationships_chunked(
460
- chunk, self.object_key_mapping, self.core_collection_edge
461
- )
503
+ with self.arango.transactional(
504
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
505
+ ):
506
+ inserted, existing = self.arango.insert_relationships_chunked(
507
+ chunk, self.object_key_mapping, self.core_collection_edge
462
508
  )
463
509
  inserted_object_ids.extend(inserted)
464
510
  existing_objects.update(existing)
@@ -541,8 +587,9 @@ class Stix2Arango:
541
587
  all_objects, inserted_object_ids + inserted_relationship_ids
542
588
  )
543
589
 
544
-
545
- with self.arango.transactional(exclusive=[self.core_collection_edge, self.core_collection_vertex]):
590
+ with self.arango.transactional(
591
+ exclusive=[self.core_collection_edge, self.core_collection_vertex]
592
+ ):
546
593
  self.arango.deprecate_relationships(
547
594
  deprecated_key_ids1, self.core_collection_edge
548
595
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stix2arango
3
- Version: 1.0.2
3
+ Version: 1.1.0
4
4
  Summary: stix2arango is a command line tool that takes a group of STIX 2.1 objects in a bundle and inserts them into ArangoDB. It can also handle updates to existing objects in ArangoDB imported in a bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/stix2arango
6
6
  Project-URL: Issues, https://github.com/muchdogesec/stix2arango/issues
@@ -3,14 +3,14 @@ stix2arango/__main__.py,sha256=wbR_iO70Vld2NYiml6Kz4rH396uOiNwTtjNBl4AHZEg,1987
3
3
  stix2arango/config.py,sha256=NZFrcnEfz-0QBrut2Rh7xMF78v0bk6U6y2TY_7mHxSs,1407
4
4
  stix2arango/utils.py,sha256=heln_kXBSTLo0R4n1IrfCHbRnCHTQf9Y2URhvwQ0Smc,4845
5
5
  stix2arango/services/__init__.py,sha256=E87fB-dxI4mPxMVs00jdLhjp9jFhkVfjhMKIqGLRJlY,45
6
- stix2arango/services/arangodb_service.py,sha256=mXBQUeKR9IHErmvuZ73AiASZKawZOpuRiJQXWaN8bx8,11706
6
+ stix2arango/services/arangodb_service.py,sha256=qgE7yb-ysqyJ1oUjqAV6eGa53WvWHAFAd6I7aKZ6KyU,11960
7
7
  stix2arango/services/version_annotator.py,sha256=Sd1MIaXzK0fpNopNxRoB_3etodzAjX5D_p3uGQSWzOI,2946
8
8
  stix2arango/stix2arango/__init__.py,sha256=OqxWEEsHqR1QQpznM5DbFJ5bO5numKYtoYhjXYJMEyg,36
9
9
  stix2arango/stix2arango/bundle_loader.py,sha256=qi-0E_bMIMPZXzISvjhrWX8K-f7iFv9vOekldOGVczU,4603
10
- stix2arango/stix2arango/stix2arango.py,sha256=UVv1OVy_BzqXYN9cWzjKK3QUSRuS-7jf3n_IgGQwx_8,20769
10
+ stix2arango/stix2arango/stix2arango.py,sha256=PyrDzW8YuKzibpMA0UImwpDBf1BeiP5pUz5tE_nVwG4,22073
11
11
  stix2arango/templates/marking-definition.json,sha256=0q9y35mUmiF6xIWSLpkATL4JTHGSCNyLbejqZiQ0AuE,3113
12
- stix2arango-1.0.2.dist-info/METADATA,sha256=uuH8tlqansoJvxNkqCxf4h_h1MteaRUQIlSQQG2DNOA,6873
13
- stix2arango-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- stix2arango-1.0.2.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
- stix2arango-1.0.2.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
- stix2arango-1.0.2.dist-info/RECORD,,
12
+ stix2arango-1.1.0.dist-info/METADATA,sha256=DHq9KWi0BV3juGOP-JLh5y9rxB-htxKu_l4vrw19fjg,6873
13
+ stix2arango-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ stix2arango-1.1.0.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
+ stix2arango-1.1.0.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
+ stix2arango-1.1.0.dist-info/RECORD,,