stix2arango 1.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stix2arango/__init__.py +0 -0
- stix2arango/__main__.py +81 -0
- stix2arango/config.py +41 -0
- stix2arango/services/__init__.py +1 -0
- stix2arango/services/arangodb_service.py +313 -0
- stix2arango/services/version_annotator.py +95 -0
- stix2arango/stix2arango/__init__.py +1 -0
- stix2arango/stix2arango/bundle_loader.py +143 -0
- stix2arango/stix2arango/stix2arango.py +601 -0
- stix2arango/templates/marking-definition.json +111 -0
- stix2arango/utils.py +150 -0
- stix2arango-1.1.10.dist-info/METADATA +171 -0
- stix2arango-1.1.10.dist-info/RECORD +16 -0
- stix2arango-1.1.10.dist-info/WHEEL +4 -0
- stix2arango-1.1.10.dist-info/entry_points.txt +2 -0
- stix2arango-1.1.10.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,601 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import pkgutil
|
|
8
|
+
import re
|
|
9
|
+
import time
|
|
10
|
+
import uuid
|
|
11
|
+
|
|
12
|
+
from .bundle_loader import BundleLoader
|
|
13
|
+
|
|
14
|
+
from .. import config
|
|
15
|
+
from tqdm import tqdm
|
|
16
|
+
from ..services.arangodb_service import ArangoDBService
|
|
17
|
+
from jsonschema import validate
|
|
18
|
+
from arango.collection import StandardCollection
|
|
19
|
+
import arango.exceptions
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
from .. import utils
|
|
23
|
+
|
|
24
|
+
# Logger used throughout this module; its name is a fixed string rather than __name__.
module_logger = logging.getLogger("data_ingestion_service")
# Object types checked when `ignore_embedded_relationships_smo` is set.
SMO_TYPES = ["marking-definition", "extension-definition", "language-content"]
# Size threshold in bytes (80 MiB); files larger than this are treated as large files.
LARGE_FILE_SIZE = 80 * 1024 * 1024
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Stix2Arango:
    """Import the objects of a STIX bundle into ArangoDB vertex and edge collections."""

    # Pattern for attribute names containing a `_ref`/`_refs` (or `-ref`/`-refs`)
    # segment; group 1 captures the text before that segment.
    EMBEDDED_RELATIONSHIP_RE = re.compile(r"([a-z\-_]+)[_\-]refs{0,1}")
    # Default file name; replaced per instance when an input file path is given.
    filename = "bundle.json"
    # Default server URL assembled from the configured host and port.
    ARANGODB_URL = f"http://{config.ARANGODB_HOST}:{config.ARANGODB_PORT}"
|
|
33
|
+
|
|
34
|
+
def __init__(
    self,
    database,
    collection,
    file,
    create_collection=True,
    create_db=True,
    stix2arango_note="",
    ignore_embedded_relationships=False,
    ignore_embedded_relationships_sro=True,
    ignore_embedded_relationships_smo=True,
    include_embedded_relationships_attributes=None,
    bundle_id=None,
    username=config.ARANGODB_USERNAME,
    password=config.ARANGODB_PASSWORD,
    host_url=ARANGODB_URL,
    is_large_file=False,
    skip_default_indexes=False,
    create_taxii_views=True,
    **kwargs,
):
    """
    Connect to ArangoDB and prepare the importer state.

    Vertex and edge collection names are derived from `collection`, the
    database service is created (extra `kwargs` are forwarded to it), the
    default reference objects are downloaded, and indexes are created when
    `create_collection` is set.

    Functions later registered with `add_object_alter_fn` should modify
    objects in-place; their returned value is discarded.
    """
    vertex_name, edge_name = utils.get_vertex_and_edge_collection_names(collection)

    self.alter_functions = []
    self.core_collection_vertex = vertex_name
    self.core_collection_edge = edge_name

    self.arango = ArangoDBService(
        database,
        [vertex_name],
        [edge_name],
        create=create_collection,
        create_db=create_db,
        username=username,
        password=password,
        host_url=host_url,
        **kwargs,
    )

    # Plain state first; none of these assignments talks to the database.
    self.arangodb_extra_data = {}
    self.object_key_mapping = {}
    self.file = file
    self.bundle_id = bundle_id
    self._is_large_file = is_large_file
    self.note = stix2arango_note or ""
    self.ignore_embedded_relationships = ignore_embedded_relationships
    self.ignore_embedded_relationships_smo = ignore_embedded_relationships_smo
    self.ignore_embedded_relationships_sro = ignore_embedded_relationships_sro
    self.include_embedded_relationships_attributes = (
        include_embedded_relationships_attributes
    )

    # Each loaded document is copied so later edits do not touch what the
    # loader returned.
    self.identity_ref = utils.load_file_from_url(config.STIX2ARANGO_IDENTITY).copy()
    default_refs = []
    for link in config.MARKING_DEFINITION_REFS + config.IDENTITY_REFS:
        default_refs.append(utils.load_file_from_url(link).copy())
    self.default_ref_objects = default_refs

    # NOTE(review): nesting reconstructed from a flattened listing; the two
    # inner checks are assumed to sit under `create_collection` -- confirm.
    if create_collection:
        self.create_s2a_indexes()
        if not skip_default_indexes:
            self.create_default_indexes()
        if create_taxii_views:
            self.create_taxii_views()

    if self.file:
        self.filename = Path(self.file).name
|
|
104
|
+
|
|
105
|
+
@property
def is_large_file(self):
    """
    Whether the input should be processed in large-file mode.

    True when large-file mode was requested explicitly, or when the input
    file on disk is bigger than `LARGE_FILE_SIZE`. When no file was given
    there is nothing to measure, so the answer is False instead of a
    `TypeError` from `os.path.getsize(None)`.
    """
    if self._is_large_file:
        return True
    if not self.file:
        return False
    return os.path.getsize(self.file) > LARGE_FILE_SIZE
|
|
108
|
+
|
|
109
|
+
def alter_objects(self, objects: list[dict]):
    """
    Apply the extra data and every registered alter function to `objects`.

    Each object is updated in place with `self.arangodb_extra_data`, then
    passed to each function in `self.alter_functions`; return values are
    ignored. A failing function is logged once (with traceback) and does not
    stop the remaining functions or objects from being processed.
    """
    for obj in objects:
        obj.update(self.arangodb_extra_data)
        for fn in self.alter_functions:
            try:
                fn(obj)
            except Exception:
                # Best effort by design: report the failure and carry on.
                logging.warning(
                    f"alter function {fn} failed on {obj}", exc_info=True
                )
|
|
120
|
+
|
|
121
|
+
def add_object_alter_fn(self, modify_fn):
    """
    Register `modify_fn` to be run on objects by `alter_objects`.

    Raises `ValueError` when `modify_fn` is not callable.
    """
    if callable(modify_fn):
        self.alter_functions.append(modify_fn)
        return
    raise ValueError("Bad modification function passed")
|
|
125
|
+
|
|
126
|
+
def create_s2a_indexes(self):
    """
    Add the `s2a_*` persistent indexes to every collection of the service.

    All collections get `s2a_search` and the unique `s2a_unique_constraint`;
    collections whose name ends in `_edge_collection` also get
    `s2a_search_edge`.
    """
    for name, collection in self.arango.collections.items():
        is_edge = name.endswith("_edge_collection")

        # Index definitions are rebuilt per collection so no dict is shared.
        collection.add_index(
            {
                "type": "persistent",
                "name": "s2a_search",
                "sparse": False,
                "fields": ["id", "modified", "_is_latest"],
                "inBackground": True,
                "storedValues": ["_record_modified", "_key", "_id"],
            }
        )
        collection.add_index(
            {
                "type": "persistent",
                "name": "s2a_unique_constraint",
                "unique": True,
                "fields": ["id", "_record_md5_hash"],
                "inBackground": True,
            }
        )
        if not is_edge:
            continue
        collection.add_index(
            {
                "type": "persistent",
                "name": "s2a_search_edge",
                "sparse": True,
                "fields": ["_from", "_is_latest"],
                "inBackground": True,
                "storedValues": ["_id"],
            }
        )
|
|
169
|
+
|
|
170
|
+
def create_analyzer(self, *args, **kwargs):
    """
    Create an analyzer on the database, forwarding all arguments.

    Returns the result of the database call. An `AnalyzerCreateError` whose
    `error_code` is 10 is swallowed and `None` is returned; any other error
    code is re-raised.
    """
    try:
        created = self.arango.db.create_analyzer(*args, **kwargs)
    except arango.exceptions.AnalyzerCreateError as err:
        # NOTE(review): code 10 is tolerated on purpose; presumably it is what
        # the server reports when the analyzer is already there -- confirm.
        if err.error_code == 10:
            return None
        raise
    return created
|
|
176
|
+
|
|
177
|
+
def create_taxii_views(self):
    """
    Create the `date_transform` analyzer and a `taxii_search` inverted index
    on every collection of the service.

    Despite its name, this method only creates an analyzer and indexes. The
    set of `ats__*` names the previous version built was never read or
    returned, so it has been dropped.
    """
    self.create_analyzer(
        name="date_transform",
        analyzer_type="aql",
        features=[],
        properties={
            "queryString": "RETURN DATE_TIMESTAMP(@param)*1000 + TO_NUMBER(LAST(REGEX_MATCHES(@param, '.+\\\\.(\\\\d{6}).*')))%1000",
            "collapsePositions": False,
            "keepNull": True,
            "batchSize": 1000,
            "memoryLimit": 10485760,
            "returnType": "number",
        },
    )
    for name, collection in self.arango.collections.items():
        logging.info(f'creating taxii index for {name}')
        collection.add_index(
            dict(
                type="inverted",
                name="taxii_search",
                sparse=True,
                fields=[
                    # Indexed through the analyzer created above.
                    dict(name="_record_created", analyzer="date_transform"),
                    "modified",
                    "id",
                    "_taxii.visible",
                    "_taxii.last",
                    "_taxii.first",
                    "spec_version",
                    "type",
                ],
                inBackground=True,
                # NOTE(review): `_created` is kept exactly as written; the
                # other indexes in this class use `created` and
                # `_record_created` -- confirm this is the intended field.
                storedValues=["_key", "_created"],
                primarySort={
                    "fields": [{"field": "_record_created", "direction": "asc"}]
                },
            )
        )
|
|
222
|
+
|
|
223
|
+
def create_default_indexes(self):
    """
    Add the default persistent indexes to every collection of the service.

    Five indexes are created on all collections and three more on
    collections whose name ends in `_edge_collection`. Each index name is
    suffixed with the current Unix timestamp (whole seconds), taken once per
    collection.
    """
    # (name prefix, indexed fields, stored values) for every collection.
    common_specs = (
        (
            "by_stix_id",
            ["id"],
            ["modified", "created", "type", "_record_modified", "spec_version", "_record_md5_hash"],
        ),
        (
            "by_stix_id_type",
            ["id", "type"],
            ["modified", "created", "_record_modified", "spec_version", "_record_md5_hash"],
        ),
        (
            "by_stix_version",
            ["modified", "created"],
            ["type", "_record_modified", "id", "spec_version", "_record_md5_hash"],
        ),
        (
            "by_stix_type",
            ["type"],
            ["modified", "created", "_record_modified", "id", "spec_version", "_record_md5_hash"],
        ),
        (
            "by_insertion_time",
            ["_record_modified", "_record_created"],
            ["modified", "created", "type", "id", "spec_version", "_record_md5_hash"],
        ),
    )
    edge_stored = [
        "modified",
        "created",
        "type",
        "_record_modified",
        "spec_version",
        "_record_md5_hash",
        "id",
    ]
    # Same layout, applied to edge collections only.
    edge_specs = (
        ("relation_from", ["source_ref", "target_ref", "relationship_type"], edge_stored),
        ("relation_to", ["target_ref", "source_ref", "relationship_type"], edge_stored),
        ("relation_type", ["relationship_type", "target_ref", "source_ref"], edge_stored),
    )

    for name, collection in self.arango.collections.items():
        module_logger.info(
            f"creating indexes for collection {collection.db_name}/{name}"
        )
        stamp = int(datetime.now().timestamp())

        specs = common_specs
        if name.endswith("_edge_collection"):
            specs = common_specs + edge_specs

        for prefix, fields, stored in specs:
            # Lists are copied so every definition owns its own data.
            collection.add_index(
                dict(
                    type="persistent",
                    fields=list(fields),
                    storedValues=list(stored),
                    inBackground=True,
                    name=f"{prefix}_{stamp}",
                )
            )
|
|
360
|
+
|
|
361
|
+
def default_objects(self):
    """
    Return the default objects to import into a collection.

    The result is a new list: the reference objects loaded at construction
    time followed by the entries of the packaged
    `templates/marking-definition.json`. `self.default_ref_objects` itself is
    left untouched. The previous version appended the templates to that
    instance list, so it grew with duplicates on every call.
    """
    packaged = json.loads(
        pkgutil.get_data("stix2arango", "templates/marking-definition.json")
    )
    return [*self.default_ref_objects, *packaged]
|
|
368
|
+
|
|
369
|
+
def process_bundle_into_graph(
    self, objects_in, notes=None, is_default_objects=False
):
    """
    Insert every non-relationship object of `objects_in` into the vertex
    collection.

    Objects are annotated in place before insertion: `_stix2arango_note`
    (only if absent; `notes` or the instance note), `_record_md5_hash`, and,
    unless `is_default_objects` is set, `_bundle_id` and `_file_name`. The
    instance's extra data is merged in last.

    Returns a tuple `(inserted_object_ids, existing_objects,
    deprecated_key_ids)` as produced by the ArangoDB service calls.
    """
    module_logger.info(f"Reading vertex from file {self.file} now")

    objects = []
    for obj in tqdm(objects_in, desc="upload_vertices"):
        if obj.get("type") == "relationship":
            continue
        obj.setdefault("_stix2arango_note", notes or self.note)
        obj["_record_md5_hash"] = utils.generate_md5(obj)
        if not is_default_objects:
            obj["_bundle_id"] = self.bundle_id or ""
            obj["_file_name"] = self.filename or ""
        obj.update(self.arangodb_extra_data)
        objects.append(obj)

    module_logger.info(
        f"Inserting objects into database. Total objects: {len(objects)}"
    )
    with self.arango.transactional(
        exclusive=[self.core_collection_edge, self.core_collection_vertex]
    ):
        inserted_object_ids, existing_objects = (
            self.arango.insert_several_objects_chunked(
                objects, self.core_collection_vertex
            )
        )
        deprecated_key_ids = self.arango.update_is_latest_several_chunked(
            inserted_object_ids,
            self.core_collection_vertex,
            self.core_collection_edge,
        )

    # NOTE(review): assumed to run after the transaction block (the original
    # listing has a blank line here) -- confirm against the real indentation.
    self.update_object_key_mapping(
        self.core_collection_vertex, objects, existing_objects
    )
    return inserted_object_ids, existing_objects, deprecated_key_ids
|
|
411
|
+
|
|
412
|
+
def update_object_key_mapping(self, collection, objects, existing_objects=None):
    """
    Record, for each object, the database handle its STIX id maps to.

    `existing_objects` maps `"<id>;<_record_md5_hash>"` to a handle; when an
    entry is found (and truthy) it is used as-is. Otherwise the handle is
    `"<collection>/<_key>"`, with `not_imported` standing in for a missing
    `_key`. Results are stored in `self.object_key_mapping`, keyed by id.

    `existing_objects` defaults to an empty mapping; `None` is accepted so
    the signature no longer carries a shared mutable default.
    """
    if existing_objects is None:
        existing_objects = {}

    for obj in objects:
        db_key = existing_objects.get(f"{obj['id']};{obj['_record_md5_hash']}")
        if not db_key:
            db_key = "{collection}/{_key}".format(
                collection=collection,
                _key=obj.get("_key", "not_imported"),
            )
        self.object_key_mapping[obj["id"]] = db_key
|
|
421
|
+
|
|
422
|
+
def map_relationships(self, filename, objects_in):
    """
    Insert every `relationship` object of `objects_in` into the edge
    collection.

    Each relationship is annotated in place: `_from`/`_to` (only if absent,
    built from the vertex collection name and the source/target refs),
    `_bundle_id`, `_file_name` (from `filename`), `_is_ref` and
    `_stix2arango_note` (both only if absent), then the instance's extra
    data.

    Returns a tuple `(inserted_object_ids, deprecated_key_ids)`.
    """
    module_logger.info("Mapping Prebuilt Relationship Objects -> ")

    objects = []
    for obj in tqdm(objects_in, desc="upload_edges"):
        if obj.get("type") != "relationship":
            continue

        source_ref = obj.get("source_ref")
        target_ref = obj.get("target_ref")

        obj.setdefault("_from", f"{self.core_collection_vertex}/{source_ref}")
        obj.setdefault("_to", f"{self.core_collection_vertex}/{target_ref}")
        obj["_bundle_id"] = self.bundle_id or ""
        obj["_file_name"] = filename
        obj.setdefault("_is_ref", False)
        obj.setdefault("_stix2arango_note", self.note)
        # `_record_md5_hash` is deliberately not computed here.
        # NOTE(review): presumably the service sets it during insertion, since
        # `update_object_key_mapping` below reads it -- confirm.
        obj.update(self.arangodb_extra_data)
        objects.append(obj)

    module_logger.info(
        f"Inserting relationship into database. Total objects: {len(objects)}"
    )
    with self.arango.transactional(
        exclusive=[self.core_collection_edge, self.core_collection_vertex]
    ):
        inserted_object_ids, existing_objects = (
            self.arango.insert_relationships_chunked(
                objects, self.object_key_mapping, self.core_collection_edge
            )
        )
        deprecated_key_ids = self.arango.update_is_latest_several_chunked(
            inserted_object_ids,
            self.core_collection_edge,
            self.core_collection_edge,
        )
        # NOTE(review): assumed to sit inside the transaction block; the
        # flattened listing does not show the original nesting -- confirm.
        self.update_object_key_mapping(
            self.core_collection_edge, objects, existing_objects
        )
    return inserted_object_ids, deprecated_key_ids
|
|
471
|
+
|
|
472
|
+
def map_embedded_relationships(self, bundle_objects, inserted_object_ids):
    """
    Build and insert edges for the embedded references of newly inserted
    objects.

    Only objects whose id is in `inserted_object_ids` are considered. Unless
    `include_embedded_relationships_attributes` is set, SMO-typed objects and
    relationship objects are skipped when the matching `ignore_*` flag is on.
    The generated edges are passed through `alter_objects` and inserted in
    chunks of 20,000, one transaction per chunk.

    Returns a tuple `(inserted_edge_ids, existing_objects)` accumulated over
    all chunks.
    """
    # A set keeps the per-object membership test O(1) on large bundles.
    newly_inserted = set(inserted_object_ids)

    objects = []
    for obj in tqdm(bundle_objects, desc="upload_embedded_edges"):
        if obj["id"] not in newly_inserted:
            continue
        # An explicit attribute list overrides both ignore flags.
        if not self.include_embedded_relationships_attributes and (
            (self.ignore_embedded_relationships_smo and obj["type"] in SMO_TYPES)
            or (
                self.ignore_embedded_relationships_sro
                and obj["type"] == "relationship"
            )
        ):
            continue

        for ref_type, targets in utils.get_embedded_refs(
            obj, attributes=self.include_embedded_relationships_attributes
        ):
            # The helper receives `objects` as `insert_statement`;
            # presumably it appends the generated edges to it -- confirm.
            utils.create_relationship_obj(
                obj=obj,
                source=obj.get("id"),
                targets=targets,
                relationship=ref_type,
                arango_obj=self,
                bundle_id=self.bundle_id or "",
                insert_statement=objects,
                extra_data=self.arangodb_extra_data,
            )

    module_logger.info(
        f"Inserting embedded relationship into database. Total objects: {len(objects)}"
    )

    self.alter_objects(objects)
    # Separate name so the `inserted_object_ids` argument is not rebound.
    inserted_edge_ids = []
    existing_objects = {}
    for chunk in utils.chunked(objects, 20_000):
        with self.arango.transactional(
            exclusive=[self.core_collection_edge, self.core_collection_vertex]
        ):
            inserted, existing = self.arango.insert_relationships_chunked(
                chunk, self.object_key_mapping, self.core_collection_edge
            )
            inserted_edge_ids.extend(inserted)
            existing_objects.update(existing)
            # NOTE(review): nesting reconstructed from a flattened listing.
            # This call is assumed to run per chunk inside the transaction,
            # with the ids accumulated so far, as the original passes the
            # cumulative list -- confirm against the real indentation.
            self.arango.update_is_latest_several_chunked(
                inserted_edge_ids, self.core_collection_edge, self.core_collection_edge
            )
    return inserted_edge_ids, existing_objects
|
|
518
|
+
|
|
519
|
+
def import_default_objects(self):
    """
    Insert the default objects into the vertex collection.

    They are processed as default objects, with a fixed note saying they were
    imported automatically.
    """
    defaults = self.default_objects()
    self.process_bundle_into_graph(
        defaults,
        notes="automatically imported on collection creation",
        is_default_objects=True,
    )
|
|
525
|
+
|
|
526
|
+
def run(self, data=None):
    """
    Import a bundle, either from `data` or from the configured file.

    When `data` is given it is imported directly. Otherwise the file is read:
    in large-file mode it is streamed through `BundleLoader` and imported
    chunk by chunk; in regular mode it is parsed as JSON, validated against
    the configured schema and imported in one go.

    Raises `Exception` when neither `data` nor a file is available, when the
    file cannot be parsed ("Invalid file type") or when it fails schema
    validation ("Invalid File structure"). The underlying error is chained as
    the cause.
    """
    if not data and not self.file:
        raise Exception("file or data must be passed")

    if data:
        self.run_with_bundle(data)
        return

    if self.is_large_file:
        module_logger.info("using large file mode")
        os.makedirs("db_loader_tempfiles", exist_ok=True)
        bundle_loader = BundleLoader(
            self.file, db_path=f"db_loader_tempfiles/mydb_{time.time()}.sqlite"
        )
        # NOTE(review): unlike regular mode, this path does not assign
        # `self.bundle_id` from the bundle -- confirm that is intended.
        for chunk in bundle_loader.chunks:
            self.run_with_bundle(
                {
                    "type": "bundle",
                    "objects": chunk,
                    "id": bundle_loader.bundle_id,
                }
            )
        return

    module_logger.info("using regular file mode")
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(self.file, "r", encoding="utf-8") as input_file:
        file_data = input_file.read()
    try:
        data = json.loads(file_data)
        self.bundle_id = self.bundle_id or data["id"]
    except Exception as e:
        raise Exception("Invalid file type") from e
    try:
        validate(instance=data, schema=config.json_schema)
    except Exception as e:
        raise Exception("Invalid File structure") from e

    self.run_with_bundle(data)
|
|
561
|
+
|
|
562
|
+
def run_with_bundle(self, bundle_dict):
    """
    Import one bundle dictionary into the collections.

    Steps, in order: apply the alter functions to the bundle's objects,
    import the default objects, insert vertices, insert relationship edges,
    optionally create edges from embedded references, then deprecate
    relationships for the keys reported as deprecated by the two insert
    steps.

    Raises `Exception` when `bundle_dict["type"]` is not `"bundle"`.
    """
    bundle_type = bundle_dict.get("type", None)
    if bundle_type != "bundle":
        raise Exception("Provided file is not a STIX bundle. Aborted")

    all_objects = bundle_dict["objects"]
    self.alter_objects(all_objects)

    module_logger.info(
        f"Loading default objects from url and store into {self.core_collection_vertex}"
    )
    self.import_default_objects()

    module_logger.info(
        f"Load objects from file: {self.file} and store into {self.core_collection_vertex}"
    )
    vertex_result = self.process_bundle_into_graph(all_objects)
    inserted_object_ids, _, vertex_deprecated = vertex_result

    module_logger.info("Mapping relationships now -> ")
    edge_result = self.map_relationships(self.filename, all_objects)
    inserted_relationship_ids, edge_deprecated = edge_result

    # An explicit attribute list turns embedded edges on even when they are
    # otherwise ignored.
    wants_embedded = (
        self.include_embedded_relationships_attributes
        or not self.ignore_embedded_relationships
    )
    if wants_embedded:
        module_logger.info(
            "Creating new embedded relationships using _refs and _ref"
        )
        self.map_embedded_relationships(
            all_objects, inserted_object_ids + inserted_relationship_ids
        )

    with self.arango.transactional(
        exclusive=[self.core_collection_edge, self.core_collection_vertex]
    ):
        for deprecated_keys in (vertex_deprecated, edge_deprecated):
            self.arango.deprecate_relationships(
                deprecated_keys, self.core_collection_edge
            )
|