stix2arango-1.1.4-py3-none-any.whl → stix2arango-1.1.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of stix2arango might be problematic. Click here for more details.
- stix2arango/stix2arango/bundle_loader.py +38 -21
- stix2arango/utils.py +3 -0
- {stix2arango-1.1.4.dist-info → stix2arango-1.1.6.dist-info}/METADATA +1 -1
- {stix2arango-1.1.4.dist-info → stix2arango-1.1.6.dist-info}/RECORD +7 -7
- {stix2arango-1.1.4.dist-info → stix2arango-1.1.6.dist-info}/WHEEL +0 -0
- {stix2arango-1.1.4.dist-info → stix2arango-1.1.6.dist-info}/entry_points.txt +0 -0
- {stix2arango-1.1.4.dist-info → stix2arango-1.1.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,6 +10,9 @@ import ijson
|
|
|
10
10
|
import json
|
|
11
11
|
from collections import Counter
|
|
12
12
|
|
|
13
|
+
from stix2arango.utils import get_embedded_refs
|
|
14
|
+
|
|
15
|
+
|
|
13
16
|
class BundleLoader:
|
|
14
17
|
def __init__(self, file_path, chunk_size_min=20_000, db_path=""):
|
|
15
18
|
self.file_path = Path(file_path)
|
|
@@ -19,34 +22,37 @@ class BundleLoader:
|
|
|
19
22
|
|
|
20
23
|
self.db_path = db_path
|
|
21
24
|
if not self.db_path:
|
|
22
|
-
self.temp_path = tempfile.NamedTemporaryFile(
|
|
25
|
+
self.temp_path = tempfile.NamedTemporaryFile(
|
|
26
|
+
prefix="s2a_bundle_loader--", suffix=".sqlite"
|
|
27
|
+
)
|
|
23
28
|
self.db_path = self.temp_path.name
|
|
24
29
|
self._init_db()
|
|
25
30
|
|
|
26
31
|
def _init_db(self):
|
|
27
32
|
"""Initialize SQLite DB with objects table."""
|
|
28
33
|
self.conn = sqlite3.connect(self.db_path)
|
|
29
|
-
self.conn.execute(
|
|
34
|
+
self.conn.execute(
|
|
35
|
+
"""
|
|
30
36
|
CREATE TABLE IF NOT EXISTS objects (
|
|
31
37
|
id TEXT PRIMARY KEY,
|
|
32
38
|
type TEXT,
|
|
33
39
|
raw TEXT
|
|
34
40
|
)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
self.conn.execute(
|
|
38
|
-
self.conn.execute(
|
|
41
|
+
"""
|
|
42
|
+
)
|
|
43
|
+
self.conn.execute("PRAGMA synchronous = OFF;")
|
|
44
|
+
self.conn.execute("PRAGMA journal_mode = MEMORY;")
|
|
45
|
+
self.conn.execute("PRAGMA temp_store = MEMORY;")
|
|
39
46
|
self.conn.commit()
|
|
40
47
|
|
|
41
|
-
|
|
42
48
|
def save_to_sqlite(self, objects):
|
|
43
49
|
"""Save one STIX object to the SQLite database."""
|
|
44
|
-
self.inserted = getattr(self,
|
|
50
|
+
self.inserted = getattr(self, "inserted", 0)
|
|
45
51
|
|
|
46
52
|
try:
|
|
47
53
|
self.conn.executemany(
|
|
48
54
|
"INSERT OR REPLACE INTO objects (id, type, raw) VALUES (?, ?, ?)",
|
|
49
|
-
[(obj[
|
|
55
|
+
[(obj["id"], obj["type"], json.dumps(obj)) for obj in objects],
|
|
50
56
|
)
|
|
51
57
|
except sqlite3.IntegrityError as e:
|
|
52
58
|
print(f"Failed to insert len({objects}) objects: {e}")
|
|
@@ -55,6 +61,13 @@ class BundleLoader:
|
|
|
55
61
|
self.inserted += len(objects)
|
|
56
62
|
# logging.info(f"inserted {self.inserted}")
|
|
57
63
|
|
|
64
|
+
@staticmethod
|
|
65
|
+
def get_refs(obj):
|
|
66
|
+
refs = []
|
|
67
|
+
for _type, targets in get_embedded_refs(obj):
|
|
68
|
+
refs.extend(targets)
|
|
69
|
+
return refs
|
|
70
|
+
|
|
58
71
|
def build_groups(self):
|
|
59
72
|
"""
|
|
60
73
|
Iterates the STIX bundle and uses union-find to group IDs such that for every
|
|
@@ -63,30 +76,35 @@ class BundleLoader:
|
|
|
63
76
|
"""
|
|
64
77
|
all_ids: dict[str, list[str]] = dict() # All object IDs in the file
|
|
65
78
|
logging.info(f"loading into {self.db_path}")
|
|
66
|
-
|
|
67
|
-
with open(self.file_path,
|
|
68
|
-
objects = ijson.items(f,
|
|
79
|
+
|
|
80
|
+
with open(self.file_path, "rb") as f:
|
|
81
|
+
objects = ijson.items(f, "objects.item", use_float=True)
|
|
69
82
|
to_insert = []
|
|
70
83
|
for obj in objects:
|
|
71
|
-
obj_id = obj.get(
|
|
84
|
+
obj_id = obj.get("id")
|
|
72
85
|
to_insert.append(obj)
|
|
73
86
|
all_ids.setdefault(obj_id, [])
|
|
74
|
-
if obj[
|
|
75
|
-
|
|
87
|
+
if obj["type"] == "relationship" and all(
|
|
88
|
+
x in obj for x in ["target_ref", "source_ref"]
|
|
89
|
+
):
|
|
90
|
+
sr, tr = [obj["source_ref"], obj["target_ref"]]
|
|
76
91
|
all_ids[obj_id].extend([sr, tr])
|
|
77
92
|
all_ids.setdefault(sr, []).extend([tr, obj_id])
|
|
78
93
|
all_ids.setdefault(tr, []).extend([sr, obj_id])
|
|
94
|
+
for ref in self.get_refs(obj):
|
|
95
|
+
all_ids[obj_id].append(ref)
|
|
79
96
|
if len(to_insert) >= self.chunk_size_min:
|
|
80
97
|
self.save_to_sqlite(to_insert)
|
|
81
98
|
to_insert.clear()
|
|
82
99
|
if to_insert:
|
|
83
100
|
self.save_to_sqlite(to_insert)
|
|
84
|
-
|
|
101
|
+
|
|
85
102
|
logging.info(f"loaded {self.inserted} into {self.db_path}")
|
|
86
103
|
handled = set()
|
|
87
104
|
|
|
88
105
|
self.groups = []
|
|
89
106
|
group = set()
|
|
107
|
+
|
|
90
108
|
def from_ids(all_ids):
|
|
91
109
|
for obj_id in all_ids:
|
|
92
110
|
if obj_id in handled:
|
|
@@ -104,18 +122,17 @@ class BundleLoader:
|
|
|
104
122
|
if group:
|
|
105
123
|
self.groups.append(tuple(group))
|
|
106
124
|
return self.groups
|
|
107
|
-
|
|
125
|
+
|
|
108
126
|
def load_objects_by_ids(self, ids):
|
|
109
127
|
"""Retrieve a list of STIX objects by their IDs from the SQLite database."""
|
|
110
|
-
placeholders =
|
|
128
|
+
placeholders = ",".join(["?"] * len(ids))
|
|
111
129
|
query = f"SELECT raw FROM objects WHERE id IN ({placeholders})"
|
|
112
130
|
cursor = self.conn.execute(query, list(ids))
|
|
113
131
|
return [json.loads(row[0]) for row in cursor.fetchall()]
|
|
114
132
|
|
|
115
|
-
|
|
116
133
|
def get_objects(self, group):
|
|
117
134
|
return list(self.load_objects_by_ids(group))
|
|
118
|
-
|
|
135
|
+
|
|
119
136
|
@property
|
|
120
137
|
def chunks(self):
|
|
121
138
|
for group in self.groups or self.build_groups():
|
|
@@ -123,4 +140,4 @@ class BundleLoader:
|
|
|
123
140
|
|
|
124
141
|
def __del__(self):
|
|
125
142
|
with contextlib.suppress(Exception):
|
|
126
|
-
os.remove(self.db_path)
|
|
143
|
+
os.remove(self.db_path)
|
stix2arango/utils.py
CHANGED
|
@@ -125,6 +125,9 @@ def get_embedded_refs(object: list | dict, xpath: list = [], attributes=None):
|
|
|
125
125
|
if match := EMBEDDED_RELATIONSHIP_RE.fullmatch(key):
|
|
126
126
|
relationship_type = "-".join(xpath + match.group(1).split("_"))
|
|
127
127
|
targets = value if isinstance(value, list) else [value]
|
|
128
|
+
targets = [_target for _target in targets if _target and isinstance(_target, str)]
|
|
129
|
+
if not targets:
|
|
130
|
+
continue
|
|
128
131
|
if attributes and key not in attributes:
|
|
129
132
|
continue
|
|
130
133
|
embedded_refs.append((relationship_type, targets))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: stix2arango
|
|
3
|
-
Version: 1.1.4
|
|
3
|
+
Version: 1.1.6
|
|
4
4
|
Summary: stix2arango is a command line tool that takes a group of STIX 2.1 objects in a bundle and inserts them into ArangoDB. It can also handle updates to existing objects in ArangoDB imported in a bundle.
|
|
5
5
|
Project-URL: Homepage, https://github.com/muchdogesec/stix2arango
|
|
6
6
|
Project-URL: Issues, https://github.com/muchdogesec/stix2arango/issues
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
stix2arango/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
stix2arango/__main__.py,sha256=zsCi_bfDULLDkqlRwXyGhFuLvSRcvESEc4MMN7h1lbQ,2835
|
|
3
3
|
stix2arango/config.py,sha256=NZFrcnEfz-0QBrut2Rh7xMF78v0bk6U6y2TY_7mHxSs,1407
|
|
4
|
-
stix2arango/utils.py,sha256=
|
|
4
|
+
stix2arango/utils.py,sha256=C-VDwsCABFU2hrv1EwF7oaQUYOE2MWG40_7WGzHu0A4,4796
|
|
5
5
|
stix2arango/services/__init__.py,sha256=E87fB-dxI4mPxMVs00jdLhjp9jFhkVfjhMKIqGLRJlY,45
|
|
6
6
|
stix2arango/services/arangodb_service.py,sha256=jr6zXFueluCU60WOJy7XuA9Ty0zW5FzGNBJGtJzq0PY,11964
|
|
7
7
|
stix2arango/services/version_annotator.py,sha256=Sd1MIaXzK0fpNopNxRoB_3etodzAjX5D_p3uGQSWzOI,2946
|
|
8
8
|
stix2arango/stix2arango/__init__.py,sha256=OqxWEEsHqR1QQpznM5DbFJ5bO5numKYtoYhjXYJMEyg,36
|
|
9
|
-
stix2arango/stix2arango/bundle_loader.py,sha256=
|
|
9
|
+
stix2arango/stix2arango/bundle_loader.py,sha256=YphKnJIiHjg_nuQUf59OUthDOVIQYoOIoOqqQUlU1II,4982
|
|
10
10
|
stix2arango/stix2arango/stix2arango.py,sha256=HJXDqA9NWxXVQSHPmbpkEKurpWEbZmy5bng5SQ1OsjE,22412
|
|
11
11
|
stix2arango/templates/marking-definition.json,sha256=0q9y35mUmiF6xIWSLpkATL4JTHGSCNyLbejqZiQ0AuE,3113
|
|
12
|
-
stix2arango-1.1.
|
|
13
|
-
stix2arango-1.1.
|
|
14
|
-
stix2arango-1.1.
|
|
15
|
-
stix2arango-1.1.
|
|
16
|
-
stix2arango-1.1.
|
|
12
|
+
stix2arango-1.1.6.dist-info/METADATA,sha256=ga8FQKsBxCFYGOyV6YAV9gZOaMv8-0nkLiyTpTsxWKA,7797
|
|
13
|
+
stix2arango-1.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
+
stix2arango-1.1.6.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
|
|
15
|
+
stix2arango-1.1.6.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
|
|
16
|
+
stix2arango-1.1.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|