stix2arango 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of stix2arango might be problematic. Click here for more details.

stix2arango/__main__.py CHANGED
@@ -1,27 +1,81 @@
1
1
  import argparse
2
2
  from stix2arango.stix2arango import Stix2Arango
3
3
 
4
+
4
5
  def parse_bool(value: str):
5
6
  value = value.lower()
6
7
  # ["false", "no", "n"]
7
8
  return value in ["yes", "y", "true", "1"]
8
9
 
10
+ def parse_ref(value: str):
11
+ if not (value.endswith('_ref') or value.endswith('_refs')):
12
+ raise argparse.ArgumentTypeError('value must end with _ref or _refs')
13
+ return value
14
+
15
+
9
16
  def parse_arguments():
10
17
  parser = argparse.ArgumentParser(description="Import STIX JSON into ArangoDB")
11
18
  parser.add_argument("--file", required=True, help="Path to STIX JSON file")
12
- parser.add_argument("--is_large_file", action="store_true", help="Use large file mode [Use this mode when the bundle is very large, this will enable you stix2arango to chunk before loading into memory]")
19
+ parser.add_argument(
20
+ "--is_large_file",
21
+ action="store_true",
22
+ help="Use large file mode [Use this mode when the bundle is very large, this will enable you stix2arango to chunk before loading into memory]",
23
+ )
13
24
  parser.add_argument("--database", required=True, help="ArangoDB database name")
14
- parser.add_argument("--create_db", default=True, type=parse_bool, help="whether or not to skip the creation of database, requires admin permission")
25
+ parser.add_argument(
26
+ "--create_db",
27
+ default=True,
28
+ type=parse_bool,
29
+ help="whether or not to skip the creation of database, requires admin permission",
30
+ )
15
31
  parser.add_argument("--collection", required=True, help="ArangoDB collection name")
16
- parser.add_argument("--stix2arango_note", required=False, help="Note for the import", default="")
17
- parser.add_argument("--ignore_embedded_relationships", required=False, help="Ignore Embedded Relationship for the import", type=parse_bool, default=False)
18
- parser.add_argument("--ignore_embedded_relationships_sro", required=False, help="Ignore Embedded Relationship for imported SROs", type=parse_bool, default=False)
19
- parser.add_argument("--ignore_embedded_relationships_smo", required=False, help="Ignore Embedded Relationship for imported SMOs", type=parse_bool, default=False)
20
-
32
+ parser.add_argument(
33
+ "--stix2arango_note", required=False, help="Note for the import", default=""
34
+ )
35
+ parser.add_argument(
36
+ "--ignore_embedded_relationships",
37
+ required=False,
38
+ help="Ignore Embedded Relationship for the import",
39
+ type=parse_bool,
40
+ default=False,
41
+ )
42
+ parser.add_argument(
43
+ "--ignore_embedded_relationships_sro",
44
+ required=False,
45
+ help="Ignore Embedded Relationship for imported SROs",
46
+ type=parse_bool,
47
+ default=False,
48
+ )
49
+ parser.add_argument(
50
+ "--ignore_embedded_relationships_smo",
51
+ required=False,
52
+ help="Ignore Embedded Relationship for imported SMOs",
53
+ type=parse_bool,
54
+ default=False,
55
+ )
56
+ parser.add_argument(
57
+ "--include_embedded_relationships_attributes",
58
+ required=False,
59
+ help="Only create embedded relationships for keys",
60
+ action="extend",
61
+ nargs="+",
62
+ type=parse_ref
63
+ )
21
64
  return parser.parse_args()
22
65
 
23
66
 
24
67
  def main():
25
68
  args = parse_arguments()
26
- stix_obj = Stix2Arango(args.database, args.collection, file=args.file, create_db=args.create_db, stix2arango_note=args.stix2arango_note, ignore_embedded_relationships=args.ignore_embedded_relationships, ignore_embedded_relationships_sro=args.ignore_embedded_relationships_sro, ignore_embedded_relationships_smo=args.ignore_embedded_relationships_smo, is_large_file=args.is_large_file)
27
- stix_obj.run()
69
+ stix_obj = Stix2Arango(
70
+ database=args.database,
71
+ collection=args.collection,
72
+ file=args.file,
73
+ create_db=args.create_db,
74
+ stix2arango_note=args.stix2arango_note,
75
+ ignore_embedded_relationships=args.ignore_embedded_relationships,
76
+ ignore_embedded_relationships_sro=args.ignore_embedded_relationships_sro,
77
+ ignore_embedded_relationships_smo=args.ignore_embedded_relationships_smo,
78
+ is_large_file=args.is_large_file,
79
+ include_embedded_relationships_attributes=args.include_embedded_relationships_attributes,
80
+ )
81
+ stix_obj.run()
@@ -10,6 +10,9 @@ import ijson
10
10
  import json
11
11
  from collections import Counter
12
12
 
13
+ from stix2arango.utils import get_embedded_refs
14
+
15
+
13
16
  class BundleLoader:
14
17
  def __init__(self, file_path, chunk_size_min=20_000, db_path=""):
15
18
  self.file_path = Path(file_path)
@@ -19,34 +22,37 @@ class BundleLoader:
19
22
 
20
23
  self.db_path = db_path
21
24
  if not self.db_path:
22
- self.temp_path = tempfile.NamedTemporaryFile(prefix='s2a_bundle_loader--', suffix='.sqlite')
25
+ self.temp_path = tempfile.NamedTemporaryFile(
26
+ prefix="s2a_bundle_loader--", suffix=".sqlite"
27
+ )
23
28
  self.db_path = self.temp_path.name
24
29
  self._init_db()
25
30
 
26
31
  def _init_db(self):
27
32
  """Initialize SQLite DB with objects table."""
28
33
  self.conn = sqlite3.connect(self.db_path)
29
- self.conn.execute('''
34
+ self.conn.execute(
35
+ """
30
36
  CREATE TABLE IF NOT EXISTS objects (
31
37
  id TEXT PRIMARY KEY,
32
38
  type TEXT,
33
39
  raw TEXT
34
40
  )
35
- ''')
36
- self.conn.execute('PRAGMA synchronous = OFF;')
37
- self.conn.execute('PRAGMA journal_mode = MEMORY;')
38
- self.conn.execute('PRAGMA temp_store = MEMORY;')
41
+ """
42
+ )
43
+ self.conn.execute("PRAGMA synchronous = OFF;")
44
+ self.conn.execute("PRAGMA journal_mode = MEMORY;")
45
+ self.conn.execute("PRAGMA temp_store = MEMORY;")
39
46
  self.conn.commit()
40
47
 
41
-
42
48
  def save_to_sqlite(self, objects):
43
49
  """Save one STIX object to the SQLite database."""
44
- self.inserted = getattr(self, 'inserted', 0)
50
+ self.inserted = getattr(self, "inserted", 0)
45
51
 
46
52
  try:
47
53
  self.conn.executemany(
48
54
  "INSERT OR REPLACE INTO objects (id, type, raw) VALUES (?, ?, ?)",
49
- [(obj['id'], obj['type'], json.dumps(obj)) for obj in objects]
55
+ [(obj["id"], obj["type"], json.dumps(obj)) for obj in objects],
50
56
  )
51
57
  except sqlite3.IntegrityError as e:
52
58
  print(f"Failed to insert len({objects}) objects: {e}")
@@ -55,6 +61,15 @@ class BundleLoader:
55
61
  self.inserted += len(objects)
56
62
  # logging.info(f"inserted {self.inserted}")
57
63
 
64
+ @staticmethod
65
+ def get_refs(obj):
66
+ refs = []
67
+ for _type, targets in get_embedded_refs(obj):
68
+ if _type in ["created-by", "object-marking"]:
69
+ continue
70
+ refs.extend(targets)
71
+ return refs
72
+
58
73
  def build_groups(self):
59
74
  """
60
75
  Iterates the STIX bundle and uses union-find to group IDs such that for every
@@ -63,30 +78,36 @@ class BundleLoader:
63
78
  """
64
79
  all_ids: dict[str, list[str]] = dict() # All object IDs in the file
65
80
  logging.info(f"loading into {self.db_path}")
66
-
67
- with open(self.file_path, 'rb') as f:
68
- objects = ijson.items(f, 'objects.item', use_float=True)
81
+
82
+ with open(self.file_path, "rb") as f:
83
+ objects = ijson.items(f, "objects.item", use_float=True)
69
84
  to_insert = []
70
85
  for obj in objects:
71
- obj_id = obj.get('id')
86
+ obj_id = obj.get("id")
72
87
  to_insert.append(obj)
73
88
  all_ids.setdefault(obj_id, [])
74
- if obj['type'] == 'relationship' and all(x in obj for x in ['target_ref', 'source_ref']):
75
- sr, tr = [obj['source_ref'], obj['target_ref']]
89
+ if obj["type"] == "relationship" and all(
90
+ x in obj for x in ["target_ref", "source_ref"]
91
+ ):
92
+ sr, tr = [obj["source_ref"], obj["target_ref"]]
76
93
  all_ids[obj_id].extend([sr, tr])
77
94
  all_ids.setdefault(sr, []).extend([tr, obj_id])
78
95
  all_ids.setdefault(tr, []).extend([sr, obj_id])
96
+ for ref in self.get_refs(obj):
97
+ all_ids[obj_id].append(ref)
98
+ all_ids.setdefault(ref, []).append(obj_id)
79
99
  if len(to_insert) >= self.chunk_size_min:
80
100
  self.save_to_sqlite(to_insert)
81
101
  to_insert.clear()
82
102
  if to_insert:
83
103
  self.save_to_sqlite(to_insert)
84
-
104
+
85
105
  logging.info(f"loaded {self.inserted} into {self.db_path}")
86
106
  handled = set()
87
107
 
88
108
  self.groups = []
89
109
  group = set()
110
+
90
111
  def from_ids(all_ids):
91
112
  for obj_id in all_ids:
92
113
  if obj_id in handled:
@@ -104,18 +125,17 @@ class BundleLoader:
104
125
  if group:
105
126
  self.groups.append(tuple(group))
106
127
  return self.groups
107
-
128
+
108
129
  def load_objects_by_ids(self, ids):
109
130
  """Retrieve a list of STIX objects by their IDs from the SQLite database."""
110
- placeholders = ','.join(['?'] * len(ids))
131
+ placeholders = ",".join(["?"] * len(ids))
111
132
  query = f"SELECT raw FROM objects WHERE id IN ({placeholders})"
112
133
  cursor = self.conn.execute(query, list(ids))
113
134
  return [json.loads(row[0]) for row in cursor.fetchall()]
114
135
 
115
-
116
136
  def get_objects(self, group):
117
137
  return list(self.load_objects_by_ids(group))
118
-
138
+
119
139
  @property
120
140
  def chunks(self):
121
141
  for group in self.groups or self.build_groups():
@@ -123,4 +143,4 @@ class BundleLoader:
123
143
 
124
144
  def __del__(self):
125
145
  with contextlib.suppress(Exception):
126
- os.remove(self.db_path)
146
+ os.remove(self.db_path)
@@ -42,6 +42,7 @@ class Stix2Arango:
42
42
  ignore_embedded_relationships=False,
43
43
  ignore_embedded_relationships_sro=True,
44
44
  ignore_embedded_relationships_smo=True,
45
+ include_embedded_relationships_attributes=None,
45
46
  bundle_id=None,
46
47
  username=config.ARANGODB_USERNAME,
47
48
  password=config.ARANGODB_PASSWORD,
@@ -89,6 +90,7 @@ class Stix2Arango:
89
90
  self.ignore_embedded_relationships = ignore_embedded_relationships
90
91
  self.ignore_embedded_relationships_smo = ignore_embedded_relationships_smo
91
92
  self.ignore_embedded_relationships_sro = ignore_embedded_relationships_sro
93
+ self.include_embedded_relationships_attributes = include_embedded_relationships_attributes
92
94
  self.object_key_mapping = {}
93
95
  if create_collection:
94
96
  self.create_s2a_indexes()
@@ -472,14 +474,16 @@ class Stix2Arango:
472
474
  for obj in tqdm(bundle_objects, desc="upload_embedded_edges"):
473
475
  if obj["id"] not in inserted_object_ids:
474
476
  continue
475
- if (
477
+ if self.include_embedded_relationships_attributes:
478
+ pass
479
+ elif (
476
480
  self.ignore_embedded_relationships_smo and obj["type"] in SMO_TYPES
477
481
  ) or (
478
482
  self.ignore_embedded_relationships_sro and obj["type"] == "relationship"
479
483
  ):
480
484
  continue
481
485
 
482
- for ref_type, targets in utils.get_embedded_refs(obj):
486
+ for ref_type, targets in utils.get_embedded_refs(obj, attributes=self.include_embedded_relationships_attributes):
483
487
  utils.create_relationship_obj(
484
488
  obj=obj,
485
489
  source=obj.get("id"),
@@ -578,7 +582,7 @@ class Stix2Arango:
578
582
  self.filename, all_objects
579
583
  )
580
584
 
581
- if not self.ignore_embedded_relationships:
585
+ if (not self.ignore_embedded_relationships) or self.include_embedded_relationships_attributes:
582
586
  module_logger.info(
583
587
  "Creating new embedded relationships using _refs and _ref"
584
588
  )
stix2arango/utils.py CHANGED
@@ -116,7 +116,7 @@ def remove_duplicates(objects):
116
116
  return list(objects_hashmap.values())
117
117
 
118
118
 
119
- def get_embedded_refs(object: list | dict, xpath: list = []):
119
+ def get_embedded_refs(object: list | dict, xpath: list = [], attributes=None):
120
120
  embedded_refs = []
121
121
  if isinstance(object, dict):
122
122
  for key, value in object.items():
@@ -125,11 +125,16 @@ def get_embedded_refs(object: list | dict, xpath: list = []):
125
125
  if match := EMBEDDED_RELATIONSHIP_RE.fullmatch(key):
126
126
  relationship_type = "-".join(xpath + match.group(1).split("_"))
127
127
  targets = value if isinstance(value, list) else [value]
128
+ targets = [_target for _target in targets if _target and isinstance(_target, str)]
129
+ if not targets:
130
+ continue
131
+ if attributes and key not in attributes:
132
+ continue
128
133
  embedded_refs.append((relationship_type, targets))
129
134
  elif isinstance(value, list):
130
- embedded_refs.extend(get_embedded_refs(value, xpath + [key]))
135
+ embedded_refs.extend(get_embedded_refs(value, xpath + [key], attributes=attributes))
131
136
  elif isinstance(object, list):
132
137
  for obj in object:
133
138
  if isinstance(obj, dict):
134
- embedded_refs.extend(get_embedded_refs(obj, xpath))
139
+ embedded_refs.extend(get_embedded_refs(obj, xpath, attributes=attributes))
135
140
  return embedded_refs
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: stix2arango
3
- Version: 1.1.3
3
+ Version: 1.1.5
4
4
  Summary: stix2arango is a command line tool that takes a group of STIX 2.1 objects in a bundle and inserts them into ArangoDB. It can also handle updates to existing objects in ArangoDB imported in a bundle.
5
5
  Project-URL: Homepage, https://github.com/muchdogesec/stix2arango
6
6
  Project-URL: Issues, https://github.com/muchdogesec/stix2arango/issues
@@ -60,20 +60,6 @@ Note, the installation assumes ArangoDB is already installed locally.
60
60
 
61
61
  [You can install ArangoDB here](https://arangodb.com/download/). stix2arango is compatible with both the Enterprise and Community versions.
62
62
 
63
- #### A note for Mac users
64
-
65
- Fellow Mac users, ArangoDB can be installed and run using homebrew as follows;
66
-
67
- ```shell
68
- ## Install
69
- brew install arangodb
70
- ## Run
71
- brew services start arangodb
72
- ## will now be accessible in a browser at: http://127.0.0.1:8529 . Default username is root with no password set (leave blank)
73
- ## Stop
74
- brew services stop arangodb
75
- ```
76
-
77
63
  ### Configuration options
78
64
 
79
65
  stix2arango has various settings that are defined in an `.env` file.
@@ -100,12 +86,14 @@ python3 stix2arango.py \
100
86
  Where;
101
87
 
102
88
  * `--file` (required): is the path to the valid STIX 2.1 bundle .json file
103
- * `--database` (required): is the name of the Arango database the objects should be stored in. If database does not exist, stix2arango will create it
89
+ * `--database` (required): is the name of the Arango database the objects should be stored in.
90
+ * `--create_db` (default `true`): If database does not exist, stix2arango will create it. You can set to `false` to stop this behaviour (and avoid the risk of incorrect DBs being created). Generally setting to `false` is a good idea if you know the databases exist. This setting will only work if the Arango user being used to authenticate has permissions to create new databases.
104
91
  * `--collection` (required): is the name of the Arango collection in the database specified the objects should be stored in. If the collection does not exist, stix2arango will create it
105
92
  * `--stix2arango_note` (optional): Will be stored under the `_stix2arango_note` custom attribute in ArangoDB. Useful as can be used in AQL. `a-z` characters only. Max 24 chars.
106
93
  * `--ignore_embedded_relationships` (optional, boolean): if `true` is passed, this will stop ANY embedded relationships from being generated. This applies to all object types (SDO, SCO, SRO, SMO). If you want to target only certain object types, see the `ignore_embedded_relationships_sro` and `ignore_embedded_relationships_smo` flags. Default is `false`
107
94
  * `--ignore_embedded_relationships_sro` (optional, boolean): if `true` passed, will stop any embedded relationships from being generated from SRO objects (`type` = `relationship`). Default is `false`
108
- * `--ignore_embedded_relationships_smo` (optional, boolean): if `true` passed, will stop any embedded relationships from being generated from SMO objects (`type` = `marking-definition`, `extension-definition`, `language-content`). Default is `false`
95
+ * `--ignore_embedded_relationships_smo` (optional, boolean): if `true` passed, will stop any embedded relationships from being generated from SMO objects (`type` = `marking-definition`, `extension-definition`, `language-content`). Default is `false`
96
+ * `--include_embedded_relationships_attributes` (optional, STIX `_ref` or `_refs` attribute): if you only want to create embedded relationships from certain keys (attributes) in a STIX object, you can pass a list of attributes here, e.g. `object_refs created_by_ref`. In this example, embedded relationships will be created for every object listed in `object_refs` and for the object in `created_by_ref`, linking the source (the object that holds these attributes) to the destinations (the objects listed as values of these attributes)
109
97
  * `--is_large_file` (pass flag): Use this mode when the bundle is very large (>100mb), this will chunk the input into multiple files before loading into memory.
110
98
 
111
99
  For example, [using the MITRE ATT&CK Enterprise bundle](https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json);
@@ -132,6 +120,18 @@ python3 stix2arango.py \
132
120
  --is_large_file
133
121
  ```
134
122
 
123
+ If you want to include embedded relationships only for the `object_refs` and `created_by_ref` attributes, you would run;
124
+
125
+ ```shell
126
+ python3 stix2arango.py \
127
+ --file cti_knowledge_base_store/mitre-attack-enterprise/enterprise-attack-15_1.json \
128
+ --database stix2arango_demo \
129
+ --collection demo_2 \
130
+ --stix2arango_note v15.1 \
131
+ --include_embedded_relationships_attributes object_refs created_by_ref \
132
+ --is_large_file
133
+ ```
134
+
135
135
  #### A note on embedded relationships
136
136
 
137
137
  stix2arango can handle all embedded references to other STIX objects under `_ref` and `_refs` properties in a STIX object when `--ignore_embedded_relationships` is set to false.
@@ -1,16 +1,16 @@
1
1
  stix2arango/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- stix2arango/__main__.py,sha256=wbR_iO70Vld2NYiml6Kz4rH396uOiNwTtjNBl4AHZEg,1987
2
+ stix2arango/__main__.py,sha256=zsCi_bfDULLDkqlRwXyGhFuLvSRcvESEc4MMN7h1lbQ,2835
3
3
  stix2arango/config.py,sha256=NZFrcnEfz-0QBrut2Rh7xMF78v0bk6U6y2TY_7mHxSs,1407
4
- stix2arango/utils.py,sha256=eVAMvXZVylM2RXzi2ph0RVW__eoSjAHWSWSG3900yjk,4487
4
+ stix2arango/utils.py,sha256=C-VDwsCABFU2hrv1EwF7oaQUYOE2MWG40_7WGzHu0A4,4796
5
5
  stix2arango/services/__init__.py,sha256=E87fB-dxI4mPxMVs00jdLhjp9jFhkVfjhMKIqGLRJlY,45
6
6
  stix2arango/services/arangodb_service.py,sha256=jr6zXFueluCU60WOJy7XuA9Ty0zW5FzGNBJGtJzq0PY,11964
7
7
  stix2arango/services/version_annotator.py,sha256=Sd1MIaXzK0fpNopNxRoB_3etodzAjX5D_p3uGQSWzOI,2946
8
8
  stix2arango/stix2arango/__init__.py,sha256=OqxWEEsHqR1QQpznM5DbFJ5bO5numKYtoYhjXYJMEyg,36
9
- stix2arango/stix2arango/bundle_loader.py,sha256=qi-0E_bMIMPZXzISvjhrWX8K-f7iFv9vOekldOGVczU,4603
10
- stix2arango/stix2arango/stix2arango.py,sha256=sC-br0nptUtZMzNza6v3s6rjdgJk-EG0_KErN9JN9qQ,22060
9
+ stix2arango/stix2arango/bundle_loader.py,sha256=lqhW4RwELRdRax7erSuYv7h02Nata7SXNWRxaA97r_w,5128
10
+ stix2arango/stix2arango/stix2arango.py,sha256=HJXDqA9NWxXVQSHPmbpkEKurpWEbZmy5bng5SQ1OsjE,22412
11
11
  stix2arango/templates/marking-definition.json,sha256=0q9y35mUmiF6xIWSLpkATL4JTHGSCNyLbejqZiQ0AuE,3113
12
- stix2arango-1.1.3.dist-info/METADATA,sha256=z9lCPIr6WmDFUpxzg4CQhEt_hSlBpGFjqdignPw0mSw,6873
13
- stix2arango-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- stix2arango-1.1.3.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
- stix2arango-1.1.3.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
- stix2arango-1.1.3.dist-info/RECORD,,
12
+ stix2arango-1.1.5.dist-info/METADATA,sha256=Jxs4Bp4z67-Bzy3AF3fYZlX4v6f_ToemEy7SKLN4oUY,7797
13
+ stix2arango-1.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
+ stix2arango-1.1.5.dist-info/entry_points.txt,sha256=k2WnxMsHFLoyC6rqfvjhIMS1zwtWin51-MbNCGmSMYE,58
15
+ stix2arango-1.1.5.dist-info/licenses/LICENSE,sha256=BK8Ppqlc4pdgnNzIxnxde0taoQ1BgicdyqmBvMiNYgY,11364
16
+ stix2arango-1.1.5.dist-info/RECORD,,