splunk-soar-sdk 3.4.0__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73):
  1. soar_sdk/abstract.py +7 -6
  2. soar_sdk/action_results.py +7 -7
  3. soar_sdk/actions_manager.py +6 -6
  4. soar_sdk/apis/artifact.py +3 -3
  5. soar_sdk/apis/container.py +2 -2
  6. soar_sdk/apis/utils.py +3 -2
  7. soar_sdk/apis/vault.py +1 -0
  8. soar_sdk/app.py +24 -27
  9. soar_sdk/app_cli_runner.py +7 -6
  10. soar_sdk/app_client.py +3 -4
  11. soar_sdk/asset.py +7 -9
  12. soar_sdk/asset_state.py +1 -2
  13. soar_sdk/async_utils.py +1 -2
  14. soar_sdk/cli/cli.py +2 -2
  15. soar_sdk/cli/init/cli.py +5 -5
  16. soar_sdk/cli/manifests/deserializers.py +4 -3
  17. soar_sdk/cli/manifests/processors.py +2 -2
  18. soar_sdk/cli/manifests/serializers.py +4 -4
  19. soar_sdk/cli/package/cli.py +14 -14
  20. soar_sdk/cli/package/utils.py +3 -2
  21. soar_sdk/cli/path_utils.py +1 -1
  22. soar_sdk/code_renderers/action_renderer.py +5 -4
  23. soar_sdk/code_renderers/app_renderer.py +1 -1
  24. soar_sdk/code_renderers/asset_renderer.py +1 -1
  25. soar_sdk/code_renderers/renderer.py +2 -2
  26. soar_sdk/compat.py +2 -1
  27. soar_sdk/decorators/__init__.py +3 -3
  28. soar_sdk/decorators/action.py +7 -11
  29. soar_sdk/decorators/make_request.py +9 -11
  30. soar_sdk/decorators/on_es_poll.py +7 -10
  31. soar_sdk/decorators/on_poll.py +7 -11
  32. soar_sdk/decorators/test_connectivity.py +5 -6
  33. soar_sdk/decorators/view_handler.py +6 -7
  34. soar_sdk/decorators/webhook.py +3 -5
  35. soar_sdk/extras/__init__.py +0 -0
  36. soar_sdk/extras/email/__init__.py +9 -0
  37. soar_sdk/extras/email/processor.py +1171 -0
  38. soar_sdk/extras/email/rfc5322.py +335 -0
  39. soar_sdk/extras/email/utils.py +178 -0
  40. soar_sdk/input_spec.py +4 -3
  41. soar_sdk/logging.py +5 -4
  42. soar_sdk/meta/actions.py +3 -3
  43. soar_sdk/meta/dependencies.py +6 -9
  44. soar_sdk/meta/webhooks.py +2 -1
  45. soar_sdk/models/__init__.py +1 -1
  46. soar_sdk/models/artifact.py +1 -0
  47. soar_sdk/models/attachment_input.py +1 -1
  48. soar_sdk/models/container.py +2 -1
  49. soar_sdk/models/finding.py +2 -1
  50. soar_sdk/models/vault_attachment.py +1 -0
  51. soar_sdk/models/view.py +2 -0
  52. soar_sdk/params.py +4 -5
  53. soar_sdk/shims/phantom/action_result.py +1 -1
  54. soar_sdk/shims/phantom/app.py +1 -1
  55. soar_sdk/shims/phantom/base_connector.py +3 -4
  56. soar_sdk/shims/phantom/connector_result.py +0 -1
  57. soar_sdk/shims/phantom/install_info.py +1 -1
  58. soar_sdk/shims/phantom/ph_ipc.py +2 -1
  59. soar_sdk/shims/phantom/vault.py +8 -6
  60. soar_sdk/shims/phantom_common/app_interface/app_interface.py +1 -0
  61. soar_sdk/types.py +1 -1
  62. soar_sdk/views/component_registry.py +0 -1
  63. soar_sdk/views/template_filters.py +4 -4
  64. soar_sdk/views/template_renderer.py +3 -2
  65. soar_sdk/views/view_parser.py +8 -6
  66. soar_sdk/webhooks/models.py +3 -3
  67. soar_sdk/webhooks/routing.py +3 -4
  68. {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.5.0.dist-info}/METADATA +3 -1
  69. splunk_soar_sdk-3.5.0.dist-info/RECORD +115 -0
  70. splunk_soar_sdk-3.4.0.dist-info/RECORD +0 -110
  71. {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.5.0.dist-info}/WHEEL +0 -0
  72. {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.5.0.dist-info}/entry_points.txt +0 -0
  73. {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1171 @@
1
+ import base64
2
+ import contextlib
3
+ import email
4
+ import hashlib
5
+ import json
6
+ import mimetypes
7
+ import re
8
+ import shutil
9
+ import tempfile
10
+ from copy import deepcopy
11
+ from dataclasses import dataclass
12
+ from email.header import decode_header, make_header
13
+ from email.message import Message
14
+ from html import unescape
15
+ from pathlib import Path
16
+ from typing import Any, TypedDict
17
+
18
+ from bs4 import BeautifulSoup, UnicodeDammit # type: ignore[attr-defined]
19
+ from pydantic import HttpUrl, ValidationError
20
+ from requests.structures import CaseInsensitiveDict
21
+
22
+ from soar_sdk.abstract import SOARClient
23
+ from soar_sdk.extras.email.utils import (
24
+ clean_url,
25
+ create_dict_hash,
26
+ decode_uni_string,
27
+ get_file_contains,
28
+ get_string,
29
+ is_ip,
30
+ is_sha1,
31
+ remove_child_info,
32
+ )
33
+ from soar_sdk.logging import getLogger
34
+ from soar_sdk.shims import phantom
35
+ from soar_sdk.shims.phantom.app import APP_ERROR, APP_SUCCESS
36
+ from soar_sdk.shims.phantom.vault import VaultBase
37
+
38
+ logger = getLogger()
39
+
40
+
41
def validate_url(value: str) -> None:
    """Raise ``ValueError`` if *value* is not a valid HTTP(S) URL.

    Validation is delegated to pydantic's ``HttpUrl`` type; pydantic's
    ``ValidationError`` is re-raised as ``ValueError`` with the original
    error chained for context.
    """
    try:
        HttpUrl(value)
    except ValidationError as err:
        raise ValueError(f"Invalid URL: {err}") from err
47
+
48
+
49
# Defaults merged into every container/artifact we build; automation is
# suppressed so ingestion itself does not trigger playbooks.
_container_common = {"run_automation": False}
_artifact_common = {"run_automation": False}

# NOTE(review): these three constants are not referenced anywhere in this
# chunk — presumably used elsewhere in the module; confirm before removing.
DEFAULT_ARTIFACT_COUNT = 100
DEFAULT_CONTAINER_COUNT = 100
HASH_FIXED_PHANTOM_VERSION = "2.0.201"

# Keys of the intermediate "parsed mail" dict and of the processing config.
PROC_EMAIL_JSON_FILES = "files"
PROC_EMAIL_JSON_BODIES = "bodies"
PROC_EMAIL_JSON_DATE = "date"
PROC_EMAIL_JSON_FROM = "from"
PROC_EMAIL_JSON_SUBJECT = "subject"
PROC_EMAIL_JSON_TO = "to"
PROC_EMAIL_JSON_START_TIME = "start_time"
PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS = "extract_attachments"
PROC_EMAIL_JSON_EXTRACT_BODY = "add_body_to_header_artifacts"
PROC_EMAIL_JSON_EXTRACT_URLS = "extract_urls"
PROC_EMAIL_JSON_EXTRACT_IPS = "extract_ips"
PROC_EMAIL_JSON_EXTRACT_DOMAINS = "extract_domains"
PROC_EMAIL_JSON_EXTRACT_HASHES = "extract_hashes"
PROC_EMAIL_JSON_IPS = "ips"
PROC_EMAIL_JSON_HASHES = "hashes"
PROC_EMAIL_JSON_URLS = "urls"
PROC_EMAIL_JSON_DOMAINS = "domains"
PROC_EMAIL_JSON_MSG_ID = "message_id"
PROC_EMAIL_JSON_EMAIL_HEADERS = "email_headers"
PROC_EMAIL_CONTENT_TYPE_MESSAGE = "message/rfc822"

# Case-insensitive http(s) URI matcher used for plain-text URL extraction.
URI_REGEX = r"[Hh][Tt][Tt][Pp][Ss]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
# NOTE(review): "@+" permits repeated '@' characters; this looks
# unintentional but is kept as-is — confirm against upstream before changing.
EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@+[A-Z0-9.-]+\.[A-Z]{2,}\b"
# Quoted local-part variant, e.g. "john doe"@example.com.
EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
# MD5 (32), SHA-1 (40) or SHA-256 (64) hex digests.
HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
# Loose dotted-quad matcher; candidates are re-validated with is_ip() later.
IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
# IPv6 matcher covering full, "::"-compressed and IPv4-embedded forms;
# assembled in pieces purely for readability.
IPV6_REGEX = r"\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|"
IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}"
IPV6_REGEX += (
    r"|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))"
)
IPV6_REGEX += r"|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})"
IPV6_REGEX += r"|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|"
IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})"
IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})"
IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})"
IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})"
IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
IPV6_REGEX += (
    r"(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d"
)
IPV6_REGEX += r"|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*"
101
+
102
+
103
class EmailBodyDict(TypedDict):
    """Type definition for email body dictionary."""

    # Path to the temp file holding the decoded body payload.
    file_path: str
    # Declared charset of the MIME part; None when unspecified.
    charset: str | None
108
+
109
+
110
@dataclass
class ProcessEmailContext:
    """Context object for email processing with SDK components."""

    # SOAR client used to save containers/artifacts.
    soar: SOARClient
    # Vault implementation — presumably for attachment storage; not
    # referenced in this chunk, confirm usage downstream.
    vault: VaultBase
    # Id of the app performing the ingestion.
    app_id: str
    # Mailbox/folder being ingested; folded into source_data_identifier.
    folder_name: str
    # True when folder_name is already a hex digest (skip re-hashing).
    is_hex: bool
    # Optional action name for logging/context.
    action_name: str | None = None
    # Optional app run id for logging/context.
    app_run_id: int | None = None
121
+
122
+
123
class EmailProcessor:
    """Email processor for parsing and extracting artifacts from RFC822 emails."""

    def __init__(self, context: ProcessEmailContext, config: dict[str, Any]) -> None:
        """Initialize the processor with SDK *context* and processing *config*.

        ``config`` is expected to carry the PROC_EMAIL_JSON_EXTRACT_* flags;
        it is re-assigned in ``process_email`` as well.
        """
        self.context = context
        self._config = config
        # CEF "contains" hints for the email id (e.g. ["vault id"]).
        self._email_id_contains: list[str] = []
        # Container/artifacts/attachments accumulated during one run.
        self._container: dict[str, Any] = {}
        self._artifacts: list[dict[str, Any]] = []
        self._attachments: list[dict[str, Any]] = []
        # Headers/attachments supplied by the caller (matched by Message-ID
        # / content during parsing).
        self._external_headers: list[CaseInsensitiveDict] = []
        self._external_attachments: list[dict[str, Any]] = []
        # Intermediate parsed-mail dict; populated by _handle_mail_object.
        self._parsed_mail: dict[str, Any] | None = None
        # NOTE(review): _guid_to_hash is never used in this chunk — confirm.
        self._guid_to_hash: dict[str, str] = {}
        # Temp directories created during processing, cleaned up on failure.
        self._tmp_dirs: list[str] = []
138
+
139
    def _extract_urls_domains(
        self, file_data: str, urls: set[str], domains: set[str]
    ) -> None:
        """Extract URLs and/or domains from an email body into the given sets.

        URLs come from three places: href targets, src targets, and a regex
        sweep over the unescaped text.  Domains are derived from validated
        URLs and from mailto: links.  Which sets are filled depends on the
        extract_urls / extract_domains config flags.
        """
        # Nothing to do when both extraction flags are off.
        if (not self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]) and (
            not self._config[PROC_EMAIL_JSON_EXTRACT_URLS]
        ):
            return

        try:
            soup = BeautifulSoup(file_data, "html.parser")
        except Exception as e:
            logger.debug(f"Error occurred while extracting domains of the URLs: {e}")
            return

        uris = []
        # Any tag carrying an href / src attribute.
        links = soup.find_all(href=True)
        srcs = soup.find_all(src=True)

        if links:
            for x in links:
                # The anchor text itself may be a URL too.
                uris.append(clean_url(x.get_text()))
                if not x["href"].startswith("mailto:"):
                    uris.append(x["href"])

        if srcs:
            for x in srcs:
                uris.append(clean_url(x.get_text()))
                uris.append(x["src"])

        # Regex sweep over the HTML-unescaped text for bare URLs.
        file_data = unescape(file_data)
        regex_uris = re.findall(URI_REGEX, file_data)
        uris.extend(clean_url(x) for x in regex_uris)

        # Keep only candidates that pass pydantic URL validation.
        validated_urls = []
        for url in uris:
            try:
                validate_url(url)
                validated_urls.append(url)
            except Exception as e:
                logger.debug(f"URL validation failed for {url}: {e}")

        if self._config[PROC_EMAIL_JSON_EXTRACT_URLS]:
            urls |= set(validated_urls)

        if self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]:
            # Hostnames of validated URLs (IP hosts are excluded).
            for uri in validated_urls:
                domain = phantom.get_host_from_url(uri)  # type: ignore[attr-defined]
                if domain and (not is_ip(domain)):
                    domains.add(domain)
            if links:
                # Domains of mailto: recipients, minus any ?query suffix.
                mailtos = [
                    x["href"] for x in links if (x["href"].startswith("mailto:"))
                ]
                for curr_email in mailtos:
                    domain = curr_email[curr_email.find("@") + 1 :]
                    if domain and (not is_ip(domain)):
                        if "?" in domain:
                            domain = domain[: domain.find("?")]
                        domains.add(domain)
198
+
199
+ def _get_ips(self, file_data: str, ips: set[str]) -> None:
200
+ for match in re.finditer(IP_REGEX, file_data):
201
+ ip_candidate = match.group(0).strip()
202
+ if is_ip(ip_candidate):
203
+ ips.add(ip_candidate)
204
+
205
+ for match in re.finditer(IPV6_REGEX, file_data):
206
+ ip_candidate = match.group(0).strip()
207
+ ips.add(ip_candidate)
208
+
209
    def _handle_body(
        self,
        body: EmailBodyDict,
        parsed_mail: dict[str, Any],
        body_index: int,
        email_id: str,
    ) -> int:
        """Extract IOCs (emails/URLs/domains/IPs/hashes) from one saved body.

        Reads the body file back from disk, normalizes it to UTF-8 via
        UnicodeDammit, records inline (forwarded) message headers, and fills
        the IOC sets stored in *parsed_mail*.  Returns APP_ERROR only when
        the body file is empty/unreadable.
        """
        local_file_path = body["file_path"]
        charset = body.get("charset")

        ips = parsed_mail[PROC_EMAIL_JSON_IPS]
        hashes = parsed_mail[PROC_EMAIL_JSON_HASHES]
        urls = parsed_mail[PROC_EMAIL_JSON_URLS]
        domains = parsed_mail[PROC_EMAIL_JSON_DOMAINS]

        # Try a text read first; fall back to binary when decoding fails.
        file_data_raw: str | bytes | None = None
        try:
            with open(local_file_path) as f:
                file_data_raw = f.read()
        except Exception:
            with open(local_file_path, "rb") as f:
                file_data_raw = f.read()

        if (file_data_raw is None) or (len(file_data_raw) == 0):
            return APP_ERROR

        # Let UnicodeDammit repair the charset, then force clean UTF-8 text.
        file_data: str = (
            UnicodeDammit(file_data_raw).unicode_markup.encode("utf-8").decode("utf-8")
        )

        # Bodies may contain a forwarded message whose headers we also want.
        self._parse_email_headers_as_inline(file_data, parsed_mail, charset, email_id)

        if self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]:
            # Domains of any email addresses appearing in the body text.
            emails = []
            emails.extend(re.findall(EMAIL_REGEX, file_data, re.IGNORECASE))
            emails.extend(re.findall(EMAIL_REGEX2, file_data, re.IGNORECASE))

            for curr_email in emails:
                domain = curr_email[curr_email.rfind("@") + 1 :]
                domains.add(domain)

        self._extract_urls_domains(file_data, urls, domains)

        if self._config[PROC_EMAIL_JSON_EXTRACT_IPS]:
            self._get_ips(file_data, ips)

        if self._config[PROC_EMAIL_JSON_EXTRACT_HASHES]:
            hashs_in_mail = re.findall(HASH_REGEX, file_data)
            if hashs_in_mail:
                hashes |= set(hashs_in_mail)

        return APP_SUCCESS
261
+
262
+ def _add_artifacts(
263
+ self,
264
+ cef_key: str,
265
+ input_set: set[str],
266
+ artifact_name: str,
267
+ start_index: int,
268
+ artifacts: list[dict[str, Any]],
269
+ ) -> int:
270
+ added_artifacts = 0
271
+ for entry in input_set:
272
+ if not entry:
273
+ continue
274
+
275
+ artifact: dict[str, Any] = {}
276
+ artifact.update(_artifact_common)
277
+ artifact["source_data_identifier"] = str(start_index + added_artifacts)
278
+ artifact["cef"] = {cef_key: entry}
279
+ artifact["name"] = artifact_name
280
+ logger.debug(f"Artifact: {artifact}")
281
+ artifacts.append(artifact)
282
+ added_artifacts += 1
283
+
284
+ return added_artifacts
285
+
286
+ def _parse_email_headers_as_inline(
287
+ self,
288
+ file_data: str,
289
+ parsed_mail: dict[str, Any],
290
+ charset: str | None,
291
+ email_id: str,
292
+ ) -> int:
293
+ email_text = re.sub(
294
+ r"(?im)^.*forwarded message.*\r?\n", "", file_data.strip(), count=1
295
+ )
296
+ mail = email.message_from_string(email_text)
297
+ self._parse_email_headers(parsed_mail, mail, charset, add_email_id=email_id)
298
+ return APP_SUCCESS
299
+
300
+ def _add_email_header_artifacts(
301
+ self,
302
+ email_header_artifacts: list[dict[str, Any]],
303
+ start_index: int,
304
+ artifacts: list[dict[str, Any]],
305
+ ) -> int:
306
+ added_artifacts = 0
307
+ for artifact in email_header_artifacts:
308
+ artifact["source_data_identifier"] = str(start_index + added_artifacts)
309
+ artifacts.append(artifact)
310
+ added_artifacts += 1
311
+ return added_artifacts
312
+
313
+ def _create_artifacts(self, parsed_mail: dict[str, Any]) -> int:
314
+ ips = parsed_mail[PROC_EMAIL_JSON_IPS]
315
+ hashes = parsed_mail[PROC_EMAIL_JSON_HASHES]
316
+ urls = parsed_mail[PROC_EMAIL_JSON_URLS]
317
+ domains = parsed_mail[PROC_EMAIL_JSON_DOMAINS]
318
+ email_headers = parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS]
319
+
320
+ artifact_id = 0
321
+
322
+ added_artifacts = self._add_artifacts(
323
+ "sourceAddress", ips, "IP Artifact", artifact_id, self._artifacts
324
+ )
325
+ artifact_id += added_artifacts
326
+
327
+ added_artifacts = self._add_artifacts(
328
+ "fileHash", hashes, "Hash Artifact", artifact_id, self._artifacts
329
+ )
330
+ artifact_id += added_artifacts
331
+
332
+ added_artifacts = self._add_artifacts(
333
+ "requestURL", urls, "URL Artifact", artifact_id, self._artifacts
334
+ )
335
+ artifact_id += added_artifacts
336
+
337
+ added_artifacts = self._add_artifacts(
338
+ "destinationDnsDomain",
339
+ domains,
340
+ "Domain Artifact",
341
+ artifact_id,
342
+ self._artifacts,
343
+ )
344
+ artifact_id += added_artifacts
345
+
346
+ added_artifacts = self._add_email_header_artifacts(
347
+ email_headers, artifact_id, self._artifacts
348
+ )
349
+ artifact_id += added_artifacts
350
+
351
+ return APP_SUCCESS
352
+
353
+ def _get_container_name(self, parsed_mail: dict[str, Any], email_id: str) -> str:
354
+ def_cont_name = f"Email ID: {email_id}"
355
+ subject = parsed_mail.get(PROC_EMAIL_JSON_SUBJECT)
356
+
357
+ if not subject:
358
+ return def_cont_name
359
+
360
+ try:
361
+ return str(make_header(decode_header(subject)))
362
+ except Exception:
363
+ return decode_uni_string(subject, def_cont_name)
364
+
365
+ def _handle_if_body(
366
+ self,
367
+ content_disp: str | None,
368
+ content_id: str | None,
369
+ content_type: str | None,
370
+ part: Message,
371
+ bodies: list[EmailBodyDict],
372
+ file_path: str,
373
+ ) -> tuple[int, bool]:
374
+ process_as_body = False
375
+
376
+ if content_disp is None or (
377
+ content_disp.lower().strip() == "inline"
378
+ and content_type
379
+ and (("text/html" in content_type) or ("text/plain" in content_type))
380
+ ):
381
+ process_as_body = True
382
+
383
+ if not process_as_body:
384
+ return APP_SUCCESS, True
385
+
386
+ part_payload = part.get_payload(decode=True)
387
+
388
+ if not part_payload:
389
+ return APP_SUCCESS, False
390
+
391
+ with open(file_path, "wb") as f:
392
+ f.write(part_payload) # type: ignore[arg-type]
393
+
394
+ bodies.append({"file_path": file_path, "charset": part.get_content_charset()})
395
+
396
+ return APP_SUCCESS, False
397
+
398
    def _handle_attachment(self, part: Message, file_name: str, file_path: str) -> int:
        """Save a MIME part to disk and record it in the parsed-mail files list.

        Merges metadata from any matching externally supplied attachment
        (matched by base64 content), and retries with a short placeholder
        name when the OS rejects an over-long file name.  Returns APP_ERROR
        on write failure, APP_SUCCESS otherwise (including when extraction
        is disabled or the payload is empty).
        """
        if self._parsed_mail is None:
            return APP_ERROR

        files = self._parsed_mail[PROC_EMAIL_JSON_FILES]

        if not self._config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]:
            return APP_SUCCESS

        # Undecoded payload — compared against external attachment content.
        part_base64_encoded = part.get_payload()

        headers = self._get_email_headers_from_part(part)

        attach_meta_info: dict[str, Any] = {}

        if headers:
            attach_meta_info = {"headers": dict(headers)}

        # Match this part against caller-supplied attachment records by
        # content (ignoring CRLF differences) and absorb their metadata.
        for curr_attach in self._external_attachments:
            if curr_attach.get("should_ignore", False):
                continue

            try:
                attach_content = curr_attach["content"]
            except Exception as e:
                logger.debug(f"Failed to get attachment content: {e}")
                continue

            if attach_content.strip().replace("\r\n", "") == str(
                part_base64_encoded
            ).strip().replace("\r\n", ""):
                attach_meta_info.update(dict(curr_attach))
                del attach_meta_info["content"]
                # Mark consumed so later parts don't re-match it.
                curr_attach["should_ignore"] = True

        part_payload = part.get_payload(decode=True)
        if not part_payload:
            return APP_SUCCESS

        try:
            with open(file_path, "wb") as f:
                f.write(part_payload)  # type: ignore[arg-type]
        except OSError as e:
            try:
                # Retry with a placeholder name when the name is too long;
                # rstrip removes the sanitized original name from the path.
                if "File name too long" in str(e):
                    new_file_name = "ph_long_file_name_temp"
                    file_path = "{}{}".format(
                        remove_child_info(file_path).rstrip(
                            file_name.replace("<", "").replace(">", "").replace(" ", "")
                        ),
                        new_file_name,
                    )
                    logger.debug(f"Original filename: {file_name}")
                    logger.debug(f"Modified filename: {new_file_name}")
                    with open(file_path, "wb") as long_file:
                        long_file.write(part_payload)  # type: ignore[arg-type]
                else:
                    logger.debug(f"Error occurred while adding file to Vault: {e}")
                    return APP_ERROR
            except Exception as e:
                logger.debug(f"Error occurred while adding file to Vault: {e}")
                return APP_ERROR
        except Exception as e:
            logger.debug(f"Error occurred while adding file to Vault: {e}")
            return APP_ERROR

        files.append(
            {
                "file_name": file_name,
                "file_path": file_path,
                "meta_info": attach_meta_info,
            }
        )

        return APP_SUCCESS
473
+
474
+ def _handle_part(
475
+ self,
476
+ part: Message,
477
+ part_index: int,
478
+ tmp_dir: str,
479
+ extract_attach: bool,
480
+ parsed_mail: dict[str, Any],
481
+ ) -> int:
482
+ bodies: list[EmailBodyDict] = parsed_mail[PROC_EMAIL_JSON_BODIES]
483
+
484
+ file_name = part.get_filename()
485
+ content_disp = part.get("Content-Disposition")
486
+ content_type = part.get("Content-Type")
487
+ content_id = part.get("Content-ID")
488
+
489
+ if file_name is None:
490
+ name = f"part_{part_index}"
491
+ extension = f".{part_index}"
492
+
493
+ if content_type is not None:
494
+ ext_guess = mimetypes.guess_extension(re.sub(";.*", "", content_type))
495
+ if ext_guess:
496
+ extension = ext_guess
497
+
498
+ if content_id is not None:
499
+ name = content_id
500
+
501
+ file_name = f"{name}{extension}"
502
+ else:
503
+ file_name = decode_uni_string(file_name, file_name)
504
+
505
+ file_path = "{}/{}_{}".format(
506
+ tmp_dir,
507
+ part_index,
508
+ file_name.translate(file_name.maketrans("", "", "".join(["<", ">", " "]))),
509
+ )
510
+
511
+ logger.debug(f"file_path: {file_path}")
512
+
513
+ _status, process_further = self._handle_if_body(
514
+ content_disp, content_id, content_type, part, bodies, file_path
515
+ )
516
+
517
+ if not process_further:
518
+ return APP_SUCCESS
519
+
520
+ if (content_type is not None) and (
521
+ content_type.find(PROC_EMAIL_CONTENT_TYPE_MESSAGE) != -1
522
+ ):
523
+ return APP_SUCCESS
524
+
525
+ self._handle_attachment(part, file_name, file_path)
526
+
527
+ return APP_SUCCESS
528
+
529
+ def _update_headers(self, headers: CaseInsensitiveDict) -> int:
530
+ if not self._external_headers:
531
+ return APP_SUCCESS
532
+
533
+ if not headers:
534
+ return APP_SUCCESS
535
+
536
+ headers_ci = CaseInsensitiveDict(headers)
537
+
538
+ for curr_header_lower in self._external_headers:
539
+ if (
540
+ headers_ci.get("message-id", "default_value1").strip()
541
+ == curr_header_lower.get("message-id", "default_value2").strip()
542
+ ):
543
+ headers.update(curr_header_lower)
544
+
545
+ return APP_SUCCESS
546
+
547
    def _get_email_headers_from_part(
        self, part: Message, charset: str | None = None
    ) -> CaseInsensitiveDict:
        """Return the part's headers as a CaseInsensitiveDict.

        Header values are converted with the part's charset (falling back to
        raw values on failure), all Received headers are collected into a
        list, and decoded variants of Subject/To/From/CC are added under
        decoded* keys.  Returns an empty dict when the part has no headers.
        """
        email_headers = list(part.items())

        if not email_headers:
            return CaseInsensitiveDict()

        if charset is None:
            charset = part.get_content_charset() or "utf-8"

        headers: CaseInsensitiveDict = CaseInsensitiveDict()
        try:
            for header_item in email_headers:
                headers.update({header_item[0]: get_string(header_item[1], charset)})
        except Exception as e:
            # Charset conversion failed somewhere: fall back to raw values
            # for the whole set to keep them consistent.
            logger.debug(
                f"Error converting header with charset {charset}: {e}. Using raw values."
            )
            for header_item in email_headers:
                headers.update({header_item[0]: header_item[1]})

        # There can be many Received headers; keep them all as a list.
        try:
            received_headers = [
                get_string(x[1], charset)
                for x in email_headers
                if x[0].lower() == "received"
            ]
        except Exception as e:
            logger.debug(f"Error converting received headers: {e}")
            received_headers = [
                x[1] for x in email_headers if x[0].lower() == "received"
            ]

        if received_headers:
            headers["Received"] = received_headers

        # Add decoded (human-readable) variants of the common headers.
        subject = headers.get("Subject")
        if subject:
            try:
                headers["decodedSubject"] = str(make_header(decode_header(subject)))
            except Exception:
                headers["decodedSubject"] = decode_uni_string(subject, subject)

        to_data = headers.get("To")
        if to_data:
            headers["decodedTo"] = decode_uni_string(to_data, to_data)

        from_data = headers.get("From")
        if from_data:
            headers["decodedFrom"] = decode_uni_string(from_data, from_data)

        cc_data = headers.get("CC")
        if cc_data:
            headers["decodedCC"] = decode_uni_string(cc_data, cc_data)

        return headers
604
+
605
    def _parse_email_headers(
        self,
        parsed_mail: dict[str, Any],
        part: Message,
        charset: str | None = None,
        add_email_id: str | None = None,
    ) -> int:
        """Build an "Email Artifact" from the part's headers and queue it.

        The artifact is appended to parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS].
        When the add_body_to_header_artifacts config flag is set and no
        body-like header is present, the part's payload tree is walked
        breadth-first and each leaf body is attached as bodyPart<i> /
        bodyPart<i>ContentType.  Returns the new artifact-list length, or 0
        when there is nothing to record.
        """
        email_header_artifacts = parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS]

        headers = self._get_email_headers_from_part(part, charset)

        if not headers:
            return 0

        cef_artifact: dict[str, Any] = {}
        cef_types: dict[str, list[str]] = {}

        if headers.get("From"):
            cef_artifact.update({"fromEmail": headers["From"]})

        if headers.get("To"):
            cef_artifact.update({"toEmail": headers["To"]})

        # Without addresses or a Message-ID there is nothing worth keeping.
        message_id = headers.get("message-id")
        if (not cef_artifact) and (message_id is None):
            return 0

        cef_types.update({"fromEmail": ["email"], "toEmail": ["email"]})

        # Merge caller-supplied headers (matched by Message-ID) first.
        self._update_headers(headers)
        cef_artifact["emailHeaders"] = dict(headers)

        body = None

        # Promote special headers out of emailHeaders: body* is captured,
        # parent/guid keys become top-level CEF fields.
        for curr_key in list(cef_artifact["emailHeaders"].keys()):
            if curr_key.lower().startswith("body"):
                body = cef_artifact["emailHeaders"].pop(curr_key)
            elif curr_key in ("parentInternetMessageId", "parentGuid", "emailGuid"):
                curr_value = cef_artifact["emailHeaders"].pop(curr_key)
                cef_artifact.update({curr_key: curr_value})

        if self._config.get(PROC_EMAIL_JSON_EXTRACT_BODY, False) and not body:
            # BFS over the MIME tree; each non-multipart leaf becomes a
            # bodyPart<i> entry on the artifact.
            queue: list[Message] = [part]
            i = 1
            while len(queue) > 0:
                cur_part = queue.pop(0)
                payload = cur_part.get_payload()
                if isinstance(payload, list):
                    queue.extend(payload)  # type: ignore[arg-type]
                else:
                    encoding = cur_part["Content-Transfer-Encoding"]
                    if encoding:
                        if "base64" in encoding.lower():
                            payload = base64.b64decode(
                                "".join(str(payload).splitlines())
                            )
                        elif encoding != "8bit":
                            # Decode then repair the charset to clean UTF-8.
                            payload = cur_part.get_payload(decode=True)
                            payload = (
                                UnicodeDammit(payload)
                                .unicode_markup.encode("utf-8")
                                .decode("utf-8")
                            )
                    # Ensure the payload is JSON-serializable; fall back to
                    # UTF-8 decode, then to base64 with a marker flag.
                    try:
                        json.dumps({"body": payload})
                    except (TypeError, UnicodeDecodeError):
                        try:
                            payload = payload.decode("UTF-8")  # type: ignore[union-attr]
                        except (UnicodeDecodeError, AttributeError):
                            logger.debug(
                                "Email body caused unicode exception. Encoding as base64."
                            )
                            if isinstance(payload, bytes):
                                payload = base64.b64encode(payload).decode("UTF-8")
                            else:
                                payload = base64.b64encode(
                                    str(payload).encode("UTF-8")
                                ).decode("UTF-8")
                            cef_artifact["body_base64encoded"] = True

                    cef_artifact.update({f"bodyPart{i}": payload if payload else None})
                    cef_artifact.update(
                        {
                            f"bodyPart{i}ContentType": cur_part["Content-Type"]
                            if cur_part["Content-Type"]
                            else None
                        }
                    )
                    i += 1

        if add_email_id:
            cef_artifact["emailId"] = add_email_id
            if self._email_id_contains:
                cef_types.update({"emailId": self._email_id_contains})

        artifact: dict[str, Any] = {}
        artifact.update(_artifact_common)
        artifact["name"] = "Email Artifact"
        artifact["cef"] = cef_artifact
        artifact["cef_types"] = cef_types
        email_header_artifacts.append(artifact)

        return len(email_header_artifacts)
708
+
709
    def _handle_mail_object(
        self,
        mail: Message,
        email_id: str,
        rfc822_email: str,
        tmp_dir: str,
        start_time_epoch: float,
    ) -> int:
        """Walk the parsed email, populate the container and IOC artifacts.

        Fills ``self._parsed_mail`` with header fields / bodies / files,
        processes each MIME part, builds the container metadata on
        ``self._container`` and finally creates artifacts from the bodies.
        """
        self._parsed_mail = {}

        tmp_dir_path = Path(tmp_dir)
        if not tmp_dir_path.exists():
            tmp_dir_path.mkdir(parents=True)

        extract_attach = self._config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]

        # Top-level header fields of interest.
        self._parsed_mail[PROC_EMAIL_JSON_SUBJECT] = mail.get("Subject", "")
        self._parsed_mail[PROC_EMAIL_JSON_FROM] = mail.get("From", "")
        self._parsed_mail[PROC_EMAIL_JSON_TO] = mail.get("To", "")
        self._parsed_mail[PROC_EMAIL_JSON_DATE] = mail.get("Date", "")
        self._parsed_mail[PROC_EMAIL_JSON_MSG_ID] = mail.get("Message-ID", "")
        self._parsed_mail[PROC_EMAIL_JSON_FILES] = files = []  # type: ignore[var-annotated]
        bodies: list[EmailBodyDict] = []
        self._parsed_mail[PROC_EMAIL_JSON_BODIES] = bodies
        self._parsed_mail[PROC_EMAIL_JSON_START_TIME] = start_time_epoch
        self._parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS] = []

        if mail.is_multipart():
            for i, part in enumerate(mail.walk()):
                # Only the outermost part gets the email id attached.
                add_email_id = None
                if i == 0:
                    add_email_id = email_id

                self._parse_email_headers(
                    self._parsed_mail, part, add_email_id=add_email_id
                )

                if part.is_multipart():
                    continue
                try:
                    ret_val = self._handle_part(
                        part, i, tmp_dir, extract_attach, self._parsed_mail
                    )
                except Exception as e:
                    logger.debug(f"ErrorExp in _handle_part # {i}: {e}")
                    continue

                if ret_val == APP_ERROR:
                    continue

        else:
            # Single-part message: the whole payload is the body.
            self._parse_email_headers(self._parsed_mail, mail, add_email_id=email_id)
            file_path = f"{tmp_dir}/part_1.text"
            payload = mail.get_payload(decode=True)
            if payload:
                with open(file_path, "wb") as f:
                    f.write(payload)  # type: ignore[arg-type]
                bodies.append(
                    {"file_path": file_path, "charset": mail.get_content_charset()}
                )

        container_name = self._get_container_name(self._parsed_mail, email_id)

        # NOTE(review): _get_container_name is annotated -> str, so this
        # check looks unreachable — confirm before removing.
        if container_name is None:
            return APP_ERROR

        container: dict[str, Any] = {}
        container_data = dict(self._parsed_mail)

        del container_data[PROC_EMAIL_JSON_EMAIL_HEADERS]
        container.update(_container_common)

        # source_data_identifier uses the folder name hashed to hex (unless
        # the caller says it already is hex) plus the email id.
        if not self.context.is_hex:
            try:
                folder_hex = hashlib.sha256(self.context.folder_name)  # type: ignore[arg-type]
            except Exception:
                folder_hex = hashlib.sha256(self.context.folder_name.encode())

            folder_sdi = folder_hex.hexdigest()
        else:
            folder_sdi = self.context.folder_name

        self._container["source_data_identifier"] = f"{folder_sdi} : {email_id}"
        self._container["name"] = container_name
        self._container["data"] = {"raw_email": rfc822_email}

        # IOC accumulators filled by _handle_body.
        self._parsed_mail[PROC_EMAIL_JSON_IPS] = set()
        self._parsed_mail[PROC_EMAIL_JSON_HASHES] = set()
        self._parsed_mail[PROC_EMAIL_JSON_URLS] = set()
        self._parsed_mail[PROC_EMAIL_JSON_DOMAINS] = set()

        for i, body in enumerate(bodies):
            if not body:
                continue

            try:
                self._handle_body(body, self._parsed_mail, i, email_id)
            except Exception as e:
                logger.debug(f"ErrorExp in _handle_body # {i}: {e!s}")
                continue

        self._attachments.extend(files)

        self._create_artifacts(self._parsed_mail)

        return APP_SUCCESS
815
+
816
+ def _set_email_id_contains(self, email_id: str) -> None:
817
+ email_id_str = str(email_id)
818
+
819
+ if is_sha1(email_id_str):
820
+ self._email_id_contains = ["vault id"]
821
+
822
+ def _int_process_email(
823
+ self, rfc822_email: str, email_id: str, start_time_epoch: float
824
+ ) -> tuple[int, str, list[dict[str, Any]]]:
825
+ mail = email.message_from_string(rfc822_email)
826
+
827
+ tmp_dir = tempfile.mkdtemp(prefix="ph_email_")
828
+ self._tmp_dirs.append(tmp_dir)
829
+
830
+ try:
831
+ ret_val = self._handle_mail_object(
832
+ mail, email_id, rfc822_email, tmp_dir, start_time_epoch
833
+ )
834
+ except Exception as e:
835
+ message = f"ErrorExp in self._handle_mail_object: {e}"
836
+ logger.debug(message)
837
+ return APP_ERROR, message, []
838
+
839
+ results = [
840
+ {
841
+ "container": self._container,
842
+ "artifacts": self._artifacts,
843
+ "files": self._attachments,
844
+ "temp_directory": tmp_dir,
845
+ }
846
+ ]
847
+
848
+ return ret_val, "Email Parsed", results
849
+
850
+ def process_email(
851
+ self,
852
+ base_connector: object,
853
+ rfc822_email: str,
854
+ email_id: str,
855
+ config: dict[str, Any],
856
+ epoch: float,
857
+ container_id: int | None = None,
858
+ email_headers: list[dict[str, Any]] | None = None,
859
+ attachments_data: list[dict[str, Any]] | None = None,
860
+ ) -> tuple[int, str]:
861
+ """Process an email and extract artifacts."""
862
+ self._config = config
863
+
864
+ if email_headers:
865
+ for curr_header in email_headers:
866
+ self._external_headers.append(CaseInsensitiveDict(curr_header))
867
+
868
+ if (config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]) and (
869
+ attachments_data is not None
870
+ ):
871
+ self._external_attachments = attachments_data
872
+
873
+ with contextlib.suppress(Exception):
874
+ self._set_email_id_contains(email_id)
875
+
876
+ ret_val, message, results = self._int_process_email(
877
+ rfc822_email, email_id, epoch
878
+ )
879
+
880
+ if not ret_val:
881
+ self._del_tmp_dirs()
882
+ return APP_ERROR, message
883
+
884
+ try:
885
+ self._parse_results(results, container_id)
886
+ except Exception:
887
+ self._del_tmp_dirs()
888
+ raise
889
+
890
+ return APP_SUCCESS, "Email Processed"
891
+
892
+ def _save_ingested(
893
+ self, container: dict[str, Any], using_dummy: bool
894
+ ) -> tuple[int, str, int | None]:
895
+ if using_dummy:
896
+ cid = container["id"]
897
+ artifacts = container["artifacts"]
898
+ for artifact in artifacts:
899
+ artifact["container_id"] = cid
900
+ try:
901
+ _ids = self.context.soar.save_artifacts(artifacts) # type: ignore[attr-defined]
902
+ ret_val, message = APP_SUCCESS, "Success"
903
+ logger.debug(
904
+ f"save_artifacts returns, value: {ret_val}, reason: {message}"
905
+ )
906
+ except Exception as e:
907
+ ret_val, message = APP_ERROR, str(e)
908
+ logger.debug(f"save_artifacts failed: {e}")
909
+ return ret_val, message, None
910
+
911
+ return ret_val, message, cid
912
+ else:
913
+ try:
914
+ cid = self.context.soar.save_container(container) # type: ignore[attr-defined]
915
+ ret_val, message = APP_SUCCESS, "Success"
916
+ logger.debug(
917
+ f"save_container (with artifacts) returns, value: {ret_val}, reason: {message}, id: {cid}"
918
+ )
919
+ except Exception as e:
920
+ ret_val, message, cid = APP_ERROR, str(e), None
921
+ logger.debug(f"save_container failed: {e}")
922
+
923
+ return ret_val, message, cid
924
+
925
+ def _handle_save_ingested(
926
+ self,
927
+ artifacts: list[dict[str, Any]],
928
+ container: dict[str, Any] | None,
929
+ container_id: int | None,
930
+ files: list[dict[str, Any]],
931
+ ) -> None:
932
+ using_dummy = False
933
+
934
+ if container_id:
935
+ using_dummy = True
936
+ container = {
937
+ "name": "Dummy Container",
938
+ "dummy": True,
939
+ "id": container_id,
940
+ "artifacts": artifacts,
941
+ }
942
+ elif container:
943
+ container["artifacts"] = artifacts
944
+ else:
945
+ return
946
+
947
+ for artifact in [
948
+ x
949
+ for x in container.get("artifacts", [])
950
+ if not x.get("source_data_identifier")
951
+ ]:
952
+ self._set_sdi(artifact)
953
+
954
+ if files and container.get("artifacts"):
955
+ container["artifacts"][-1]["run_automation"] = False
956
+
957
+ ret_val, message, container_id = self._save_ingested(container, using_dummy)
958
+
959
+ if ret_val == APP_ERROR:
960
+ message = f"Failed to save ingested artifacts, error msg: {message}"
961
+ logger.debug(message)
962
+ return
963
+
964
+ if not container_id:
965
+ message = "save_container did not return a container_id"
966
+ logger.debug(message)
967
+ return
968
+
969
+ vault_ids: list[str] = []
970
+ vault_artifacts_added = 0
971
+
972
+ last_file = len(files) - 1
973
+ for i, curr_file in enumerate(files):
974
+ run_automation = i == last_file
975
+ ret_val, added_to_vault = self._handle_file(
976
+ curr_file,
977
+ vault_ids,
978
+ container_id,
979
+ vault_artifacts_added,
980
+ run_automation,
981
+ )
982
+
983
+ if added_to_vault:
984
+ vault_artifacts_added += 1
985
+
986
+ def _parse_results(
987
+ self, results: list[dict[str, Any]], container_id: int | None = None
988
+ ) -> int:
989
+ container_count = DEFAULT_CONTAINER_COUNT
990
+ results = results[:container_count]
991
+
992
+ for result in results:
993
+ if container_id is None:
994
+ container = result.get("container")
995
+
996
+ if not container:
997
+ continue
998
+
999
+ container.update(_container_common)
1000
+
1001
+ else:
1002
+ container = None
1003
+
1004
+ artifacts = result.get("artifacts", [])
1005
+ for _j, artifact in enumerate(artifacts):
1006
+ if not artifact:
1007
+ continue
1008
+
1009
+ self._set_sdi(artifact)
1010
+
1011
+ if not artifacts:
1012
+ continue
1013
+
1014
+ len_artifacts = len(artifacts)
1015
+
1016
+ for j, artifact in enumerate(artifacts):
1017
+ if not artifact:
1018
+ continue
1019
+
1020
+ if (j + 1) == len_artifacts:
1021
+ artifact["run_automation"] = True
1022
+
1023
+ cef_artifact = artifact.get("cef")
1024
+ if "parentGuid" in cef_artifact:
1025
+ parent_guid = cef_artifact.pop("parentGuid")
1026
+ if parent_guid in self._guid_to_hash:
1027
+ cef_artifact["parentSourceDataIdentifier"] = self._guid_to_hash[
1028
+ parent_guid
1029
+ ]
1030
+ if "emailGuid" in cef_artifact:
1031
+ del cef_artifact["emailGuid"]
1032
+
1033
+ self._handle_save_ingested(
1034
+ artifacts, container, container_id, result.get("files", [])
1035
+ )
1036
+
1037
+ for result in results:
1038
+ if result.get("temp_directory"):
1039
+ shutil.rmtree(result["temp_directory"], ignore_errors=True)
1040
+
1041
+ return APP_SUCCESS
1042
+
1043
+ def _add_vault_hashes_to_dictionary(
1044
+ self, cef_artifact: dict[str, Any], vault_id: str
1045
+ ) -> tuple[int, str]:
1046
+ try:
1047
+ vault_info_data = self.context.vault.get_attachment(vault_id=vault_id)
1048
+ except Exception:
1049
+ return APP_ERROR, "Could not retrieve vault file"
1050
+
1051
+ if not vault_info_data:
1052
+ return APP_ERROR, "Vault ID not found"
1053
+
1054
+ try:
1055
+ metadata = vault_info_data[0].get("metadata")
1056
+ except Exception:
1057
+ return APP_ERROR, "Failed to get vault item metadata"
1058
+
1059
+ if metadata:
1060
+ with contextlib.suppress(Exception):
1061
+ cef_artifact["fileHashSha256"] = metadata["sha256"]
1062
+
1063
+ with contextlib.suppress(Exception):
1064
+ cef_artifact["fileHashMd5"] = metadata["md5"]
1065
+
1066
+ with contextlib.suppress(Exception):
1067
+ cef_artifact["fileHashSha1"] = metadata["sha1"]
1068
+
1069
+ return APP_SUCCESS, "Mapped hash values"
1070
+
1071
+ def _handle_file(
1072
+ self,
1073
+ curr_file: dict[str, Any],
1074
+ vault_ids: list[str],
1075
+ container_id: int,
1076
+ artifact_id: int,
1077
+ run_automation: bool = False,
1078
+ ) -> tuple[int, int]:
1079
+ file_name = curr_file.get("file_name")
1080
+
1081
+ local_file_path = curr_file["file_path"]
1082
+
1083
+ contains = get_file_contains(local_file_path)
1084
+
1085
+ vault_attach_dict: dict[str, Any] = {}
1086
+
1087
+ if not file_name:
1088
+ file_name = Path(local_file_path).name
1089
+
1090
+ logger.debug(f"Vault file name: {file_name}")
1091
+
1092
+ vault_attach_dict[phantom.APP_JSON_ACTION_NAME] = self.context.action_name # type: ignore[attr-defined]
1093
+ vault_attach_dict[phantom.APP_JSON_APP_RUN_ID] = self.context.app_run_id # type: ignore[attr-defined]
1094
+
1095
+ file_name = decode_uni_string(file_name, file_name)
1096
+
1097
+ try:
1098
+ vault_id = self.context.vault.add_attachment(
1099
+ container_id=container_id,
1100
+ file_location=local_file_path,
1101
+ file_name=file_name,
1102
+ metadata=vault_attach_dict,
1103
+ )
1104
+ except Exception as e:
1105
+ logger.debug(f"Error adding file to vault: {e}")
1106
+ return APP_ERROR, APP_ERROR
1107
+
1108
+ cef_artifact = curr_file.get("meta_info", {})
1109
+ cef_artifact.update({"fileName": file_name})
1110
+
1111
+ if vault_id:
1112
+ cef_artifact.update(
1113
+ {"vaultId": vault_id, "cs6": vault_id, "cs6Label": "Vault ID"}
1114
+ )
1115
+ self._add_vault_hashes_to_dictionary(cef_artifact, vault_id)
1116
+
1117
+ artifact: dict[str, Any] = {}
1118
+ artifact.update(_artifact_common)
1119
+ artifact["container_id"] = container_id
1120
+ artifact["name"] = "Vault Artifact"
1121
+ artifact["cef"] = cef_artifact
1122
+ artifact["run_automation"] = run_automation
1123
+ if contains:
1124
+ artifact["cef_types"] = {"vaultId": contains, "cs6": contains}
1125
+ self._set_sdi(artifact)
1126
+
1127
+ if "parentGuid" in cef_artifact:
1128
+ parent_guid = cef_artifact.pop("parentGuid")
1129
+ cef_artifact["parentSourceDataIdentifier"] = self._guid_to_hash[parent_guid]
1130
+
1131
+ try:
1132
+ artifact_id_result = self.context.soar.save_artifact(artifact) # type: ignore[attr-defined]
1133
+ ret_val, status_string = APP_SUCCESS, "Success"
1134
+ logger.debug(
1135
+ f"save_artifact returns, value: {ret_val}, reason: {status_string}, id: {artifact_id_result}"
1136
+ )
1137
+ except Exception as e:
1138
+ ret_val, status_string = APP_ERROR, str(e)
1139
+ logger.debug(f"save_artifact failed: {e}")
1140
+
1141
+ return APP_SUCCESS, ret_val
1142
+
1143
+ def _set_sdi(self, input_dict: dict[str, Any]) -> int:
1144
+ input_dict.pop("source_data_identifier", None)
1145
+
1146
+ input_dict_hash = input_dict
1147
+
1148
+ cef = input_dict.get("cef")
1149
+
1150
+ curr_email_guid = None
1151
+
1152
+ if cef is not None and (("parentGuid" in cef) or ("emailGuid" in cef)):
1153
+ input_dict_hash = deepcopy(input_dict)
1154
+ cef = input_dict_hash["cef"]
1155
+ if "parentGuid" in cef:
1156
+ del cef["parentGuid"]
1157
+ curr_email_guid = cef.get("emailGuid")
1158
+ if curr_email_guid is not None:
1159
+ del cef["emailGuid"]
1160
+
1161
+ input_dict["source_data_identifier"] = create_dict_hash(input_dict_hash)
1162
+
1163
+ if curr_email_guid:
1164
+ self._guid_to_hash[curr_email_guid] = input_dict["source_data_identifier"]
1165
+
1166
+ return APP_SUCCESS
1167
+
1168
+ def _del_tmp_dirs(self) -> None:
1169
+ """Remove any tmp_dirs that were created."""
1170
+ for tmp_dir in self._tmp_dirs:
1171
+ shutil.rmtree(tmp_dir, ignore_errors=True)