splunk-soar-sdk 3.4.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- soar_sdk/abstract.py +7 -6
- soar_sdk/action_results.py +7 -7
- soar_sdk/actions_manager.py +7 -13
- soar_sdk/apis/artifact.py +3 -3
- soar_sdk/apis/container.py +2 -2
- soar_sdk/apis/es/findings.py +27 -0
- soar_sdk/apis/utils.py +3 -2
- soar_sdk/apis/vault.py +1 -0
- soar_sdk/app.py +24 -27
- soar_sdk/app_cli_runner.py +7 -6
- soar_sdk/app_client.py +3 -4
- soar_sdk/asset.py +7 -9
- soar_sdk/asset_state.py +1 -2
- soar_sdk/async_utils.py +1 -2
- soar_sdk/cli/cli.py +2 -2
- soar_sdk/cli/init/cli.py +5 -5
- soar_sdk/cli/manifests/deserializers.py +4 -3
- soar_sdk/cli/manifests/processors.py +4 -2
- soar_sdk/cli/manifests/serializers.py +4 -4
- soar_sdk/cli/package/cli.py +14 -14
- soar_sdk/cli/package/utils.py +3 -2
- soar_sdk/cli/path_utils.py +1 -1
- soar_sdk/code_renderers/action_renderer.py +5 -4
- soar_sdk/code_renderers/app_renderer.py +1 -1
- soar_sdk/code_renderers/asset_renderer.py +1 -1
- soar_sdk/code_renderers/renderer.py +2 -2
- soar_sdk/compat.py +2 -1
- soar_sdk/decorators/__init__.py +3 -3
- soar_sdk/decorators/action.py +7 -11
- soar_sdk/decorators/make_request.py +9 -11
- soar_sdk/decorators/on_es_poll.py +105 -136
- soar_sdk/decorators/on_poll.py +7 -11
- soar_sdk/decorators/test_connectivity.py +5 -6
- soar_sdk/decorators/view_handler.py +6 -7
- soar_sdk/decorators/webhook.py +3 -5
- soar_sdk/es_client.py +43 -0
- soar_sdk/extras/__init__.py +0 -0
- soar_sdk/extras/email/__init__.py +9 -0
- soar_sdk/extras/email/processor.py +1171 -0
- soar_sdk/extras/email/rfc5322.py +335 -0
- soar_sdk/extras/email/utils.py +178 -0
- soar_sdk/input_spec.py +4 -3
- soar_sdk/logging.py +5 -4
- soar_sdk/meta/actions.py +3 -3
- soar_sdk/meta/app.py +1 -0
- soar_sdk/meta/dependencies.py +47 -11
- soar_sdk/meta/webhooks.py +2 -1
- soar_sdk/models/__init__.py +1 -1
- soar_sdk/models/artifact.py +1 -0
- soar_sdk/models/attachment_input.py +1 -1
- soar_sdk/models/container.py +2 -1
- soar_sdk/models/finding.py +4 -6
- soar_sdk/models/vault_attachment.py +1 -0
- soar_sdk/models/view.py +2 -0
- soar_sdk/params.py +13 -7
- soar_sdk/shims/phantom/action_result.py +1 -1
- soar_sdk/shims/phantom/app.py +1 -1
- soar_sdk/shims/phantom/base_connector.py +3 -4
- soar_sdk/shims/phantom/connector_result.py +0 -1
- soar_sdk/shims/phantom/install_info.py +1 -1
- soar_sdk/shims/phantom/ph_ipc.py +2 -1
- soar_sdk/shims/phantom/vault.py +8 -6
- soar_sdk/shims/phantom_common/app_interface/app_interface.py +1 -0
- soar_sdk/types.py +1 -1
- soar_sdk/views/component_registry.py +0 -1
- soar_sdk/views/template_filters.py +4 -4
- soar_sdk/views/template_renderer.py +3 -2
- soar_sdk/views/view_parser.py +8 -6
- soar_sdk/webhooks/models.py +3 -3
- soar_sdk/webhooks/routing.py +3 -4
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/METADATA +5 -1
- splunk_soar_sdk-3.6.0.dist-info/RECORD +117 -0
- splunk_soar_sdk-3.4.0.dist-info/RECORD +0 -110
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/WHEEL +0 -0
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/entry_points.txt +0 -0
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1171 @@
+import base64
+import contextlib
+import email
+import hashlib
+import json
+import mimetypes
+import re
+import shutil
+import tempfile
+from copy import deepcopy
+from dataclasses import dataclass
+from email.header import decode_header, make_header
+from email.message import Message
+from html import unescape
+from pathlib import Path
+from typing import Any, TypedDict
+
+from bs4 import BeautifulSoup, UnicodeDammit  # type: ignore[attr-defined]
+from pydantic import HttpUrl, ValidationError
+from requests.structures import CaseInsensitiveDict
+
+from soar_sdk.abstract import SOARClient
+from soar_sdk.extras.email.utils import (
+    clean_url,
+    create_dict_hash,
+    decode_uni_string,
+    get_file_contains,
+    get_string,
+    is_ip,
+    is_sha1,
+    remove_child_info,
+)
+from soar_sdk.logging import getLogger
+from soar_sdk.shims import phantom
+from soar_sdk.shims.phantom.app import APP_ERROR, APP_SUCCESS
+from soar_sdk.shims.phantom.vault import VaultBase
+
+logger = getLogger()
+
+
+def validate_url(value: str) -> None:
+    """Validate a URL using pydantic."""
+    try:
+        HttpUrl(value)
+    except ValidationError as e:
+        raise ValueError(f"Invalid URL: {e}") from e
+
+
+_container_common = {"run_automation": False}
+_artifact_common = {"run_automation": False}
+
+DEFAULT_ARTIFACT_COUNT = 100
+DEFAULT_CONTAINER_COUNT = 100
+HASH_FIXED_PHANTOM_VERSION = "2.0.201"
+
+PROC_EMAIL_JSON_FILES = "files"
+PROC_EMAIL_JSON_BODIES = "bodies"
+PROC_EMAIL_JSON_DATE = "date"
+PROC_EMAIL_JSON_FROM = "from"
+PROC_EMAIL_JSON_SUBJECT = "subject"
+PROC_EMAIL_JSON_TO = "to"
+PROC_EMAIL_JSON_START_TIME = "start_time"
+PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS = "extract_attachments"
+PROC_EMAIL_JSON_EXTRACT_BODY = "add_body_to_header_artifacts"
+PROC_EMAIL_JSON_EXTRACT_URLS = "extract_urls"
+PROC_EMAIL_JSON_EXTRACT_IPS = "extract_ips"
+PROC_EMAIL_JSON_EXTRACT_DOMAINS = "extract_domains"
+PROC_EMAIL_JSON_EXTRACT_HASHES = "extract_hashes"
+PROC_EMAIL_JSON_IPS = "ips"
+PROC_EMAIL_JSON_HASHES = "hashes"
+PROC_EMAIL_JSON_URLS = "urls"
+PROC_EMAIL_JSON_DOMAINS = "domains"
+PROC_EMAIL_JSON_MSG_ID = "message_id"
+PROC_EMAIL_JSON_EMAIL_HEADERS = "email_headers"
+PROC_EMAIL_CONTENT_TYPE_MESSAGE = "message/rfc822"
+
+URI_REGEX = r"[Hh][Tt][Tt][Pp][Ss]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@+[A-Z0-9.-]+\.[A-Z]{2,}\b"
+EMAIL_REGEX2 = r'".*"@[A-Z0-9.-]+\.[A-Z]{2,}\b'
+HASH_REGEX = r"\b[0-9a-fA-F]{32}\b|\b[0-9a-fA-F]{40}\b|\b[0-9a-fA-F]{64}\b"
+IP_REGEX = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"
+IPV6_REGEX = r"\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|"
+IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}"
+IPV6_REGEX += (
+    r"|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))"
+)
+IPV6_REGEX += r"|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})"
+IPV6_REGEX += r"|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|"
+IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})"
+IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
+IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})"
+IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
+IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})"
+IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
+IPV6_REGEX += r"(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})"
+IPV6_REGEX += r"|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|"
+IPV6_REGEX += (
+    r"(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d"
+)
+IPV6_REGEX += r"|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*"
+
+
+class EmailBodyDict(TypedDict):
+    """Type definition for email body dictionary."""
+
+    file_path: str
+    charset: str | None
+
+
+@dataclass
+class ProcessEmailContext:
+    """Context object for email processing with SDK components."""
+
+    soar: SOARClient
+    vault: VaultBase
+    app_id: str
+    folder_name: str
+    is_hex: bool
+    action_name: str | None = None
+    app_run_id: int | None = None
+
+
+class EmailProcessor:
+    """Email processor for parsing and extracting artifacts from RFC822 emails."""
+
+    def __init__(self, context: ProcessEmailContext, config: dict[str, Any]) -> None:
+        self.context = context
+        self._config = config
+        self._email_id_contains: list[str] = []
+        self._container: dict[str, Any] = {}
+        self._artifacts: list[dict[str, Any]] = []
+        self._attachments: list[dict[str, Any]] = []
+        self._external_headers: list[CaseInsensitiveDict] = []
+        self._external_attachments: list[dict[str, Any]] = []
+        self._parsed_mail: dict[str, Any] | None = None
+        self._guid_to_hash: dict[str, str] = {}
+        self._tmp_dirs: list[str] = []
+
+    def _extract_urls_domains(
+        self, file_data: str, urls: set[str], domains: set[str]
+    ) -> None:
+        if (not self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]) and (
+            not self._config[PROC_EMAIL_JSON_EXTRACT_URLS]
+        ):
+            return
+
+        try:
+            soup = BeautifulSoup(file_data, "html.parser")
+        except Exception as e:
+            logger.debug(f"Error occurred while extracting domains of the URLs: {e}")
+            return
+
+        uris = []
+        links = soup.find_all(href=True)
+        srcs = soup.find_all(src=True)
+
+        if links:
+            for x in links:
+                uris.append(clean_url(x.get_text()))
+                if not x["href"].startswith("mailto:"):
+                    uris.append(x["href"])
+
+        if srcs:
+            for x in srcs:
+                uris.append(clean_url(x.get_text()))
+                uris.append(x["src"])
+
+        file_data = unescape(file_data)
+        regex_uris = re.findall(URI_REGEX, file_data)
+        uris.extend(clean_url(x) for x in regex_uris)
+
+        validated_urls = []
+        for url in uris:
+            try:
+                validate_url(url)
+                validated_urls.append(url)
+            except Exception as e:
+                logger.debug(f"URL validation failed for {url}: {e}")
+
+        if self._config[PROC_EMAIL_JSON_EXTRACT_URLS]:
+            urls |= set(validated_urls)
+
+        if self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]:
+            for uri in validated_urls:
+                domain = phantom.get_host_from_url(uri)  # type: ignore[attr-defined]
+                if domain and (not is_ip(domain)):
+                    domains.add(domain)
+            if links:
+                mailtos = [
+                    x["href"] for x in links if (x["href"].startswith("mailto:"))
+                ]
+                for curr_email in mailtos:
+                    domain = curr_email[curr_email.find("@") + 1 :]
+                    if domain and (not is_ip(domain)):
+                        if "?" in domain:
+                            domain = domain[: domain.find("?")]
+                        domains.add(domain)
+
+    def _get_ips(self, file_data: str, ips: set[str]) -> None:
+        for match in re.finditer(IP_REGEX, file_data):
+            ip_candidate = match.group(0).strip()
+            if is_ip(ip_candidate):
+                ips.add(ip_candidate)
+
+        for match in re.finditer(IPV6_REGEX, file_data):
+            ip_candidate = match.group(0).strip()
+            ips.add(ip_candidate)
+
+    def _handle_body(
+        self,
+        body: EmailBodyDict,
+        parsed_mail: dict[str, Any],
+        body_index: int,
+        email_id: str,
+    ) -> int:
+        local_file_path = body["file_path"]
+        charset = body.get("charset")
+
+        ips = parsed_mail[PROC_EMAIL_JSON_IPS]
+        hashes = parsed_mail[PROC_EMAIL_JSON_HASHES]
+        urls = parsed_mail[PROC_EMAIL_JSON_URLS]
+        domains = parsed_mail[PROC_EMAIL_JSON_DOMAINS]
+
+        file_data_raw: str | bytes | None = None
+        try:
+            with open(local_file_path) as f:
+                file_data_raw = f.read()
+        except Exception:
+            with open(local_file_path, "rb") as f:
+                file_data_raw = f.read()
+
+        if (file_data_raw is None) or (len(file_data_raw) == 0):
+            return APP_ERROR
+
+        file_data: str = (
+            UnicodeDammit(file_data_raw).unicode_markup.encode("utf-8").decode("utf-8")
+        )
+
+        self._parse_email_headers_as_inline(file_data, parsed_mail, charset, email_id)
+
+        if self._config[PROC_EMAIL_JSON_EXTRACT_DOMAINS]:
+            emails = []
+            emails.extend(re.findall(EMAIL_REGEX, file_data, re.IGNORECASE))
+            emails.extend(re.findall(EMAIL_REGEX2, file_data, re.IGNORECASE))
+
+            for curr_email in emails:
+                domain = curr_email[curr_email.rfind("@") + 1 :]
+                domains.add(domain)
+
+        self._extract_urls_domains(file_data, urls, domains)
+
+        if self._config[PROC_EMAIL_JSON_EXTRACT_IPS]:
+            self._get_ips(file_data, ips)
+
+        if self._config[PROC_EMAIL_JSON_EXTRACT_HASHES]:
+            hashs_in_mail = re.findall(HASH_REGEX, file_data)
+            if hashs_in_mail:
+                hashes |= set(hashs_in_mail)
+
+        return APP_SUCCESS
+
+    def _add_artifacts(
+        self,
+        cef_key: str,
+        input_set: set[str],
+        artifact_name: str,
+        start_index: int,
+        artifacts: list[dict[str, Any]],
+    ) -> int:
+        added_artifacts = 0
+        for entry in input_set:
+            if not entry:
+                continue
+
+            artifact: dict[str, Any] = {}
+            artifact.update(_artifact_common)
+            artifact["source_data_identifier"] = str(start_index + added_artifacts)
+            artifact["cef"] = {cef_key: entry}
+            artifact["name"] = artifact_name
+            logger.debug(f"Artifact: {artifact}")
+            artifacts.append(artifact)
+            added_artifacts += 1
+
+        return added_artifacts
+
+    def _parse_email_headers_as_inline(
+        self,
+        file_data: str,
+        parsed_mail: dict[str, Any],
+        charset: str | None,
+        email_id: str,
+    ) -> int:
+        email_text = re.sub(
+            r"(?im)^.*forwarded message.*\r?\n", "", file_data.strip(), count=1
+        )
+        mail = email.message_from_string(email_text)
+        self._parse_email_headers(parsed_mail, mail, charset, add_email_id=email_id)
+        return APP_SUCCESS
+
+    def _add_email_header_artifacts(
+        self,
+        email_header_artifacts: list[dict[str, Any]],
+        start_index: int,
+        artifacts: list[dict[str, Any]],
+    ) -> int:
+        added_artifacts = 0
+        for artifact in email_header_artifacts:
+            artifact["source_data_identifier"] = str(start_index + added_artifacts)
+            artifacts.append(artifact)
+            added_artifacts += 1
+        return added_artifacts
+
+    def _create_artifacts(self, parsed_mail: dict[str, Any]) -> int:
+        ips = parsed_mail[PROC_EMAIL_JSON_IPS]
+        hashes = parsed_mail[PROC_EMAIL_JSON_HASHES]
+        urls = parsed_mail[PROC_EMAIL_JSON_URLS]
+        domains = parsed_mail[PROC_EMAIL_JSON_DOMAINS]
+        email_headers = parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS]
+
+        artifact_id = 0
+
+        added_artifacts = self._add_artifacts(
+            "sourceAddress", ips, "IP Artifact", artifact_id, self._artifacts
+        )
+        artifact_id += added_artifacts
+
+        added_artifacts = self._add_artifacts(
+            "fileHash", hashes, "Hash Artifact", artifact_id, self._artifacts
+        )
+        artifact_id += added_artifacts
+
+        added_artifacts = self._add_artifacts(
+            "requestURL", urls, "URL Artifact", artifact_id, self._artifacts
+        )
+        artifact_id += added_artifacts
+
+        added_artifacts = self._add_artifacts(
+            "destinationDnsDomain",
+            domains,
+            "Domain Artifact",
+            artifact_id,
+            self._artifacts,
+        )
+        artifact_id += added_artifacts
+
+        added_artifacts = self._add_email_header_artifacts(
+            email_headers, artifact_id, self._artifacts
+        )
+        artifact_id += added_artifacts
+
+        return APP_SUCCESS
+
+    def _get_container_name(self, parsed_mail: dict[str, Any], email_id: str) -> str:
+        def_cont_name = f"Email ID: {email_id}"
+        subject = parsed_mail.get(PROC_EMAIL_JSON_SUBJECT)
+
+        if not subject:
+            return def_cont_name
+
+        try:
+            return str(make_header(decode_header(subject)))
+        except Exception:
+            return decode_uni_string(subject, def_cont_name)
+
+    def _handle_if_body(
+        self,
+        content_disp: str | None,
+        content_id: str | None,
+        content_type: str | None,
+        part: Message,
+        bodies: list[EmailBodyDict],
+        file_path: str,
+    ) -> tuple[int, bool]:
+        process_as_body = False
+
+        if content_disp is None or (
+            content_disp.lower().strip() == "inline"
+            and content_type
+            and (("text/html" in content_type) or ("text/plain" in content_type))
+        ):
+            process_as_body = True
+
+        if not process_as_body:
+            return APP_SUCCESS, True
+
+        part_payload = part.get_payload(decode=True)
+
+        if not part_payload:
+            return APP_SUCCESS, False
+
+        with open(file_path, "wb") as f:
+            f.write(part_payload)  # type: ignore[arg-type]
+
+        bodies.append({"file_path": file_path, "charset": part.get_content_charset()})
+
+        return APP_SUCCESS, False
+
+    def _handle_attachment(self, part: Message, file_name: str, file_path: str) -> int:
+        if self._parsed_mail is None:
+            return APP_ERROR
+
+        files = self._parsed_mail[PROC_EMAIL_JSON_FILES]
+
+        if not self._config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]:
+            return APP_SUCCESS
+
+        part_base64_encoded = part.get_payload()
+
+        headers = self._get_email_headers_from_part(part)
+
+        attach_meta_info: dict[str, Any] = {}
+
+        if headers:
+            attach_meta_info = {"headers": dict(headers)}
+
+        for curr_attach in self._external_attachments:
+            if curr_attach.get("should_ignore", False):
+                continue
+
+            try:
+                attach_content = curr_attach["content"]
+            except Exception as e:
+                logger.debug(f"Failed to get attachment content: {e}")
+                continue
+
+            if attach_content.strip().replace("\r\n", "") == str(
+                part_base64_encoded
+            ).strip().replace("\r\n", ""):
+                attach_meta_info.update(dict(curr_attach))
+                del attach_meta_info["content"]
+                curr_attach["should_ignore"] = True
+
+        part_payload = part.get_payload(decode=True)
+        if not part_payload:
+            return APP_SUCCESS
+
+        try:
+            with open(file_path, "wb") as f:
+                f.write(part_payload)  # type: ignore[arg-type]
+        except OSError as e:
+            try:
+                if "File name too long" in str(e):
+                    new_file_name = "ph_long_file_name_temp"
+                    file_path = "{}{}".format(
+                        remove_child_info(file_path).rstrip(
+                            file_name.replace("<", "").replace(">", "").replace(" ", "")
+                        ),
+                        new_file_name,
+                    )
+                    logger.debug(f"Original filename: {file_name}")
+                    logger.debug(f"Modified filename: {new_file_name}")
+                    with open(file_path, "wb") as long_file:
+                        long_file.write(part_payload)  # type: ignore[arg-type]
+                else:
+                    logger.debug(f"Error occurred while adding file to Vault: {e}")
+                    return APP_ERROR
+            except Exception as e:
+                logger.debug(f"Error occurred while adding file to Vault: {e}")
+                return APP_ERROR
+        except Exception as e:
+            logger.debug(f"Error occurred while adding file to Vault: {e}")
+            return APP_ERROR
+
+        files.append(
+            {
+                "file_name": file_name,
+                "file_path": file_path,
+                "meta_info": attach_meta_info,
+            }
+        )
+
+        return APP_SUCCESS
+
+    def _handle_part(
+        self,
+        part: Message,
+        part_index: int,
+        tmp_dir: str,
+        extract_attach: bool,
+        parsed_mail: dict[str, Any],
+    ) -> int:
+        bodies: list[EmailBodyDict] = parsed_mail[PROC_EMAIL_JSON_BODIES]
+
+        file_name = part.get_filename()
+        content_disp = part.get("Content-Disposition")
+        content_type = part.get("Content-Type")
+        content_id = part.get("Content-ID")
+
+        if file_name is None:
+            name = f"part_{part_index}"
+            extension = f".{part_index}"
+
+            if content_type is not None:
+                ext_guess = mimetypes.guess_extension(re.sub(";.*", "", content_type))
+                if ext_guess:
+                    extension = ext_guess
+
+            if content_id is not None:
+                name = content_id
+
+            file_name = f"{name}{extension}"
+        else:
+            file_name = decode_uni_string(file_name, file_name)
+
+        file_path = "{}/{}_{}".format(
+            tmp_dir,
+            part_index,
+            file_name.translate(file_name.maketrans("", "", "".join(["<", ">", " "]))),
+        )
+
+        logger.debug(f"file_path: {file_path}")
+
+        _status, process_further = self._handle_if_body(
+            content_disp, content_id, content_type, part, bodies, file_path
+        )
+
+        if not process_further:
+            return APP_SUCCESS
+
+        if (content_type is not None) and (
+            content_type.find(PROC_EMAIL_CONTENT_TYPE_MESSAGE) != -1
+        ):
+            return APP_SUCCESS
+
+        self._handle_attachment(part, file_name, file_path)
+
+        return APP_SUCCESS
+
+    def _update_headers(self, headers: CaseInsensitiveDict) -> int:
+        if not self._external_headers:
+            return APP_SUCCESS
+
+        if not headers:
+            return APP_SUCCESS
+
+        headers_ci = CaseInsensitiveDict(headers)
+
+        for curr_header_lower in self._external_headers:
+            if (
+                headers_ci.get("message-id", "default_value1").strip()
+                == curr_header_lower.get("message-id", "default_value2").strip()
+            ):
+                headers.update(curr_header_lower)
+
+        return APP_SUCCESS
+
+    def _get_email_headers_from_part(
+        self, part: Message, charset: str | None = None
+    ) -> CaseInsensitiveDict:
+        email_headers = list(part.items())
+
+        if not email_headers:
+            return CaseInsensitiveDict()
+
+        if charset is None:
+            charset = part.get_content_charset() or "utf-8"
+
+        headers: CaseInsensitiveDict = CaseInsensitiveDict()
+        try:
+            for header_item in email_headers:
+                headers.update({header_item[0]: get_string(header_item[1], charset)})
+        except Exception as e:
+            logger.debug(
+                f"Error converting header with charset {charset}: {e}. Using raw values."
+            )
+            for header_item in email_headers:
+                headers.update({header_item[0]: header_item[1]})
+
+        try:
+            received_headers = [
+                get_string(x[1], charset)
+                for x in email_headers
+                if x[0].lower() == "received"
+            ]
+        except Exception as e:
+            logger.debug(f"Error converting received headers: {e}")
+            received_headers = [
+                x[1] for x in email_headers if x[0].lower() == "received"
+            ]
+
+        if received_headers:
+            headers["Received"] = received_headers
+
+        subject = headers.get("Subject")
+        if subject:
+            try:
+                headers["decodedSubject"] = str(make_header(decode_header(subject)))
+            except Exception:
+                headers["decodedSubject"] = decode_uni_string(subject, subject)
+
+        to_data = headers.get("To")
+        if to_data:
+            headers["decodedTo"] = decode_uni_string(to_data, to_data)
+
+        from_data = headers.get("From")
+        if from_data:
+            headers["decodedFrom"] = decode_uni_string(from_data, from_data)
+
+        cc_data = headers.get("CC")
+        if cc_data:
+            headers["decodedCC"] = decode_uni_string(cc_data, cc_data)
+
+        return headers
+
+    def _parse_email_headers(
+        self,
+        parsed_mail: dict[str, Any],
+        part: Message,
+        charset: str | None = None,
+        add_email_id: str | None = None,
+    ) -> int:
+        email_header_artifacts = parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS]
+
+        headers = self._get_email_headers_from_part(part, charset)
+
+        if not headers:
+            return 0
+
+        cef_artifact: dict[str, Any] = {}
+        cef_types: dict[str, list[str]] = {}
+
+        if headers.get("From"):
+            cef_artifact.update({"fromEmail": headers["From"]})
+
+        if headers.get("To"):
+            cef_artifact.update({"toEmail": headers["To"]})
+
+        message_id = headers.get("message-id")
+        if (not cef_artifact) and (message_id is None):
+            return 0
+
+        cef_types.update({"fromEmail": ["email"], "toEmail": ["email"]})
+
+        self._update_headers(headers)
+        cef_artifact["emailHeaders"] = dict(headers)
+
+        body = None
+
+        for curr_key in list(cef_artifact["emailHeaders"].keys()):
+            if curr_key.lower().startswith("body"):
+                body = cef_artifact["emailHeaders"].pop(curr_key)
+            elif curr_key in ("parentInternetMessageId", "parentGuid", "emailGuid"):
+                curr_value = cef_artifact["emailHeaders"].pop(curr_key)
+                cef_artifact.update({curr_key: curr_value})
+
+        if self._config.get(PROC_EMAIL_JSON_EXTRACT_BODY, False) and not body:
+            queue: list[Message] = [part]
+            i = 1
+            while len(queue) > 0:
+                cur_part = queue.pop(0)
+                payload = cur_part.get_payload()
+                if isinstance(payload, list):
+                    queue.extend(payload)  # type: ignore[arg-type]
+                else:
+                    encoding = cur_part["Content-Transfer-Encoding"]
+                    if encoding:
+                        if "base64" in encoding.lower():
+                            payload = base64.b64decode(
+                                "".join(str(payload).splitlines())
+                            )
+                        elif encoding != "8bit":
+                            payload = cur_part.get_payload(decode=True)
+                    payload = (
+                        UnicodeDammit(payload)
+                        .unicode_markup.encode("utf-8")
+                        .decode("utf-8")
+                    )
+                    try:
+                        json.dumps({"body": payload})
+                    except (TypeError, UnicodeDecodeError):
+                        try:
+                            payload = payload.decode("UTF-8")  # type: ignore[union-attr]
+                        except (UnicodeDecodeError, AttributeError):
+                            logger.debug(
+                                "Email body caused unicode exception. Encoding as base64."
+                            )
+                            if isinstance(payload, bytes):
+                                payload = base64.b64encode(payload).decode("UTF-8")
+                            else:
+                                payload = base64.b64encode(
+                                    str(payload).encode("UTF-8")
+                                ).decode("UTF-8")
+                            cef_artifact["body_base64encoded"] = True
+
+                    cef_artifact.update({f"bodyPart{i}": payload if payload else None})
+                    cef_artifact.update(
+                        {
+                            f"bodyPart{i}ContentType": cur_part["Content-Type"]
+                            if cur_part["Content-Type"]
+                            else None
+                        }
+                    )
+                    i += 1
+
+        if add_email_id:
+            cef_artifact["emailId"] = add_email_id
+            if self._email_id_contains:
+                cef_types.update({"emailId": self._email_id_contains})
+
+        artifact: dict[str, Any] = {}
+        artifact.update(_artifact_common)
+        artifact["name"] = "Email Artifact"
+        artifact["cef"] = cef_artifact
+        artifact["cef_types"] = cef_types
+        email_header_artifacts.append(artifact)
+
+        return len(email_header_artifacts)
+
+    def _handle_mail_object(
+        self,
+        mail: Message,
+        email_id: str,
+        rfc822_email: str,
+        tmp_dir: str,
+        start_time_epoch: float,
+    ) -> int:
+        self._parsed_mail = {}
+
+        tmp_dir_path = Path(tmp_dir)
+        if not tmp_dir_path.exists():
+            tmp_dir_path.mkdir(parents=True)
+
+        extract_attach = self._config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]
+
+        self._parsed_mail[PROC_EMAIL_JSON_SUBJECT] = mail.get("Subject", "")
+        self._parsed_mail[PROC_EMAIL_JSON_FROM] = mail.get("From", "")
+        self._parsed_mail[PROC_EMAIL_JSON_TO] = mail.get("To", "")
+        self._parsed_mail[PROC_EMAIL_JSON_DATE] = mail.get("Date", "")
+        self._parsed_mail[PROC_EMAIL_JSON_MSG_ID] = mail.get("Message-ID", "")
+        self._parsed_mail[PROC_EMAIL_JSON_FILES] = files = []  # type: ignore[var-annotated]
+        bodies: list[EmailBodyDict] = []
+        self._parsed_mail[PROC_EMAIL_JSON_BODIES] = bodies
+        self._parsed_mail[PROC_EMAIL_JSON_START_TIME] = start_time_epoch
+        self._parsed_mail[PROC_EMAIL_JSON_EMAIL_HEADERS] = []
+
+        if mail.is_multipart():
+            for i, part in enumerate(mail.walk()):
+                add_email_id = None
+                if i == 0:
+                    add_email_id = email_id
+
+                self._parse_email_headers(
+                    self._parsed_mail, part, add_email_id=add_email_id
+                )
+
+                if part.is_multipart():
+                    continue
+                try:
+                    ret_val = self._handle_part(
+                        part, i, tmp_dir, extract_attach, self._parsed_mail
+                    )
+                except Exception as e:
+                    logger.debug(f"ErrorExp in _handle_part # {i}: {e}")
+                    continue
+
+                if ret_val == APP_ERROR:
+                    continue
+
+        else:
+            self._parse_email_headers(self._parsed_mail, mail, add_email_id=email_id)
+            file_path = f"{tmp_dir}/part_1.text"
+            payload = mail.get_payload(decode=True)
+            if payload:
+                with open(file_path, "wb") as f:
+                    f.write(payload)  # type: ignore[arg-type]
+                bodies.append(
+                    {"file_path": file_path, "charset": mail.get_content_charset()}
+                )
+
+        container_name = self._get_container_name(self._parsed_mail, email_id)
+
+        if container_name is None:
+            return APP_ERROR
+
+        container: dict[str, Any] = {}
+        container_data = dict(self._parsed_mail)
+
+        del container_data[PROC_EMAIL_JSON_EMAIL_HEADERS]
+        container.update(_container_common)
+
+        if not self.context.is_hex:
+            try:
+                folder_hex = hashlib.sha256(self.context.folder_name)  # type: ignore[arg-type]
+            except Exception:
+                folder_hex = hashlib.sha256(self.context.folder_name.encode())
+
+            folder_sdi = folder_hex.hexdigest()
+        else:
+            folder_sdi = self.context.folder_name
+
+        self._container["source_data_identifier"] = f"{folder_sdi} : {email_id}"
+        self._container["name"] = container_name
+        self._container["data"] = {"raw_email": rfc822_email}
+
+        self._parsed_mail[PROC_EMAIL_JSON_IPS] = set()
+        self._parsed_mail[PROC_EMAIL_JSON_HASHES] = set()
+        self._parsed_mail[PROC_EMAIL_JSON_URLS] = set()
+        self._parsed_mail[PROC_EMAIL_JSON_DOMAINS] = set()
+
+        for i, body in enumerate(bodies):
+            if not body:
+                continue
+
+            try:
+                self._handle_body(body, self._parsed_mail, i, email_id)
+            except Exception as e:
+                logger.debug(f"ErrorExp in _handle_body # {i}: {e!s}")
+                continue
+
+        self._attachments.extend(files)
+
+        self._create_artifacts(self._parsed_mail)
+
+        return APP_SUCCESS
+
+    def _set_email_id_contains(self, email_id: str) -> None:
+        email_id_str = str(email_id)
+
+        if is_sha1(email_id_str):
+            self._email_id_contains = ["vault id"]
+
+    def _int_process_email(
+        self, rfc822_email: str, email_id: str, start_time_epoch: float
+    ) -> tuple[int, str, list[dict[str, Any]]]:
+        mail = email.message_from_string(rfc822_email)
+
+        tmp_dir = tempfile.mkdtemp(prefix="ph_email_")
+        self._tmp_dirs.append(tmp_dir)
+
+        try:
+            ret_val = self._handle_mail_object(
+                mail, email_id, rfc822_email, tmp_dir, start_time_epoch
+            )
+        except Exception as e:
+            message = f"ErrorExp in self._handle_mail_object: {e}"
+            logger.debug(message)
+            return APP_ERROR, message, []
+
+        results = [
+            {
+                "container": self._container,
+                "artifacts": self._artifacts,
+                "files": self._attachments,
+                "temp_directory": tmp_dir,
+            }
+        ]
+
+        return ret_val, "Email Parsed", results
+
+    def process_email(
+        self,
+        base_connector: object,
+        rfc822_email: str,
+        email_id: str,
+        config: dict[str, Any],
+        epoch: float,
+        container_id: int | None = None,
+        email_headers: list[dict[str, Any]] | None = None,
+        attachments_data: list[dict[str, Any]] | None = None,
+    ) -> tuple[int, str]:
+        """Process an email and extract artifacts."""
+        self._config = config
+
+        if email_headers:
+            for curr_header in email_headers:
+                self._external_headers.append(CaseInsensitiveDict(curr_header))
+
+        if (config[PROC_EMAIL_JSON_EXTRACT_ATTACHMENTS]) and (
+            attachments_data is not None
+        ):
+            self._external_attachments = attachments_data
+
+        with contextlib.suppress(Exception):
+            self._set_email_id_contains(email_id)
+
+        ret_val, message, results = self._int_process_email(
+            rfc822_email, email_id, epoch
+        )
+
+        if not ret_val:
+            self._del_tmp_dirs()
+            return APP_ERROR, message
+
+        try:
+            self._parse_results(results, container_id)
+        except Exception:
+            self._del_tmp_dirs()
+            raise
+
+        return APP_SUCCESS, "Email Processed"
+
+    def _save_ingested(
+        self, container: dict[str, Any], using_dummy: bool
+    ) -> tuple[int, str, int | None]:
+        if using_dummy:
+            cid = container["id"]
+            artifacts = container["artifacts"]
+            for artifact in artifacts:
+                artifact["container_id"] = cid
+            try:
+                _ids = self.context.soar.save_artifacts(artifacts)  # type: ignore[attr-defined]
+                ret_val, message = APP_SUCCESS, "Success"
+                logger.debug(
+                    f"save_artifacts returns, value: {ret_val}, reason: {message}"
+                )
+            except Exception as e:
+                ret_val, message = APP_ERROR, str(e)
+                logger.debug(f"save_artifacts failed: {e}")
+                return ret_val, message, None
+
+            return ret_val, message, cid
+        else:
+            try:
+                cid = self.context.soar.save_container(container)  # type: ignore[attr-defined]
+                ret_val, message = APP_SUCCESS, "Success"
+                logger.debug(
+                    f"save_container (with artifacts) returns, value: {ret_val}, reason: {message}, id: {cid}"
+                )
+            except Exception as e:
+                ret_val, message, cid = APP_ERROR, str(e), None
+                logger.debug(f"save_container failed: {e}")
+
+            return ret_val, message, cid
+
+    def _handle_save_ingested(
+        self,
+        artifacts: list[dict[str, Any]],
+        container: dict[str, Any] | None,
+        container_id: int | None,
+        files: list[dict[str, Any]],
+    ) -> None:
+        using_dummy = False
+
+        if container_id:
+            using_dummy = True
+            container = {
+                "name": "Dummy Container",
+                "dummy": True,
+                "id": container_id,
+                "artifacts": artifacts,
+            }
+        elif container:
+            container["artifacts"] = artifacts
+        else:
+            return
+
+        for artifact in [
+            x
+            for x in container.get("artifacts", [])
+            if not x.get("source_data_identifier")
+        ]:
+            self._set_sdi(artifact)
+
+        if files and container.get("artifacts"):
+            container["artifacts"][-1]["run_automation"] = False
+
+        ret_val, message, container_id = self._save_ingested(container, using_dummy)
+
+        if ret_val == APP_ERROR:
+            message = f"Failed to save ingested artifacts, error msg: {message}"
+            logger.debug(message)
+            return
+
+        if not container_id:
+            message = "save_container did not return a container_id"
+            logger.debug(message)
+            return
+
+        vault_ids: list[str] = []
+        vault_artifacts_added = 0
+
+        last_file = len(files) - 1
+        for i, curr_file in enumerate(files):
+            run_automation = i == last_file
+            ret_val, added_to_vault = self._handle_file(
+                curr_file,
+                vault_ids,
+                container_id,
+                vault_artifacts_added,
+                run_automation,
+            )
+
+            if added_to_vault:
+                vault_artifacts_added += 1
+
+    def _parse_results(
+        self, results: list[dict[str, Any]], container_id: int | None = None
+    ) -> int:
+        container_count = DEFAULT_CONTAINER_COUNT
+        results = results[:container_count]
+
+        for result in results:
+            if container_id is None:
+                container = result.get("container")
+
+                if not container:
+                    continue
+
+                container.update(_container_common)
+
+            else:
+                container = None
+
+            artifacts = result.get("artifacts", [])
+            for _j, artifact in enumerate(artifacts):
+                if not artifact:
+                    continue
+
+                self._set_sdi(artifact)
+
+            if not artifacts:
+                continue
+
+            len_artifacts = len(artifacts)
+
+            for j, artifact in enumerate(artifacts):
+                if not artifact:
+                    continue
+
+                if (j + 1) == len_artifacts:
+                    artifact["run_automation"] = True
+
+                cef_artifact = artifact.get("cef")
+                if "parentGuid" in cef_artifact:
+                    parent_guid = cef_artifact.pop("parentGuid")
+                    if parent_guid in self._guid_to_hash:
+                        cef_artifact["parentSourceDataIdentifier"] = self._guid_to_hash[
+                            parent_guid
+                        ]
+                if "emailGuid" in cef_artifact:
+                    del cef_artifact["emailGuid"]
+
+            self._handle_save_ingested(
+                artifacts, container, container_id, result.get("files", [])
+            )
+
+        for result in results:
+            if result.get("temp_directory"):
+                shutil.rmtree(result["temp_directory"], ignore_errors=True)
+
+        return APP_SUCCESS
+
+    def _add_vault_hashes_to_dictionary(
+        self, cef_artifact: dict[str, Any], vault_id: str
+    ) -> tuple[int, str]:
+        try:
+            vault_info_data = self.context.vault.get_attachment(vault_id=vault_id)
+        except Exception:
+            return APP_ERROR, "Could not retrieve vault file"
+
+        if not vault_info_data:
+            return APP_ERROR, "Vault ID not found"
+
+        try:
+            metadata = vault_info_data[0].get("metadata")
+        except Exception:
+            return APP_ERROR, "Failed to get vault item metadata"
+
+        if metadata:
+            with contextlib.suppress(Exception):
+                cef_artifact["fileHashSha256"] = metadata["sha256"]
+
+            with contextlib.suppress(Exception):
+                cef_artifact["fileHashMd5"] = metadata["md5"]
+
+            with contextlib.suppress(Exception):
+                cef_artifact["fileHashSha1"] = metadata["sha1"]
+
+        return APP_SUCCESS, "Mapped hash values"
+
+    def _handle_file(
+        self,
+        curr_file: dict[str, Any],
+        vault_ids: list[str],
+        container_id: int,
+        artifact_id: int,
+        run_automation: bool = False,
+    ) -> tuple[int, int]:
+        file_name = curr_file.get("file_name")
+
+        local_file_path = curr_file["file_path"]
+
+        contains = get_file_contains(local_file_path)
+
+        vault_attach_dict: dict[str, Any] = {}
+
+        if not file_name:
+            file_name = Path(local_file_path).name
+
+        logger.debug(f"Vault file name: {file_name}")
+
+        vault_attach_dict[phantom.APP_JSON_ACTION_NAME] = self.context.action_name  # type: ignore[attr-defined]
+        vault_attach_dict[phantom.APP_JSON_APP_RUN_ID] = self.context.app_run_id  # type: ignore[attr-defined]
+
+        file_name = decode_uni_string(file_name, file_name)
+
+        try:
+            vault_id = self.context.vault.add_attachment(
+                container_id=container_id,
+                file_location=local_file_path,
+                file_name=file_name,
+                metadata=vault_attach_dict,
+            )
+        except Exception as e:
+            logger.debug(f"Error adding file to vault: {e}")
+            return APP_ERROR, APP_ERROR
+
+        cef_artifact = curr_file.get("meta_info", {})
+        cef_artifact.update({"fileName": file_name})
+
+        if vault_id:
+            cef_artifact.update(
+                {"vaultId": vault_id, "cs6": vault_id, "cs6Label": "Vault ID"}
+            )
+            self._add_vault_hashes_to_dictionary(cef_artifact, vault_id)
+
+        artifact: dict[str, Any] = {}
+        artifact.update(_artifact_common)
+        artifact["container_id"] = container_id
+        artifact["name"] = "Vault Artifact"
+        artifact["cef"] = cef_artifact
+        artifact["run_automation"] = run_automation
+        if contains:
+            artifact["cef_types"] = {"vaultId": contains, "cs6": contains}
+        self._set_sdi(artifact)
+
+        if "parentGuid" in cef_artifact:
+            parent_guid = cef_artifact.pop("parentGuid")
+            cef_artifact["parentSourceDataIdentifier"] = self._guid_to_hash[parent_guid]
+
+        try:
+            artifact_id_result = self.context.soar.save_artifact(artifact)  # type: ignore[attr-defined]
+            ret_val, status_string = APP_SUCCESS, "Success"
+            logger.debug(
+                f"save_artifact returns, value: {ret_val}, reason: {status_string}, id: {artifact_id_result}"
+            )
+        except Exception as e:
+            ret_val, status_string = APP_ERROR, str(e)
+            logger.debug(f"save_artifact failed: {e}")
+
+        return APP_SUCCESS, ret_val
+
+    def _set_sdi(self, input_dict: dict[str, Any]) -> int:
+        input_dict.pop("source_data_identifier", None)
+
+        input_dict_hash = input_dict
+
+        cef = input_dict.get("cef")
+
+        curr_email_guid = None
+
+        if cef is not None and (("parentGuid" in cef) or ("emailGuid" in cef)):
+            input_dict_hash = deepcopy(input_dict)
+            cef = input_dict_hash["cef"]
+            if "parentGuid" in cef:
+                del cef["parentGuid"]
+            curr_email_guid = cef.get("emailGuid")
+            if curr_email_guid is not None:
+                del cef["emailGuid"]
+
+        input_dict["source_data_identifier"] = create_dict_hash(input_dict_hash)
+
+        if curr_email_guid:
+            self._guid_to_hash[curr_email_guid] = input_dict["source_data_identifier"]
+
+        return APP_SUCCESS
+
+    def _del_tmp_dirs(self) -> None:
+        """Remove any tmp_dirs that were created."""
+        for tmp_dir in self._tmp_dirs:
+            shutil.rmtree(tmp_dir, ignore_errors=True)