splunk-soar-sdk 3.4.0__py3-none-any.whl → 3.6.0__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- soar_sdk/abstract.py +7 -6
- soar_sdk/action_results.py +7 -7
- soar_sdk/actions_manager.py +7 -13
- soar_sdk/apis/artifact.py +3 -3
- soar_sdk/apis/container.py +2 -2
- soar_sdk/apis/es/findings.py +27 -0
- soar_sdk/apis/utils.py +3 -2
- soar_sdk/apis/vault.py +1 -0
- soar_sdk/app.py +24 -27
- soar_sdk/app_cli_runner.py +7 -6
- soar_sdk/app_client.py +3 -4
- soar_sdk/asset.py +7 -9
- soar_sdk/asset_state.py +1 -2
- soar_sdk/async_utils.py +1 -2
- soar_sdk/cli/cli.py +2 -2
- soar_sdk/cli/init/cli.py +5 -5
- soar_sdk/cli/manifests/deserializers.py +4 -3
- soar_sdk/cli/manifests/processors.py +4 -2
- soar_sdk/cli/manifests/serializers.py +4 -4
- soar_sdk/cli/package/cli.py +14 -14
- soar_sdk/cli/package/utils.py +3 -2
- soar_sdk/cli/path_utils.py +1 -1
- soar_sdk/code_renderers/action_renderer.py +5 -4
- soar_sdk/code_renderers/app_renderer.py +1 -1
- soar_sdk/code_renderers/asset_renderer.py +1 -1
- soar_sdk/code_renderers/renderer.py +2 -2
- soar_sdk/compat.py +2 -1
- soar_sdk/decorators/__init__.py +3 -3
- soar_sdk/decorators/action.py +7 -11
- soar_sdk/decorators/make_request.py +9 -11
- soar_sdk/decorators/on_es_poll.py +105 -136
- soar_sdk/decorators/on_poll.py +7 -11
- soar_sdk/decorators/test_connectivity.py +5 -6
- soar_sdk/decorators/view_handler.py +6 -7
- soar_sdk/decorators/webhook.py +3 -5
- soar_sdk/es_client.py +43 -0
- soar_sdk/extras/__init__.py +0 -0
- soar_sdk/extras/email/__init__.py +9 -0
- soar_sdk/extras/email/processor.py +1171 -0
- soar_sdk/extras/email/rfc5322.py +335 -0
- soar_sdk/extras/email/utils.py +178 -0
- soar_sdk/input_spec.py +4 -3
- soar_sdk/logging.py +5 -4
- soar_sdk/meta/actions.py +3 -3
- soar_sdk/meta/app.py +1 -0
- soar_sdk/meta/dependencies.py +47 -11
- soar_sdk/meta/webhooks.py +2 -1
- soar_sdk/models/__init__.py +1 -1
- soar_sdk/models/artifact.py +1 -0
- soar_sdk/models/attachment_input.py +1 -1
- soar_sdk/models/container.py +2 -1
- soar_sdk/models/finding.py +4 -6
- soar_sdk/models/vault_attachment.py +1 -0
- soar_sdk/models/view.py +2 -0
- soar_sdk/params.py +13 -7
- soar_sdk/shims/phantom/action_result.py +1 -1
- soar_sdk/shims/phantom/app.py +1 -1
- soar_sdk/shims/phantom/base_connector.py +3 -4
- soar_sdk/shims/phantom/connector_result.py +0 -1
- soar_sdk/shims/phantom/install_info.py +1 -1
- soar_sdk/shims/phantom/ph_ipc.py +2 -1
- soar_sdk/shims/phantom/vault.py +8 -6
- soar_sdk/shims/phantom_common/app_interface/app_interface.py +1 -0
- soar_sdk/types.py +1 -1
- soar_sdk/views/component_registry.py +0 -1
- soar_sdk/views/template_filters.py +4 -4
- soar_sdk/views/template_renderer.py +3 -2
- soar_sdk/views/view_parser.py +8 -6
- soar_sdk/webhooks/models.py +3 -3
- soar_sdk/webhooks/routing.py +3 -4
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/METADATA +5 -1
- splunk_soar_sdk-3.6.0.dist-info/RECORD +117 -0
- splunk_soar_sdk-3.4.0.dist-info/RECORD +0 -110
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/WHEEL +0 -0
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/entry_points.txt +0 -0
- {splunk_soar_sdk-3.4.0.dist-info → splunk_soar_sdk-3.6.0.dist-info}/licenses/LICENSE +0 -0
soar_sdk/extras/email/rfc5322.py
ADDED
@@ -0,0 +1,335 @@
+import email
+import re
+from dataclasses import dataclass, field
+from email.header import decode_header, make_header
+from email.message import Message
+from html import unescape
+from typing import Any
+from urllib.parse import urlparse
+
+from bs4 import BeautifulSoup, UnicodeDammit  # type: ignore[attr-defined]
+
+from soar_sdk.extras.email.utils import clean_url, decode_uni_string, is_ip
+from soar_sdk.logging import getLogger
+
+logger = getLogger()
+
+URI_REGEX = r"[Hh][Tt][Tt][Pp][Ss]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+EMAIL_REGEX = r"\b[A-Z0-9._%+-]+@+[A-Z0-9.-]+\.[A-Z]{2,}\b"
+
+
+@dataclass
+class EmailHeaders:
+    """Extracted email headers from an RFC 5322 message."""
+
+    email_id: str | None = None
+    message_id: str | None = None
+    to: str | None = None
+    from_address: str | None = None
+    subject: str | None = None
+    date: str | None = None
+    received: list[str] = field(default_factory=list)
+    cc: str | None = None
+    bcc: str | None = None
+    x_mailer: str | None = None
+    x_priority: str | None = None
+    reply_to: str | None = None
+    content_type: str | None = None
+    raw_headers: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class EmailBody:
+    """Extracted email body content."""
+
+    plain_text: str | None = None
+    html: str | None = None
+    charset: str | None = None
+
+
+@dataclass
+class EmailAttachment:
+    """Extracted email attachment metadata."""
+
+    filename: str
+    content_type: str | None = None
+    size: int = 0
+    content_id: str | None = None
+    content: bytes | None = None
+    is_inline: bool = False
+
+
+@dataclass
+class RFC5322EmailData:
+    """Complete extracted data from an RFC 5322 email message."""
+
+    raw_email: str
+    headers: EmailHeaders
+    body: EmailBody
+    urls: list[str] = field(default_factory=list)
+    attachments: list[EmailAttachment] = field(default_factory=list)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary representation."""
+        return {
+            "raw_email": self.raw_email,
+            "headers": {
+                "email_id": self.headers.email_id,
+                "message_id": self.headers.message_id,
+                "to": self.headers.to,
+                "from": self.headers.from_address,
+                "subject": self.headers.subject,
+                "date": self.headers.date,
+                "received": self.headers.received,
+                "cc": self.headers.cc,
+                "bcc": self.headers.bcc,
+                "x_mailer": self.headers.x_mailer,
+                "x_priority": self.headers.x_priority,
+                "reply_to": self.headers.reply_to,
+                "content_type": self.headers.content_type,
+                "raw_headers": self.headers.raw_headers,
+            },
+            "body": {
+                "plain_text": self.body.plain_text,
+                "html": self.body.html,
+                "charset": self.body.charset,
+            },
+            "urls": self.urls,
+            "attachments": [
+                {
+                    "filename": att.filename,
+                    "content_type": att.content_type,
+                    "size": att.size,
+                    "content_id": att.content_id,
+                    "is_inline": att.is_inline,
+                }
+                for att in self.attachments
+            ],
+        }
+
+
+def _decode_header_value(value: str | None) -> str | None:
+    if not value:
+        return None
+    try:
+        return str(make_header(decode_header(value)))
+    except Exception:
+        return decode_uni_string(value, value)
+
+
+def _get_charset(part: Message) -> str:
+    charset = part.get_content_charset()
+    return charset if charset else "utf-8"
+
+
+def _decode_payload(payload: bytes, charset: str) -> str:
+    try:
+        return UnicodeDammit(payload).unicode_markup.encode("utf-8").decode("utf-8")
+    except Exception:
+        try:
+            return payload.decode(charset)
+        except Exception:
+            return payload.decode("utf-8", errors="replace")
+
+
+def _extract_urls_from_content(content: str, urls: set[str], is_html: bool) -> None:
+    if is_html:
+        try:
+            soup = BeautifulSoup(content, "html.parser")
+            for link in soup.find_all(href=True):
+                href = link["href"]
+                if href and not href.startswith("mailto:"):
+                    cleaned = clean_url(href)
+                    if cleaned.startswith("http"):
+                        urls.add(cleaned)
+            for src in soup.find_all(src=True):
+                src_val = src["src"]
+                if src_val:
+                    cleaned = clean_url(src_val)
+                    if cleaned.startswith("http"):
+                        urls.add(cleaned)
+        except Exception as e:
+            logger.debug(f"Error parsing HTML for URLs: {e}")
+
+    content = unescape(content)
+    uri_matches = re.findall(URI_REGEX, content)
+    for uri in uri_matches:
+        urls.add(clean_url(uri))
+
+
+def extract_email_headers(mail: Message, email_id: str | None = None) -> EmailHeaders:
+    """Extract headers from a parsed email Message."""
+    headers = EmailHeaders()
+    headers.email_id = email_id
+    headers.message_id = mail.get("Message-ID")
+    headers.to = _decode_header_value(mail.get("To"))
+    headers.from_address = _decode_header_value(mail.get("From"))
+    headers.subject = _decode_header_value(mail.get("Subject"))
+    headers.date = mail.get("Date")
+    headers.cc = _decode_header_value(mail.get("CC"))
+    headers.bcc = _decode_header_value(mail.get("BCC"))
+    headers.x_mailer = mail.get("X-Mailer")
+    headers.x_priority = mail.get("X-Priority")
+    headers.reply_to = _decode_header_value(mail.get("Reply-To"))
+    headers.content_type = mail.get("Content-Type")
+
+    received_headers = mail.get_all("Received") or []
+    headers.received = [str(r) for r in received_headers]
+
+    for key, value in mail.items():
+        if key.lower() == "received":
+            continue
+        headers.raw_headers[key] = _decode_header_value(str(value)) if value else None
+
+    return headers
+
+
+def extract_email_body(mail: Message) -> EmailBody:
+    """Extract plain text and HTML body from a parsed email Message."""
+    body = EmailBody()
+    charset = _get_charset(mail)
+    body.charset = charset
+
+    if not mail.is_multipart():
+        payload = mail.get_payload(decode=True)
+        if payload and isinstance(payload, bytes):
+            content_type = mail.get_content_type()
+            decoded = _decode_payload(payload, charset)
+            if content_type == "text/html":
+                body.html = decoded
+            else:
+                body.plain_text = decoded
+        return body
+
+    for part in mail.walk():
+        if part.is_multipart():
+            continue
+
+        content_type = part.get_content_type()
+        content_disp = str(part.get("Content-Disposition") or "")
+
+        if "attachment" in content_disp.lower():
+            continue
+
+        payload = part.get_payload(decode=True)
+        if not payload or not isinstance(payload, bytes):
+            continue
+
+        part_charset = _get_charset(part)
+        decoded = _decode_payload(payload, part_charset)
+
+        if content_type == "text/plain" and not body.plain_text:
+            body.plain_text = decoded
+        elif content_type == "text/html" and not body.html:
+            body.html = decoded
+
+    return body
+
+
+def extract_email_urls(mail: Message) -> list[str]:
+    """Extract all URLs from email body content."""
+    urls: set[str] = set()
+    body = extract_email_body(mail)
+
+    if body.html:
+        _extract_urls_from_content(body.html, urls, is_html=True)
+    if body.plain_text:
+        _extract_urls_from_content(body.plain_text, urls, is_html=False)
+
+    return sorted(urls)
+
+
+def extract_email_attachments(
+    mail: Message, include_content: bool = False
+) -> list[EmailAttachment]:
+    """Extract attachment metadata from a parsed email Message."""
+    attachments: list[EmailAttachment] = []
+
+    if not mail.is_multipart():
+        return attachments
+
+    for part in mail.walk():
+        if part.is_multipart():
+            continue
+
+        content_disp = str(part.get("Content-Disposition") or "")
+        content_type = part.get_content_type()
+        content_id = part.get("Content-ID")
+
+        filename = part.get_filename()
+        if not filename:
+            if "attachment" not in content_disp.lower():
+                continue
+            filename = "unnamed_attachment"
+
+        filename = _decode_header_value(filename) or filename
+        is_inline = "inline" in content_disp.lower()
+        raw_payload = part.get_payload(decode=True)
+        payload = raw_payload if isinstance(raw_payload, bytes) else None
+
+        attachment = EmailAttachment(
+            filename=filename,
+            content_type=content_type,
+            size=len(payload) if payload else 0,
+            content_id=content_id.strip("<>") if content_id else None,
+            is_inline=is_inline,
+        )
+
+        if include_content and payload:
+            attachment.content = payload
+
+        attachments.append(attachment)
+
+    return attachments
+
+
+def extract_rfc5322_email_data(
+    rfc822_email: str,
+    email_id: str | None = None,
+    include_attachment_content: bool = False,
+) -> RFC5322EmailData:
+    """Extract all components from an RFC 5322 email string."""
+    mail = email.message_from_string(rfc822_email)
+
+    return RFC5322EmailData(
+        raw_email=rfc822_email,
+        headers=extract_email_headers(mail, email_id),
+        body=extract_email_body(mail),
+        urls=extract_email_urls(mail),
+        attachments=extract_email_attachments(mail, include_attachment_content),
+    )
+
+
+def extract_domains_from_urls(urls: list[str]) -> list[str]:
+    """Extract unique domains from a list of URLs."""
+    domains: set[str] = set()
+
+    for url in urls:
+        try:
+            parsed = urlparse(url)
+            if parsed.netloc and not is_ip(parsed.netloc):
+                domain = parsed.netloc.split(":")[0]
+                domains.add(domain)
+        except Exception as e:
+            logger.debug(f"Failed to parse URL for domain extraction: {e}")
+            continue
+
+    return sorted(domains)
+
+
+def extract_email_addresses_from_body(mail: Message) -> list[str]:
+    """Extract email addresses found in the email body."""
+    addresses: set[str] = set()
+    body = extract_email_body(mail)
+
+    content = ""
+    if body.plain_text:
+        content += body.plain_text
+    if body.html:
+        content += body.html
+
+    if content:
+        matches = re.findall(EMAIL_REGEX, content, re.IGNORECASE)
+        addresses.update(m.lower() for m in matches)
+
+    return sorted(addresses)
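For orientation, a minimal usage sketch of the new rfc5322 helpers follows. It is not part of the package diff; the sample message and email_id are invented, and only functions added above are called.

# Illustrative only: a made-up single-part message run through the new helpers.
from soar_sdk.extras.email.rfc5322 import (
    extract_domains_from_urls,
    extract_rfc5322_email_data,
)

raw = (
    "From: Alice <alice@example.com>\r\n"
    "To: bob@example.com\r\n"
    "Subject: =?utf-8?B?SGVsbG8=?=\r\n"
    "Content-Type: text/plain; charset=utf-8\r\n"
    "\r\n"
    "Report available at https://example.com/report today.\r\n"
)

data = extract_rfc5322_email_data(raw, email_id="demo-1")
print(data.headers.subject)                  # "Hello", decoded from RFC 2047
print(data.urls)                             # URLs matched in the body by URI_REGEX
print(extract_domains_from_urls(data.urls))  # ["example.com"]
print(data.to_dict()["attachments"])         # [] for this single-part message
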
soar_sdk/extras/email/utils.py
ADDED
@@ -0,0 +1,178 @@
+import hashlib
+import ipaddress
+import json
+import re
+from email.header import decode_header, make_header
+from pathlib import Path
+from typing import Any
+
+from bs4 import UnicodeDammit  # type: ignore[attr-defined]
+
+from soar_sdk.logging import getLogger
+
+logger = getLogger()
+
+FILE_EXTENSIONS = {
+    ".vmsn": ["os memory dump", "vm snapshot file"],
+    ".vmss": ["os memory dump", "vm suspend file"],
+    ".js": ["javascript"],
+    ".doc": ["doc"],
+    ".docx": ["doc"],
+    ".xls": ["xls"],
+    ".xlsx": ["xls"],
+}
+
+MAGIC_FORMATS = [
+    ("^PE.* Windows", ["pe file", "hash"]),
+    ("^MS-DOS executable", ["pe file", "hash"]),
+    ("^PDF ", ["pdf"]),
+    ("^MDMP crash", ["process dump"]),
+    ("^Macromedia Flash", ["flash"]),
+]
+
+
+def get_file_contains(file_path: str) -> list[str]:
+    """Get file type contains based on extension and magic bytes."""
+    try:
+        import magic  # type: ignore[import-not-found]
+    except ImportError:
+        logger.warning(
+            "python-magic not installed, file type detection will be limited"
+        )
+        return []
+
+    contains = []
+    ext = Path(file_path).suffix
+    contains.extend(FILE_EXTENSIONS.get(ext, []))
+
+    try:
+        magic_str = magic.from_file(file_path)
+        for regex_pattern, cur_contains in MAGIC_FORMATS:
+            if re.match(regex_pattern, magic_str):
+                contains.extend(cur_contains)
+    except Exception as e:
+        logger.debug(f"Failed to detect file type with magic: {e}")
+
+    return contains
+
+
+def is_ip(input_ip: str) -> bool:
+    """Check if input is a valid IP address."""
+    try:
+        ipaddress.ip_address(input_ip)
+        return True
+    except ValueError:
+        return False
+
+
+def is_ipv6(input_ip: str) -> bool:
+    """Validate if input is an IPv6 address."""
+    try:
+        ip = ipaddress.ip_address(input_ip)
+        return ip.version == 6
+    except ValueError:
+        return False
+
+
+def is_sha1(input_str: str) -> bool:
+    """Validate if the input is a SHA1 hash."""
+    sha1_regex = r"^[0-9a-fA-F]{40}$"
+    return bool(re.match(sha1_regex, input_str))
+
+
+def clean_url(url: str) -> str:
+    """Clean and normalize a URL string."""
+    url = url.strip(">),.]\r\n")
+    if "<" in url:
+        url = url[: url.find("<")]
+    if ">" in url:
+        url = url[: url.find(">")]
+    url = url.rstrip("]")
+    return url.strip()
+
+
+def decode_uni_string(input_str: str, def_name: str) -> str:
+    """Decode RFC 2047 encoded strings."""
+    encoded_strings = re.findall(r"=\?.*?\?=", input_str, re.I)
+
+    if not encoded_strings:
+        return input_str
+
+    try:
+        decoded_strings = [decode_header(x)[0] for x in encoded_strings]
+        decoded_string_dicts = [
+            {"value": x[0], "encoding": x[1]} for x in decoded_strings
+        ]
+    except Exception as e:
+        logger.debug(f"Decoding: {encoded_strings}. Error: {e}")
+        return def_name
+
+    new_str = ""
+    new_str_create_count = 0
+    for _i, decoded_string_dict in enumerate(decoded_string_dicts):
+        value = decoded_string_dict.get("value")
+        encoding = decoded_string_dict.get("encoding")
+
+        if not encoding or not value:
+            continue
+
+        try:
+            if encoding != "utf-8":
+                value = str(value, encoding)
+        except Exception as e:
+            logger.debug(f"Encoding conversion failed: {e}")
+
+        try:
+            new_str += UnicodeDammit(value).unicode_markup
+            new_str_create_count += 1
+        except Exception as e:
+            logger.debug(f"Unicode markup conversion failed: {e}")
+
+    if new_str and new_str_create_count == len(encoded_strings):
+        logger.debug(
+            "Creating a new string entirely from the encoded_strings and assigning into input_str"
+        )
+        input_str = new_str
+
+    return input_str
+
+
+def get_string(input_str: str, charset: str | None = None) -> str:
+    """Convert string to proper encoding with charset handling."""
+    if not input_str:
+        return input_str
+
+    if charset is None:
+        charset = "utf-8"
+
+    try:
+        return UnicodeDammit(input_str).unicode_markup.encode(charset).decode(charset)
+    except Exception:
+        try:
+            return str(make_header(decode_header(input_str)))
+        except Exception:
+            return decode_uni_string(input_str, input_str)
+
+
+def remove_child_info(file_path: str) -> str:
+    """Remove child info suffix from file path."""
+    if file_path.endswith("_True"):
+        return file_path.rstrip("_True")
+    return file_path.rstrip("_False")
+
+
+def create_dict_hash(input_dict: dict[str, Any]) -> str | None:
+    """Create a SHA256 hash of a dictionary."""
+    if not input_dict:
+        return None
+
+    try:
+        input_dict_str = json.dumps(input_dict, sort_keys=True)
+    except Exception as e:
+        logger.debug(f"Handled exception in create_dict_hash: {e}")
+        return None
+
+    try:
+        return hashlib.sha256(input_dict_str).hexdigest()  # type: ignore[arg-type]
+    except TypeError:
+        return hashlib.sha256(input_dict_str.encode("UTF-8")).hexdigest()
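Similarly, a short hypothetical exercise of the new utility helpers (not part of the diff). get_file_contains is omitted because it depends on an optional python-magic install, and all inputs below are invented.

# Illustrative only: sample inputs for the new helper functions.
from soar_sdk.extras.email.utils import (
    clean_url,
    create_dict_hash,
    decode_uni_string,
    is_ip,
    is_sha1,
)

print(clean_url("https://example.com/path>,"))           # "https://example.com/path"
print(is_ip("10.0.0.1"), is_ip("example.com"))           # True False
print(is_sha1("a" * 40))                                 # True
print(decode_uni_string("=?utf-8?B?SGVsbG8=?=", "n/a"))  # "Hello"
print(create_dict_hash({"b": 2, "a": 1}))                # 64-char hex digest of the sorted JSON
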
soar_sdk/input_spec.py
CHANGED
@@ -1,7 +1,8 @@
-from uuid import uuid4
-from pydantic import BaseModel, Field, field_validator, ConfigDict
-from typing import Literal, Any
 import random
+from typing import Any, Literal
+from uuid import uuid4
+
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 
 def id_factory() -> int:
soar_sdk/logging.py
CHANGED
@@ -1,11 +1,12 @@
 import logging
-from
+from typing import Any
 
-from soar_sdk.shims.phantom.install_info import is_soar_available, get_product_version
-from soar_sdk.shims.phantom.ph_ipc import ph_ipc
 from packaging.version import Version
-
+
+from soar_sdk.colors import ANSIColor
 from soar_sdk.compat import remove_when_soar_newer_than
+from soar_sdk.shims.phantom.install_info import get_product_version, is_soar_available
+from soar_sdk.shims.phantom.ph_ipc import ph_ipc
 
 PROGRESS_LEVEL = 25
 logging.addLevelName(PROGRESS_LEVEL, "PROGRESS")
soar_sdk/meta/actions.py
CHANGED
@@ -1,10 +1,10 @@
-from typing import Any,
+from typing import Any, Callable, Type  # noqa: UP035
 
 from pydantic import BaseModel, Field
 
-from soar_sdk.cli.manifests.serializers import ParamsSerializer, OutputsSerializer
-from soar_sdk.params import Params
 from soar_sdk.action_results import ActionOutput
+from soar_sdk.cli.manifests.serializers import OutputsSerializer, ParamsSerializer
+from soar_sdk.params import Params
 
 
 class ActionMeta(BaseModel):
soar_sdk/meta/app.py
CHANGED
soar_sdk/meta/dependencies.py
CHANGED
@@ -1,24 +1,21 @@
 import functools
+import hashlib
 import io
 import os
-from pathlib import Path
 import subprocess
 import tarfile
+from collections.abc import AsyncGenerator, Mapping, Sequence
+from logging import getLogger
+from pathlib import Path
 from tempfile import TemporaryDirectory
-import build
-
 from typing import ClassVar
-from collections.abc import Mapping, Sequence, AsyncGenerator
-from pydantic import BaseModel, Field
-
-from logging import getLogger
 
+import build
 import httpx
-import
+from pydantic import BaseModel, Field
 
 from soar_sdk.compat import remove_when_soar_newer_than
 
-
 logger = getLogger(__name__)
 
 # These dependencies are provided by the Python runner,
@@ -55,7 +52,8 @@ remove_when_soar_newer_than(
     "If the Splunk SDK is available as a wheel now, remove it, and remove all of the code for building wheels from source.",
 )
 DEPENDENCIES_TO_BUILD = {
-    "splunk_sdk",  # https://github.com/splunk/splunk-sdk-python/pull/656
+    "splunk_sdk",  # https://github.com/splunk/splunk-sdk-python/pull/656,
+    "splunk_soar_sdk",  # Useful to build from source when developing the SDK
 }
 
 
@@ -192,6 +190,23 @@ class UvSourceDistribution(BaseModel):
                 return Path(wheel_path).name, f.read()
 
 
+class UvSourceDirectory(BaseModel):
+    """Represents a Python dependency to be built from a source directory on the local filesystem."""
+
+    directory: str
+
+    def build(self) -> tuple[str, bytes]:
+        """Build a wheel from a local source directory."""
+        with TemporaryDirectory() as build_dir:
+            builder = build.ProjectBuilder(
+                self.directory,
+                runner=UvSourceDistribution._builder_runner,
+            )
+            wheel_path = builder.build("wheel", build_dir)
+            with open(wheel_path, "rb") as f:
+                return Path(wheel_path).name, f.read()
+
+
 class DependencyWheel(BaseModel):
     """Represents a Python package dependency with all the information required to fetch its wheel(s) from the CDN."""
 
@@ -202,6 +217,7 @@ class DependencyWheel(BaseModel):
     wheel: UvWheel | None = Field(exclude=True, default=None)
     wheel_aarch64: UvWheel | None = Field(exclude=True, default=None)
     sdist: UvSourceDistribution | None = Field(exclude=True, default=None)
+    source_dir: UvSourceDirectory | None = Field(exclude=True, default=None)
 
     async def collect_wheels(self) -> AsyncGenerator[tuple[str, bytes]]:
         """Collect a list of wheel files to fetch for this dependency across all platforms."""
@@ -211,6 +227,12 @@ class DependencyWheel(BaseModel):
             yield (f"wheels/shared/{wheel_name}", wheel_bytes)
             return
 
+        if self.wheel is None and self.source_dir is not None:
+            logger.info(f"Building local sources for {self.input_file}")
+            wheel_name, wheel_bytes = self.source_dir.build()
+            yield (f"wheels/shared/{wheel_name}", wheel_bytes)
+            return
+
         if self.wheel is None:
             raise ValueError(
                 f"Could not find a suitable wheel or source distribution for {self.module} in uv.lock"
@@ -250,6 +272,13 @@ class UvDependency(BaseModel):
     name: str
 
 
+class UvSource(BaseModel):
+    """Represents the source of a Python package in the uv lock."""
+
+    registry: str | None = None
+    directory: str | None = None
+
+
 class UvPackage(BaseModel):
     """Represents a Python package loaded from the uv lock."""
 
@@ -261,6 +290,7 @@ class UvPackage(BaseModel):
     )
     wheels: list[UvWheel] = []
     sdist: UvSourceDistribution | None = None
+    source: UvSource
 
     def _find_wheel(
         self,
@@ -369,6 +399,12 @@ class UvPackage(BaseModel):
         ):
             wheel.sdist = self.sdist
 
+        if (
+            self.source.directory is not None
+            and UvLock.normalize_package_name(self.name) in DEPENDENCIES_TO_BUILD
+        ):
+            wheel.source_dir = UvSourceDirectory(directory=self.source.directory)
+
         try:
             wheel_x86_64 = self._find_wheel(
                 abi_precedence, python_precedence, self.platform_precedence_x86_64
@@ -376,7 +412,7 @@ class UvPackage(BaseModel):
             wheel.input_file = f"{wheel_x86_64.basename}.whl"
             wheel.wheel = wheel_x86_64
         except FileNotFoundError as e:
-            if wheel.sdist is None:
+            if wheel.sdist is None and wheel.source_dir is None:
                 raise FileNotFoundError(
                     f"Could not find a suitable x86_64 wheel or source distribution for {self.name}"
                 ) from e
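To illustrate the dependencies change above: a small hypothetical sketch of the new local-source build path. The checkout path is an assumption; in the SDK itself, DependencyWheel.collect_wheels() takes this branch when uv.lock records a directory source for a package listed in DEPENDENCIES_TO_BUILD.

# Illustrative only: building a wheel from a local source directory, as the new
# UvSourceDirectory model does. The path below is an assumed local SDK checkout.
from soar_sdk.meta.dependencies import UvSourceDirectory

source = UvSourceDirectory(directory="../splunk-soar-sdk")
wheel_name, wheel_bytes = source.build()  # runs build.ProjectBuilder under the hood
print(wheel_name, len(wheel_bytes))
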
soar_sdk/meta/webhooks.py
CHANGED
@@ -1,6 +1,7 @@
-from pydantic import BaseModel, Field, field_validator
 from ipaddress import ip_network
 
+from pydantic import BaseModel, Field, field_validator
+
 
 class WebhookRouteMeta(BaseModel):
     """Metadata for a webhook route, including the handler function and its properties."""
|