txt2stix 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- txt2stix/__init__.py +33 -0
- txt2stix/ai_extractor/__init__.py +15 -0
- txt2stix/ai_extractor/anthropic.py +12 -0
- txt2stix/ai_extractor/base.py +87 -0
- txt2stix/ai_extractor/deepseek.py +19 -0
- txt2stix/ai_extractor/gemini.py +18 -0
- txt2stix/ai_extractor/openai.py +15 -0
- txt2stix/ai_extractor/openrouter.py +20 -0
- txt2stix/ai_extractor/prompts.py +164 -0
- txt2stix/ai_extractor/utils.py +85 -0
- txt2stix/attack_flow.py +101 -0
- txt2stix/bundler.py +428 -0
- txt2stix/common.py +23 -0
- txt2stix/extractions.py +59 -0
- txt2stix/includes/__init__.py +0 -0
- txt2stix/includes/extractions/ai/config.yaml +1023 -0
- txt2stix/includes/extractions/lookup/config.yaml +393 -0
- txt2stix/includes/extractions/pattern/config.yaml +609 -0
- txt2stix/includes/helpers/mimetype_filename_extension_list.csv +936 -0
- txt2stix/includes/helpers/stix_relationship_types.txt +41 -0
- txt2stix/includes/helpers/tlds.txt +1446 -0
- txt2stix/includes/helpers/windows_registry_key_prefix.txt +12 -0
- txt2stix/includes/lookups/_README.md +11 -0
- txt2stix/includes/lookups/_generate_lookups.py +247 -0
- txt2stix/includes/lookups/attack_pattern.txt +1 -0
- txt2stix/includes/lookups/campaign.txt +1 -0
- txt2stix/includes/lookups/country_iso3166_alpha2.txt +249 -0
- txt2stix/includes/lookups/course_of_action.txt +1 -0
- txt2stix/includes/lookups/disarm_id_v1_5.txt +345 -0
- txt2stix/includes/lookups/disarm_name_v1_5.txt +347 -0
- txt2stix/includes/lookups/extensions.txt +78 -0
- txt2stix/includes/lookups/identity.txt +1 -0
- txt2stix/includes/lookups/infrastructure.txt +1 -0
- txt2stix/includes/lookups/intrusion_set.txt +1 -0
- txt2stix/includes/lookups/malware.txt +2 -0
- txt2stix/includes/lookups/mitre_atlas_id_v4_5_2.txt +116 -0
- txt2stix/includes/lookups/mitre_atlas_name_v4_5_2.txt +117 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +1502 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +1656 -0
- txt2stix/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +1765 -0
- txt2stix/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +141 -0
- txt2stix/includes/lookups/mitre_attack_ics_id_v16_0.txt +254 -0
- txt2stix/includes/lookups/mitre_attack_ics_name_v16_0.txt +293 -0
- txt2stix/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +159 -0
- txt2stix/includes/lookups/mitre_attack_mobile_id_v16_0.txt +277 -0
- txt2stix/includes/lookups/mitre_attack_mobile_name_v16_0.txt +296 -0
- txt2stix/includes/lookups/mitre_capec_id_v3_9.txt +559 -0
- txt2stix/includes/lookups/mitre_capec_name_v3_9.txt +560 -0
- txt2stix/includes/lookups/mitre_cwe_id_v4_15.txt +939 -0
- txt2stix/includes/lookups/mitre_cwe_name_v4_15.txt +939 -0
- txt2stix/includes/lookups/threat_actor.txt +1 -0
- txt2stix/includes/lookups/tld.txt +1422 -0
- txt2stix/includes/lookups/tool.txt +1 -0
- txt2stix/includes/tests/test_cases.yaml +695 -0
- txt2stix/indicator.py +860 -0
- txt2stix/lookups.py +68 -0
- txt2stix/pattern/__init__.py +13 -0
- txt2stix/pattern/extractors/__init__.py +0 -0
- txt2stix/pattern/extractors/base_extractor.py +167 -0
- txt2stix/pattern/extractors/card/README.md +34 -0
- txt2stix/pattern/extractors/card/__init__.py +15 -0
- txt2stix/pattern/extractors/card/amex_card_extractor.py +52 -0
- txt2stix/pattern/extractors/card/diners_card_extractor.py +47 -0
- txt2stix/pattern/extractors/card/discover_card_extractor.py +48 -0
- txt2stix/pattern/extractors/card/jcb_card_extractor.py +43 -0
- txt2stix/pattern/extractors/card/master_card_extractor.py +63 -0
- txt2stix/pattern/extractors/card/union_card_extractor.py +38 -0
- txt2stix/pattern/extractors/card/visa_card_extractor.py +46 -0
- txt2stix/pattern/extractors/crypto/__init__.py +3 -0
- txt2stix/pattern/extractors/crypto/btc_extractor.py +38 -0
- txt2stix/pattern/extractors/directory/__init__.py +10 -0
- txt2stix/pattern/extractors/directory/unix_directory_extractor.py +40 -0
- txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +47 -0
- txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +42 -0
- txt2stix/pattern/extractors/domain/__init__.py +8 -0
- txt2stix/pattern/extractors/domain/domain_extractor.py +39 -0
- txt2stix/pattern/extractors/domain/hostname_extractor.py +36 -0
- txt2stix/pattern/extractors/domain/sub_domain_extractor.py +49 -0
- txt2stix/pattern/extractors/hashes/__init__.py +16 -0
- txt2stix/pattern/extractors/hashes/md5_extractor.py +16 -0
- txt2stix/pattern/extractors/hashes/sha1_extractor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha224_extractor.py +18 -0
- txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +14 -0
- txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +13 -0
- txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +15 -0
- txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +16 -0
- txt2stix/pattern/extractors/helper.py +64 -0
- txt2stix/pattern/extractors/ip/__init__.py +14 -0
- txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +49 -0
- txt2stix/pattern/extractors/ip/ipv4_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +42 -0
- txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +18 -0
- txt2stix/pattern/extractors/ip/ipv6_extractor.py +16 -0
- txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +46 -0
- txt2stix/pattern/extractors/others/__init__.py +22 -0
- txt2stix/pattern/extractors/others/asn_extractor.py +14 -0
- txt2stix/pattern/extractors/others/cpe_extractor.py +29 -0
- txt2stix/pattern/extractors/others/cve_extractor.py +14 -0
- txt2stix/pattern/extractors/others/email_extractor.py +21 -0
- txt2stix/pattern/extractors/others/filename_extractor.py +17 -0
- txt2stix/pattern/extractors/others/iban_extractor.py +15 -0
- txt2stix/pattern/extractors/others/mac_address_extractor.py +13 -0
- txt2stix/pattern/extractors/others/phonenumber_extractor.py +41 -0
- txt2stix/pattern/extractors/others/user_agent_extractor.py +20 -0
- txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +18 -0
- txt2stix/pattern/extractors/url/__init__.py +7 -0
- txt2stix/pattern/extractors/url/url_extractor.py +22 -0
- txt2stix/pattern/extractors/url/url_file_extractor.py +21 -0
- txt2stix/pattern/extractors/url/url_path_extractor.py +74 -0
- txt2stix/retriever.py +126 -0
- txt2stix/stix.py +1 -0
- txt2stix/txt2stix.py +336 -0
- txt2stix/utils.py +86 -0
- txt2stix-0.0.4.dist-info/METADATA +190 -0
- txt2stix-0.0.4.dist-info/RECORD +119 -0
- txt2stix-0.0.4.dist-info/WHEEL +4 -0
- txt2stix-0.0.4.dist-info/entry_points.txt +2 -0
- txt2stix-0.0.4.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
from ipaddress import IPv4Address
|
3
|
+
|
4
|
+
|
5
|
+
class IPv4WithPortExtractor(BaseExtractor):
    """
    Extractor for candidates of the form ``<IPv4 address>:<port>``.

    Attributes:
        name (str): Identifier of the extractor, "pattern_ipv4_address_port".
        extraction_function (function): Callable applied to a candidate string; it forwards to
            validate_ipv4_with_port.
    """

    name = "pattern_ipv4_address_port"
    extraction_function = lambda x: IPv4WithPortExtractor.validate_ipv4_with_port(x)

    @staticmethod
    def validate_ipv4_with_port(x):
        """
        Check whether a string is an IPv4 address followed by a port number.

        Surrounding double quotes are stripped before the check.

        Args:
            x (str): The candidate string.

        Returns:
            tuple | bool: ``(address, port)`` as two strings when the candidate is valid, otherwise False.
        """
        candidate = x.strip('"')

        # Exactly one colon must separate the address from the port.
        if candidate.count(":") != 1:
            return False
        host_text, port_text = candidate.split(":")

        try:
            # Both calls raise ValueError on malformed input.
            IPv4Address(host_text)
            port_in_range = 1 <= int(port_text) <= 65535
        except ValueError:
            return False

        if port_in_range:
            return host_text, port_text
        return False
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# import ipaddress
|
2
|
+
from ipaddress import IPv6Address
|
3
|
+
|
4
|
+
import validators
|
5
|
+
from ..base_extractor import BaseExtractor
|
6
|
+
|
7
|
+
|
8
|
+
class IPv6WithCIDRExtractor(BaseExtractor):
    """
    Extractor that accepts a candidate when ``validators.ipv6`` passes with CIDR handling enabled.

    Attributes:
        name (str): Identifier of the extractor, "pattern_ipv6_address_cidr".
        extraction_function (function): Callable applied to a candidate string; calls
            ``validators.ipv6(candidate, strict=True, cidr=True)``.
    """

    name = "pattern_ipv6_address_cidr"
    # NOTE(review): strict=True together with cidr=True is presumably meant to accept only
    # CIDR-notation values - confirm against the installed validators version.
    extraction_function = lambda ipaddress: validators.ipv6(
        ipaddress,
        strict=True,
        cidr=True,
    )
|
@@ -0,0 +1,16 @@
|
|
1
|
+
import validators
|
2
|
+
from ..base_extractor import BaseExtractor
|
3
|
+
import ipaddress
|
4
|
+
|
5
|
+
|
6
|
+
class IPv6Extractor(BaseExtractor):
    """
    Extractor that accepts a candidate when ``validators.ipv6`` passes with CIDR handling disabled.

    Attributes:
        name (str): Identifier of the extractor, "pattern_ipv6_address_only".
        extraction_function (function): Callable applied to a candidate string; calls
            ``validators.ipv6(candidate, strict=True, cidr=False)``.
    """

    name = "pattern_ipv6_address_only"
    # The lambda parameter shadows the imported ``ipaddress`` module inside the lambda only.
    extraction_function = lambda ipaddress: validators.ipv6(
        ipaddress,
        strict=True,
        cidr=False,
    )
|
@@ -0,0 +1,46 @@
|
|
1
|
+
import re
|
2
|
+
from ipaddress import IPv6Address
|
3
|
+
from ..base_extractor import BaseExtractor
|
4
|
+
|
5
|
+
class IPv6WithPortExtractor(BaseExtractor):
    """
    Extractor for candidates of the form ``[<IPv6 address>]:<port>``.

    Attributes:
        name (str): Identifier of the extractor, "pattern_ipv6_address_port".
        extraction_function (function): Callable applied to a candidate string; it forwards to
            validate_ipv6_with_port.
    """

    name = "pattern_ipv6_address_port"
    extraction_function = lambda x: IPv6WithPortExtractor.validate_ipv6_with_port(x)

    @staticmethod
    def validate_ipv6_with_port(x):
        """
        Check whether a string is a bracketed IPv6 address followed by a port number.

        Args:
            x (str): The candidate string.

        Returns:
            tuple | bool: ``(address, port)`` as two strings when the candidate is valid, otherwise False.
        """
        # The pattern itself requires "]:", so no separate colon pre-check is needed.
        match = re.match(r"\[(.*)\]:(.*)", x)
        if not match:
            return False
        ip_address, port = match.groups()

        try:
            # Validate the IPv6 address part; raises ValueError when malformed.
            IPv6Address(ip_address)

            # Validate the port part; int() raises ValueError for non-numeric text.
            if 1 <= int(port) <= 65535:
                return ip_address, port
        except ValueError:
            pass

        return False
|
@@ -0,0 +1,22 @@
|
|
1
|
+
from .asn_extractor import ASNExtractor
|
2
|
+
from .cpe_extractor import CPEExtractor
|
3
|
+
from .cve_extractor import CVEExtractor
|
4
|
+
from .email_extractor import EmailAddressExtractor
|
5
|
+
from .filename_extractor import FileNameExtractor
|
6
|
+
from .iban_extractor import IBANExtractor
|
7
|
+
from .mac_address_extractor import MacAddressExtractor
|
8
|
+
from .phonenumber_extractor import PhoneNumberExtractor
|
9
|
+
from .user_agent_extractor import UserAgentBaseExtractor
|
10
|
+
from .windows_registry_key_extractor import WindowsRegistryKeyExtractor
|
11
|
+
|
12
|
+
# Extractor classes provided by this package, listed in import order.
OTHER_EXTRACTORS = [
    ASNExtractor,
    CPEExtractor,
    CVEExtractor,
    EmailAddressExtractor,
    FileNameExtractor,
    IBANExtractor,
    MacAddressExtractor,
    PhoneNumberExtractor,
    UserAgentBaseExtractor,
    WindowsRegistryKeyExtractor,
]
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
|
4
|
+
class ASNExtractor(BaseExtractor):
    """
    A class for extracting Autonomous System Numbers (ASNs) from text using a regular expression.

    Attributes:
        name (str): Identifier of the extractor, "pattern_autonomous_system_number".
        extraction_regex (str): Pattern matching "AS" or "ASN", an optional single space, and the
            number, which is captured in group 1.
    """

    name = "pattern_autonomous_system_number"
    # Up to 10 digits so that 4-byte ASNs (maximum 4294967295) match as well; the previous
    # 5-digit limit combined with the trailing \b rejected values such as "AS396982" outright.
    extraction_regex = r"\b(?:ASN?)(?: )?(\d{1,10})\b"
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
|
4
|
+
class CPEExtractor(BaseExtractor):
    """
    A class for extracting Common Platform Enumeration (CPE) strings from text using a custom extraction function.

    Attributes:
        name (str): Identifier of the extractor, "pattern_cpe_uri".
        extraction_function (function): Callable applied to a candidate string; it forwards to is_valid_cpe.
    """

    name = "pattern_cpe_uri"
    extraction_function = lambda x: CPEExtractor.is_valid_cpe(x)

    @staticmethod
    def is_valid_cpe(cpe_string):
        """
        Check whether a string has the outline of a CPE string.

        The check is purely structural: the string must start with "cpe:" and contain
        exactly 12 colons.

        Args:
            cpe_string (str): The string to be checked.

        Returns:
            bool: True if the string passes both checks, False otherwise.
        """
        # Return an explicit bool; the rejected case used to fall through to None.
        return cpe_string.startswith('cpe:') and cpe_string.count(':') == 12
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
|
4
|
+
class CVEExtractor(BaseExtractor):
    """
    A class for extracting Common Vulnerabilities and Exposures (CVE) identifiers from text using a regular expression.

    Attributes:
        name (str): Identifier of the extractor, "pattern_cve_id".
        extraction_regex (str): Pattern matching "CVE-", a 4-digit year, "-", and a sequence number
            of four or more digits.
    """

    name = "pattern_cve_id"
    # The sequence number has no upper length limit (e.g. CVE-2017-1000251), so accept 4+ digits
    # instead of capping at 5, which made longer identifiers fail the trailing \b entirely.
    extraction_regex = r'\bCVE-\d{4}-\d{4,}\b'
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
from ..helper import TLDs
|
3
|
+
|
4
|
+
|
5
|
+
class EmailAddressExtractor(BaseExtractor):
    """
    Extractor for email addresses: a regular expression finds candidates and a filter checks the TLD.

    Attributes:
        name (str): Identifier of the extractor, "pattern_email_address".
        extraction_regex (str): Pattern used to find email-like candidates in the text.
    """
    name = "pattern_email_address"
    extraction_regex = r'[\w.+-]+@[\w-]+\.[\w.-]+'

    @staticmethod
    def filter_function(email):
        """
        Accept a candidate whose final dot-separated label is listed in ``TLDs``.

        Args:
            email (str): The candidate address.

        Returns:
            True when the label after the last "." of the part after the last "@" is in
            ``TLDs``, otherwise None.
        """
        host_part = email.rpartition("@")[2]
        top_level = host_part.rpartition(".")[2]
        return True if top_level in TLDs else None
|
@@ -0,0 +1,17 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
class FileNameExtractor(BaseExtractor):
    """
    Extractor for file names that end in one of a fixed set of extensions.

    Attributes:
        name (str): Identifier of the extractor, "pattern_file_name".
        file_extensions (str): Regex alternation of the accepted extensions, spelled out in
            upper-case and lower-case forms.
        extraction_regex (str): Pattern capturing the stem (group 1) and the extension (group 2);
            a match is rejected when the extension is immediately followed by "(".
    """

    name = "pattern_file_name"

    # Kept as one literal so the alternation order stays exactly as authored.
    file_extensions = "(?:(?:7(?:Z|z))|(?:AP(?:K|P))|(?:B(?:AT|IN|MP))|(?:C(?:LASS|AB|ER|GI|HM|MD|RX))|(?:D(?:OCX?|EB|LL))|EXE|FLV|(?:G(?:ADGET|IF|Z))|INF|(?:J(?:A(?:VA|R)|PG|S))|(?:L(?:NK|OG))|(?:M(?:O(?:F|V)|P(?:4|G)|S(?:G|I)|4V))|ODT|(?:P(?:LUGIN|PTX?|7S|DF|HP|NG|SD|F|Y))|(?:R(?:AR|PM))|(?:S(?:VG|WF|YS|O))|(?:T(?:IFF?|AR|GZ|MP|XT))|(?:V(?:BS|IR))|(?:W(?:MV|SF))|XLSX?|ZIPX?|(?:ap(?:k|p))|(?:b(?:at|in|mp))|(?:c(?:lass|ab|er|gi|hm|md|rx))|(?:d(?:ocx?|eb|ll))|exe|flv|(?:g(?:adget|if|z))|inf|(?:j(?:a(?:va|r)|pg|s))|(?:l(?:nk|og))|(?:m(?:o(?:f|v)|p(?:4|g)|s(?:g|i)|4v))|odt|(?:p(?:lugin|ptx?|7s|df|hp|ng|sd|f|y))|(?:r(?:ar|pm))|(?:s(?:vg|wf|ys|o))|(?:t(?:iff?|ar|gz|mp|xt))|(?:v(?:bs|ir))|(?:w(?:mv|sf))|xlsx?|zipx?)"

    # The stem may not contain path separators, whitespace, or the other characters listed in the class.
    extraction_regex = rf"(?!['\"])([^\\/:\*\?\"\<\>\|\s]*)\.({file_extensions})(?!\()"
|
16
|
+
|
17
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from validators import iban
|
2
|
+
from ..base_extractor import BaseExtractor
|
3
|
+
|
4
|
+
|
5
|
+
class IBANExtractor(BaseExtractor):
    """
    Extractor that accepts a candidate when ``validators.iban`` reports it as valid.

    Attributes:
        name (str): Identifier of the extractor, "pattern_iban_number".
        extraction_function (function): Callable applied to a candidate string; returns the
            result of ``iban(candidate)``.
    """

    name = "pattern_iban_number"

    # Thin wrapper around the imported ``iban`` validator.
    extraction_function = lambda x: iban(x)
|
@@ -0,0 +1,13 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
class MacAddressExtractor(BaseExtractor):
    """
    Extractor for MAC addresses written as six hexadecimal pairs.

    Attributes:
        name (str): Identifier of the extractor, "pattern_mac_address".
        extraction_regex (str): Pattern matching five hex pairs each followed by ":" or "-",
            then a final hex pair.
    """

    name = "pattern_mac_address"

    # Separators may be mixed; the pattern does not require the same one throughout.
    extraction_regex = r'([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})'
|
@@ -0,0 +1,41 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
import phonenumbers
|
4
|
+
from ..base_extractor import BaseExtractor
|
5
|
+
|
6
|
+
|
7
|
+
class PhoneNumberExtractor(BaseExtractor):
    """
    A class for extracting phone numbers from text using a regular expression and the phonenumbers library.

    Attributes:
        name (str): Identifier of the extractor, "pattern_phone_number".
        extraction_regex (str): Pattern for candidates that start with "+" or "00" followed by
            up to four digit groups optionally separated by a space or hyphen.
    """

    name = "pattern_phone_number"
    extraction_regex = r'((\+|00)\d{1,3}[ \-]?\d{1,5}[ \-]?\d{1,5}[ \-]?\d{1,5})'

    @staticmethod
    def validate_phone_number(regex, phone_number):
        """
        Match a whole string against a pattern.

        Args:
            regex (str): The pattern to apply.
            phone_number (str): The string to test.

        Returns:
            re.Match | None: The match object when the entire string matches, otherwise None.
        """
        return re.fullmatch(regex, phone_number)

    @staticmethod
    def filter_function(input_string):
        """
        Reject candidates of implausible length, then defer to parse_phone_number.

        Args:
            input_string (str): The candidate as found in the text.

        Returns:
            False when the space-stripped candidate has 7 or fewer, or 15 or more, characters;
            otherwise the result of parse_phone_number.
        """
        input_string = input_string.replace(" ", "")
        if len(input_string) >= 15 or len(input_string) <= 7:
            return False
        return PhoneNumberExtractor.parse_phone_number(input_string)

    @staticmethod
    def parse_phone_number(phone_number: str):
        """
        Parse a candidate as an international number and validate it.

        Args:
            phone_number (str): The candidate; a leading "00" or "+" is normalised to "+".

        Returns:
            The parsed phonenumbers object when the number is valid, otherwise None.
        """
        # Normalise to the "+<digits>" form expected when no default region is given.
        phone_number = '+' + phone_number.replace(' ', '').removeprefix('00').removeprefix('+')
        try:
            phone = phonenumbers.parse(phone_number, None)
        except phonenumbers.NumberParseException:
            # Only parse failures mean "not a phone number"; anything else should surface.
            return None
        if not phonenumbers.is_valid_number(phone):
            return None
        return phone
|
41
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
from ..base_extractor import BaseExtractor
|
2
|
+
|
3
|
+
|
4
|
+
class UserAgentBaseExtractor(BaseExtractor):
    """
    Extractor for browser user-agent strings that begin with a "Mozilla/<major>.<minor>" token.

    Attributes:
        name (str): Identifier of the extractor, "pattern_user_agent".
        platforms (str): Pattern for an alphabetic product name.
        user_agent_details (str): Pattern for a parenthesised detail section.
        user_agent (str): Composite pattern built from the two above; it is defined here but
            not referenced by extraction_regex.
        extraction_regex (str): Pattern matching the Mozilla token and, optionally, one
            parenthesised section after whitespace.
    """

    name = "pattern_user_agent"

    # Building blocks for the composite ``user_agent`` pattern.
    platforms = r"([a-zA-Z]+)"
    user_agent_details = r"\([\w;\s\,.:-]+\)"
    user_agent = rf"((User-Agent: )|(user-agent: ))?Mozilla/5.0([ ](({user_agent_details})|(({platforms}/)[^\s\"\',]+)))+"

    # The pattern used for extraction.
    extraction_regex = r"Mozilla\/\d+\.\d+(\s+\([^\)]+\))?"
|
@@ -0,0 +1,18 @@
|
|
1
|
+
import re
|
2
|
+
from ..base_extractor import BaseExtractor
|
3
|
+
|
4
|
+
|
5
|
+
class WindowsRegistryKeyExtractor(BaseExtractor):
    """
    Extractor for Windows Registry key paths that start with a known root key.

    Attributes:
        name (str): Identifier of the extractor, "pattern_windows_registry_key".
        valid_root_keys (list[str]): Accepted root keys, in long and abbreviated forms.
        prefix_regex (str): Non-capturing alternation of the escaped root keys.
        extraction_regex (str): Pattern capturing a root key followed by one or more
            backslash or word characters.
    """

    name = "pattern_windows_registry_key"
    valid_root_keys = [
        'HKEY_CLASSES_ROOT',
        'HKCR',
        'HKEY_CURRENT_USER',
        'HKCU',
        'HKEY_LOCAL_MACHINE',
        'HKLM',
        'HKEY_USERS',
        'HKU',
        'HKEY_CURRENT_CONFIG',
        'HKCC',
        'HKEY_PERFORMANCE_DATA',
        'HKEY_DYN_DATA',
    ]
    # Escape each key so it is matched literally, then join into one alternation.
    prefix_regex = r'(?:' + '|'.join(map(re.escape, valid_root_keys)) + r')'
    extraction_regex = rf'\b({prefix_regex}[\\\w]+)\b'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
from .url_path_extractor import URLPathExtractor
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
class URLExtractor(URLPathExtractor):
    """
    URL extractor that keeps only URLs without a path component.

    Extraction itself is inherited from URLPathExtractor; this class only replaces the filter.

    Attributes:
        name (str): Identifier of the extractor, "pattern_url".
        filter_function (function): Accepts a URL when ``is_path`` is false and
            ``validate_host`` (with its default TLD validation) succeeds.
    """

    name = "pattern_url"
    filter_function = lambda url: (
        not URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url)
    )
|
17
|
+
|
18
|
+
|
19
|
+
class HostnameUrlExtractor(URLExtractor):
    """
    Variant of URLExtractor whose filter calls ``validate_host`` with ``validate_tld=False``.

    Attributes:
        name (str): Identifier of the extractor, "pattern_host_name_url".
        filter_function (function): Accepts a URL when ``is_path`` is false and the host validates.
    """

    name = "pattern_host_name_url"

    filter_function = lambda url: (
        not URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
    )
|
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
from ..helper import validate_file_extension
|
3
|
+
from .url_path_extractor import URLPathExtractor
|
4
|
+
|
5
|
+
|
6
|
+
class URLFileExtractor(URLPathExtractor):
    """
    URL extractor that keeps only URLs with a path for which ``validate_file_extension`` succeeds.

    Extraction itself is inherited from URLPathExtractor; this class only replaces the filter.

    Attributes:
        name (str): Identifier of the extractor, "pattern_url_file".
        filter_function (function): Accepts a URL when ``is_path`` is true, ``validate_host``
            (with its default TLD validation) succeeds, and ``validate_file_extension`` is truthy.
    """

    name = "pattern_url_file"
    filter_function = lambda url: (
        URLFileExtractor.is_path(url)
        and URLPathExtractor.validate_host(url)
        and validate_file_extension(url)
    )
|
17
|
+
|
18
|
+
|
19
|
+
class HostnameFileExtractor(URLFileExtractor):
    """
    Variant of URLFileExtractor whose filter calls ``validate_host`` with ``validate_tld=False``.

    Attributes:
        name (str): Identifier of the extractor, "pattern_host_name_file".
        filter_function (function): Accepts a URL when ``is_path`` is true,
            ``validate_file_extension`` is truthy, and the host validates.
    """

    name = "pattern_host_name_file"
    filter_function = lambda url: (
        URLFileExtractor.is_path(url)
        and validate_file_extension(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
    )
|
@@ -0,0 +1,74 @@
|
|
1
|
+
import tldextract
|
2
|
+
import validators
|
3
|
+
|
4
|
+
from txt2stix import utils
|
5
|
+
|
6
|
+
from ..base_extractor import BaseExtractor
|
7
|
+
from ..helper import check_false_positive_domain, validate_file_extension
|
8
|
+
from urllib.parse import urlparse
|
9
|
+
from ipaddress import ip_address
|
10
|
+
|
11
|
+
|
12
|
+
class URLPathExtractor(BaseExtractor):
    """
    Extractor for URLs that have a path component which does not look like a file.

    Attributes:
        name (str): Identifier of the extractor, "pattern_url_path".
        extraction_function (function): Callable applied to a candidate string; it forwards to is_valid_url.
        filter_function (function): Accepts a URL when ``is_path`` is true, ``validate_host``
            succeeds, and ``validate_file_extension`` is falsy.
    """
    name = "pattern_url_path"
    extraction_function = lambda url: URLPathExtractor.is_valid_url(url)
    filter_function = lambda url: URLPathExtractor.is_path(url) and URLPathExtractor.validate_host(url) and not validate_file_extension(url)

    @classmethod
    def is_path(cls, url):
        """
        Tell whether the URL has a path other than the bare root.

        Args:
            url (str): The URL to inspect.

        Returns:
            bool: True when the parsed path is non-empty and not "/".
        """
        path = urlparse(url).path
        return bool(path) and path != "/"

    @classmethod
    def validate_host(cls, url, validate_tld=True):
        """
        Validate the host part of a URL.

        Args:
            url (str): The URL to inspect.
            validate_tld (bool): When True and the host is not an IP address, the result is
                whatever ``utils.validate_tld`` returns for the host.

        Returns:
            False when ``validators.hostname`` rejects the host; otherwise the TLD check result
            (if it applies) or True.
        """
        uri = urlparse(url)
        if not validators.hostname(uri.hostname):
            return False
        # IP-address hosts have no TLD, so the TLD check is skipped for them.
        if validate_tld and not cls.is_ip_address(uri.hostname):
            return utils.validate_tld(uri.hostname)
        return True

    @staticmethod
    def is_ip_address(address):
        """
        Tell whether a string parses as an IPv4 or IPv6 address.

        Args:
            address: The value to test.

        Returns:
            bool: True when ``ipaddress.ip_address`` accepts the value, False otherwise.
        """
        try:
            ip_address(address)
        except ValueError:
            # ip_address signals every kind of invalid input with ValueError.
            return False
        return True

    @staticmethod
    def is_valid_url(url):
        """
        Check that a string is a valid URL whose registered domain passes the false-positive check.

        Args:
            url (str): The URL to be validated.

        Returns:
            bool: True when ``validators.url`` accepts the string and
            ``check_false_positive_domain`` is truthy for its extracted domain, False otherwise.
        """
        try:
            if not validators.url(url):
                return False
            extracted_domain = tldextract.extract(url)
            return bool(check_false_positive_domain(extracted_domain.domain))
        except Exception:
            # Best effort: any failure inside the third-party helpers means "not a URL".
            return False
|
70
|
+
|
71
|
+
|
72
|
+
class HostnamePathExtractor(URLPathExtractor):
    """
    Variant of URLPathExtractor whose filter calls ``validate_host`` with ``validate_tld=False``.

    Attributes:
        name (str): Identifier of the extractor, "pattern_host_name_path".
        filter_function (function): Accepts a URL when ``is_path`` is true, the host validates,
            and ``validate_file_extension`` is falsy.
    """

    name = "pattern_host_name_path"
    filter_function = lambda url: (
        URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
        and not validate_file_extension(url)
    )
|
txt2stix/retriever.py
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
import logging
|
2
|
+
from urllib.parse import urljoin
|
3
|
+
import dotenv, os
|
4
|
+
import stix2
|
5
|
+
import requests
|
6
|
+
|
7
|
+
dotenv.load_dotenv()
|
8
|
+
|
9
|
+
|
10
|
+
class STIXObjectRetriever:
    """
    Small HTTP client that fetches objects from a CTI Butler or Vulmatch API.

    The base URL and API key are read from environment variables:
    ``CTIBUTLER_BASE_URL`` / ``CTIBUTLER_API_KEY`` or ``VULMATCH_BASE_URL`` / ``VULMATCH_API_KEY``.
    """

    def __init__(self, host="ctibutler") -> None:
        """
        Configure the client for one of the supported services.

        Args:
            host (str): Either "ctibutler" or "vulmatch".

        Raises:
            KeyError: If the base-URL environment variable for the host is not set.
            NotImplementedError: If ``host`` is not a supported service.
        """
        if host == "ctibutler":
            self.api_root = os.environ['CTIBUTLER_BASE_URL'] + '/'
            self.api_key = os.environ.get('CTIBUTLER_API_KEY')
        elif host == "vulmatch":
            self.api_root = os.environ['VULMATCH_BASE_URL'] + '/'
            self.api_key = os.environ.get('VULMATCH_API_KEY')
        else:
            # Exceptions do not interpolate logging-style arguments, so format explicitly.
            raise NotImplementedError(f"The type `{host}` is not supported")

    def get_attack_object(self, matrix, attack_id):
        """Fetch the objects for a single ATT&CK ID in the given matrix."""
        endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/{attack_id}/")
        return self._retrieve_objects(endpoint)

    def get_attack_objects(self, matrix, attack_ids):
        """Fetch the objects for several ATT&CK IDs (sent comma-joined) in the given matrix."""
        endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/")
        return self._retrieve_objects(endpoint, params={"attack_id": ",".join(attack_ids)})

    def get_objects_by_id(self, id, type):
        """Fetch the objects stored under ``v1/<type>/objects/<id>/``."""
        return self._retrieve_objects(urljoin(self.api_root, f"v1/{type}/objects/{id}/"))

    def get_location_objects(self, id):
        """Fetch location objects filtered by the ``alpha2_code`` query parameter."""
        endpoint = urljoin(self.api_root, "v1/location/objects/")
        return self._retrieve_objects(endpoint, params={"alpha2_code": id})

    def get_objects_by_name(self, name, type):
        """Fetch objects of ``type`` filtered by the ``name`` query parameter."""
        endpoint = urljoin(self.api_root, f"v1/{type}/objects/")
        return self._retrieve_objects(endpoint, params={"name": name})

    def get_objects_by_alias(self, alias, type):
        """Fetch objects of ``type`` filtered by the ``alias`` query parameter."""
        endpoint = urljoin(self.api_root, f"v1/{type}/objects/")
        return self._retrieve_objects(endpoint, params={"alias": alias})

    def _retrieve_objects(self, endpoint, key='objects', params=None):
        """
        Download every page of a paginated endpoint and concatenate the results.

        Args:
            endpoint (str): Absolute URL to query.
            key (str): Key of the response JSON that holds the list of results.
            params (dict | None): Extra query parameters; passing them here lets requests
                URL-encode values that contain characters such as "&", "#" or spaces.

        Returns:
            list: The collected items from all pages.

        Raises:
            requests.HTTPError: If any page responds with an error status.
        """
        data = []
        page = 1
        # The context manager closes the session's connections when paging finishes or fails.
        with requests.Session() as s:
            s.headers.update({
                "API-KEY": self.api_key,
            })
            while True:
                query = dict(params or {}, page=page, page_size=50)
                resp = s.get(endpoint, params=query)
                resp.raise_for_status()
                d = resp.json()
                if len(d[key]) == 0:
                    break
                data.extend(d[key])
                page += 1
                # A short page means this was the last one.
                if d['page_results_count'] < d['page_size']:
                    break
        return data
|
59
|
+
|
60
|
+
def retrieve_stix_objects(stix_mapping: str, id, host=None):
    """
    Look up remote STIX objects for an extracted value.

    Args:
        stix_mapping (str): Either "location", or - when ``host`` is not given - a string of
            the form "<host>-<object path>" that is split on the first hyphen.
        id: The ID, name, alias or country code to look up, depending on the object path.
        host (str | None): Service to query; derived from ``stix_mapping`` when omitted.

    Returns:
        The list returned by the matching STIXObjectRetriever method, or None when the mapping
        is unsupported or the lookup fails.
    """
    attack_matrix_by_id = {
        'mitre-attack-ics-id': 'ics',
        'mitre-attack-mobile-id': 'mobile',
        'mitre-attack-enterprise-id': 'enterprise',
    }
    collection_by_id = {
        "mitre-capec-id": 'capec',
        "mitre-atlas-id": 'atlas',
        "disarm-id": 'disarm',
        "mitre-cwe-id": 'cwe',
        "cve-id": 'cve',
        "cpe-id": 'cpe',
    }
    collection_by_name = {
        "mitre-attack-enterprise-name": 'attack-enterprise',
        "mitre-attack-mobile-name": 'attack-mobile',
        "mitre-attack-ics-name": 'attack-ics',
        "mitre-capec-name": 'capec',
        "mitre-cwe-name": 'cwe',
        "mitre-atlas-name": 'atlas',
        "disarm-name": 'disarm',
    }
    collection_by_alias = {
        "mitre-attack-enterprise-aliases": 'attack-enterprise',
        "mitre-attack-mobile-aliases": 'attack-mobile',
        "mitre-attack-ics-aliases": 'attack-ics',
    }

    try:
        object_path = stix_mapping
        if stix_mapping in ['location']:
            host = 'ctibutler'
        if not host:
            # Raises ValueError (handled below) when the mapping contains no hyphen.
            host, object_path = stix_mapping.split('-', 1)
        client = STIXObjectRetriever(host)

        if object_path in attack_matrix_by_id:
            return client.get_attack_object(attack_matrix_by_id[object_path], id)
        if object_path in collection_by_id:
            return client.get_objects_by_id(id, collection_by_id[object_path])
        if object_path == "location":
            return client.get_location_objects(id)
        if object_path in collection_by_name:
            return client.get_objects_by_name(id, collection_by_name[object_path])
        if object_path in collection_by_alias:
            return client.get_objects_by_alias(id, collection_by_alias[object_path])
        raise NotImplementedError(f"pair {(host, object_path)=} not implemented")
    except (NotImplementedError, ValueError):
        # Unsupported or malformed mappings are skipped silently.
        pass
    except Exception:
        msg = f"failed to get {object_path} for {id} from {host}"
        logging.info(msg)
        logging.debug(msg, exc_info=True)
    return None
|
txt2stix/stix.py
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
from txt2stix.bundler import *
|