txt2stix 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. txt2stix/__init__.py +33 -0
  2. txt2stix/ai_extractor/__init__.py +15 -0
  3. txt2stix/ai_extractor/anthropic.py +12 -0
  4. txt2stix/ai_extractor/base.py +87 -0
  5. txt2stix/ai_extractor/deepseek.py +19 -0
  6. txt2stix/ai_extractor/gemini.py +18 -0
  7. txt2stix/ai_extractor/openai.py +15 -0
  8. txt2stix/ai_extractor/openrouter.py +20 -0
  9. txt2stix/ai_extractor/prompts.py +164 -0
  10. txt2stix/ai_extractor/utils.py +85 -0
  11. txt2stix/attack_flow.py +101 -0
  12. txt2stix/bundler.py +428 -0
  13. txt2stix/common.py +23 -0
  14. txt2stix/extractions.py +59 -0
  15. txt2stix/includes/__init__.py +0 -0
  16. txt2stix/includes/extractions/ai/config.yaml +1023 -0
  17. txt2stix/includes/extractions/lookup/config.yaml +393 -0
  18. txt2stix/includes/extractions/pattern/config.yaml +609 -0
  19. txt2stix/includes/helpers/mimetype_filename_extension_list.csv +936 -0
  20. txt2stix/includes/helpers/stix_relationship_types.txt +41 -0
  21. txt2stix/includes/helpers/tlds.txt +1446 -0
  22. txt2stix/includes/helpers/windows_registry_key_prefix.txt +12 -0
  23. txt2stix/includes/lookups/_README.md +11 -0
  24. txt2stix/includes/lookups/_generate_lookups.py +247 -0
  25. txt2stix/includes/lookups/attack_pattern.txt +1 -0
  26. txt2stix/includes/lookups/campaign.txt +1 -0
  27. txt2stix/includes/lookups/country_iso3166_alpha2.txt +249 -0
  28. txt2stix/includes/lookups/course_of_action.txt +1 -0
  29. txt2stix/includes/lookups/disarm_id_v1_5.txt +345 -0
  30. txt2stix/includes/lookups/disarm_name_v1_5.txt +347 -0
  31. txt2stix/includes/lookups/extensions.txt +78 -0
  32. txt2stix/includes/lookups/identity.txt +1 -0
  33. txt2stix/includes/lookups/infrastructure.txt +1 -0
  34. txt2stix/includes/lookups/intrusion_set.txt +1 -0
  35. txt2stix/includes/lookups/malware.txt +2 -0
  36. txt2stix/includes/lookups/mitre_atlas_id_v4_5_2.txt +116 -0
  37. txt2stix/includes/lookups/mitre_atlas_name_v4_5_2.txt +117 -0
  38. txt2stix/includes/lookups/mitre_attack_enterprise_aliases_v16_0.txt +1502 -0
  39. txt2stix/includes/lookups/mitre_attack_enterprise_id_v16_0.txt +1656 -0
  40. txt2stix/includes/lookups/mitre_attack_enterprise_name_v16_0.txt +1765 -0
  41. txt2stix/includes/lookups/mitre_attack_ics_aliases_v16_0.txt +141 -0
  42. txt2stix/includes/lookups/mitre_attack_ics_id_v16_0.txt +254 -0
  43. txt2stix/includes/lookups/mitre_attack_ics_name_v16_0.txt +293 -0
  44. txt2stix/includes/lookups/mitre_attack_mobile_aliases_v16_0.txt +159 -0
  45. txt2stix/includes/lookups/mitre_attack_mobile_id_v16_0.txt +277 -0
  46. txt2stix/includes/lookups/mitre_attack_mobile_name_v16_0.txt +296 -0
  47. txt2stix/includes/lookups/mitre_capec_id_v3_9.txt +559 -0
  48. txt2stix/includes/lookups/mitre_capec_name_v3_9.txt +560 -0
  49. txt2stix/includes/lookups/mitre_cwe_id_v4_15.txt +939 -0
  50. txt2stix/includes/lookups/mitre_cwe_name_v4_15.txt +939 -0
  51. txt2stix/includes/lookups/threat_actor.txt +1 -0
  52. txt2stix/includes/lookups/tld.txt +1422 -0
  53. txt2stix/includes/lookups/tool.txt +1 -0
  54. txt2stix/includes/tests/test_cases.yaml +695 -0
  55. txt2stix/indicator.py +860 -0
  56. txt2stix/lookups.py +68 -0
  57. txt2stix/pattern/__init__.py +13 -0
  58. txt2stix/pattern/extractors/__init__.py +0 -0
  59. txt2stix/pattern/extractors/base_extractor.py +167 -0
  60. txt2stix/pattern/extractors/card/README.md +34 -0
  61. txt2stix/pattern/extractors/card/__init__.py +15 -0
  62. txt2stix/pattern/extractors/card/amex_card_extractor.py +52 -0
  63. txt2stix/pattern/extractors/card/diners_card_extractor.py +47 -0
  64. txt2stix/pattern/extractors/card/discover_card_extractor.py +48 -0
  65. txt2stix/pattern/extractors/card/jcb_card_extractor.py +43 -0
  66. txt2stix/pattern/extractors/card/master_card_extractor.py +63 -0
  67. txt2stix/pattern/extractors/card/union_card_extractor.py +38 -0
  68. txt2stix/pattern/extractors/card/visa_card_extractor.py +46 -0
  69. txt2stix/pattern/extractors/crypto/__init__.py +3 -0
  70. txt2stix/pattern/extractors/crypto/btc_extractor.py +38 -0
  71. txt2stix/pattern/extractors/directory/__init__.py +10 -0
  72. txt2stix/pattern/extractors/directory/unix_directory_extractor.py +40 -0
  73. txt2stix/pattern/extractors/directory/unix_file_path_extractor.py +42 -0
  74. txt2stix/pattern/extractors/directory/windows_directory_path_extractor.py +47 -0
  75. txt2stix/pattern/extractors/directory/windows_file_path_extractor.py +42 -0
  76. txt2stix/pattern/extractors/domain/__init__.py +8 -0
  77. txt2stix/pattern/extractors/domain/domain_extractor.py +39 -0
  78. txt2stix/pattern/extractors/domain/hostname_extractor.py +36 -0
  79. txt2stix/pattern/extractors/domain/sub_domain_extractor.py +49 -0
  80. txt2stix/pattern/extractors/hashes/__init__.py +16 -0
  81. txt2stix/pattern/extractors/hashes/md5_extractor.py +16 -0
  82. txt2stix/pattern/extractors/hashes/sha1_extractor.py +14 -0
  83. txt2stix/pattern/extractors/hashes/sha224_extractor.py +18 -0
  84. txt2stix/pattern/extractors/hashes/sha2_256_exactor.py +14 -0
  85. txt2stix/pattern/extractors/hashes/sha2_512_exactor.py +13 -0
  86. txt2stix/pattern/extractors/hashes/sha3_256_exactor.py +15 -0
  87. txt2stix/pattern/extractors/hashes/sha3_512_exactor.py +16 -0
  88. txt2stix/pattern/extractors/helper.py +64 -0
  89. txt2stix/pattern/extractors/ip/__init__.py +14 -0
  90. txt2stix/pattern/extractors/ip/ipv4_cidr_extractor.py +49 -0
  91. txt2stix/pattern/extractors/ip/ipv4_extractor.py +18 -0
  92. txt2stix/pattern/extractors/ip/ipv4_port_extractor.py +42 -0
  93. txt2stix/pattern/extractors/ip/ipv6_cidr_extractor.py +18 -0
  94. txt2stix/pattern/extractors/ip/ipv6_extractor.py +16 -0
  95. txt2stix/pattern/extractors/ip/ipv6_port_extractor.py +46 -0
  96. txt2stix/pattern/extractors/others/__init__.py +22 -0
  97. txt2stix/pattern/extractors/others/asn_extractor.py +14 -0
  98. txt2stix/pattern/extractors/others/cpe_extractor.py +29 -0
  99. txt2stix/pattern/extractors/others/cve_extractor.py +14 -0
  100. txt2stix/pattern/extractors/others/email_extractor.py +21 -0
  101. txt2stix/pattern/extractors/others/filename_extractor.py +17 -0
  102. txt2stix/pattern/extractors/others/iban_extractor.py +15 -0
  103. txt2stix/pattern/extractors/others/mac_address_extractor.py +13 -0
  104. txt2stix/pattern/extractors/others/phonenumber_extractor.py +41 -0
  105. txt2stix/pattern/extractors/others/user_agent_extractor.py +20 -0
  106. txt2stix/pattern/extractors/others/windows_registry_key_extractor.py +18 -0
  107. txt2stix/pattern/extractors/url/__init__.py +7 -0
  108. txt2stix/pattern/extractors/url/url_extractor.py +22 -0
  109. txt2stix/pattern/extractors/url/url_file_extractor.py +21 -0
  110. txt2stix/pattern/extractors/url/url_path_extractor.py +74 -0
  111. txt2stix/retriever.py +126 -0
  112. txt2stix/stix.py +1 -0
  113. txt2stix/txt2stix.py +336 -0
  114. txt2stix/utils.py +86 -0
  115. txt2stix-0.0.4.dist-info/METADATA +190 -0
  116. txt2stix-0.0.4.dist-info/RECORD +119 -0
  117. txt2stix-0.0.4.dist-info/WHEEL +4 -0
  118. txt2stix-0.0.4.dist-info/entry_points.txt +2 -0
  119. txt2stix-0.0.4.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,42 @@
1
+ from ..base_extractor import BaseExtractor
2
+ from ipaddress import IPv4Address
3
+
4
+
5
class IPv4WithPortExtractor(BaseExtractor):
    """
    Extractor that accepts candidates of the form ``<IPv4 address>:<port>``.

    Attributes:
        name (str): The extractor identifier, "pattern_ipv4_address_port".
        extraction_function (function): Delegates to ``validate_ipv4_with_port``.
    """

    name = "pattern_ipv4_address_port"
    extraction_function = lambda x: IPv4WithPortExtractor.validate_ipv4_with_port(x)

    @staticmethod
    def validate_ipv4_with_port(x):
        """
        Check whether ``x`` is an IPv4 address followed by ``:`` and a port.

        Surrounding double quotes are stripped before validation.

        Args:
            x (str): The string to be checked.

        Returns:
            tuple | bool: ``(ip_address, port)`` (both as strings) when the
            address parses as IPv4 and the port is within 1-65535, otherwise
            ``False``.
        """
        x = x.strip('"')

        # Exactly one ":" must separate the address from the port.
        ip_address, separator, port = x.partition(":")
        if not separator or ":" in port:
            return False

        # int() also accepts signs, surrounding whitespace and "_" digit
        # separators ("+80", " 80", "8_0"); none of those is a valid port
        # spelling, so require plain ASCII digits first.
        if not (port.isascii() and port.isdigit()):
            return False

        try:
            # Validate the IPv4 address part.
            IPv4Address(ip_address)
        except ValueError:
            return False

        # Validate the port part.
        if 1 <= int(port) <= 65535:
            return ip_address, port
        return False
@@ -0,0 +1,18 @@
1
+ # import ipaddress
2
+ from ipaddress import IPv6Address
3
+
4
+ import validators
5
+ from ..base_extractor import BaseExtractor
6
+
7
+
8
class IPv6WithCIDRExtractor(BaseExtractor):
    """
    Extractor for IPv6 values written with a CIDR suffix.

    Attributes:
        name (str): The extractor identifier, "pattern_ipv6_address_cidr".
        extraction_function (function): Passes the candidate to
            ``validators.ipv6`` with ``strict=True`` and ``cidr=True`` and
            returns whatever that call returns.
    """

    name = "pattern_ipv6_address_cidr"
    # All validation is delegated to the validators package.
    extraction_function = lambda ipaddress: validators.ipv6(
        ipaddress,
        strict=True,
        cidr=True,
    )
@@ -0,0 +1,16 @@
1
+ import validators
2
+ from ..base_extractor import BaseExtractor
3
+ import ipaddress
4
+
5
+
6
class IPv6Extractor(BaseExtractor):
    """
    Extractor for plain IPv6 addresses (no CIDR suffix).

    Attributes:
        name (str): The extractor identifier, "pattern_ipv6_address_only".
        extraction_function (function): Passes the candidate to
            ``validators.ipv6`` with ``strict=True`` and ``cidr=False`` and
            returns whatever that call returns.
    """

    name = "pattern_ipv6_address_only"
    # All validation is delegated to the validators package.
    extraction_function = lambda ipaddress: validators.ipv6(
        ipaddress,
        strict=True,
        cidr=False,
    )
@@ -0,0 +1,46 @@
1
+ import re
2
+ from ipaddress import IPv6Address
3
+ from ..base_extractor import BaseExtractor
4
+
5
class IPv6WithPortExtractor(BaseExtractor):
    """
    Extractor that accepts candidates of the form ``[<IPv6 address>]:<port>``.

    Attributes:
        name (str): The extractor identifier, "pattern_ipv6_address_port".
        extraction_function (function): Delegates to ``validate_ipv6_with_port``.
    """

    name = "pattern_ipv6_address_port"
    extraction_function = lambda x: IPv6WithPortExtractor.validate_ipv6_with_port(x)

    @staticmethod
    def validate_ipv6_with_port(x):
        """
        Check whether ``x`` is a bracketed IPv6 address followed by a port.

        Args:
            x (str): The string to be checked.

        Returns:
            tuple | bool: ``(ip_address, port)`` (both as strings, address
            without the brackets) when the address parses as IPv6 and the
            port is within 1-65535, otherwise ``False``.
        """
        # Split "[address]:port"; the pattern itself requires the "]:" separator.
        match = re.match(r"\[(.*)\]:(.*)", x)
        if not match:
            return False
        ip_address, port = match.groups()

        # int() also accepts signs, surrounding whitespace and "_" digit
        # separators; require plain ASCII digits so such strings are rejected.
        if not (port.isascii() and port.isdigit()):
            return False

        try:
            # Validate the IPv6 address part.
            IPv6Address(ip_address)
        except ValueError:
            return False

        # Validate the port part.
        if 1 <= int(port) <= 65535:
            return ip_address, port
        return False
@@ -0,0 +1,22 @@
1
+ from .asn_extractor import ASNExtractor
2
+ from .cpe_extractor import CPEExtractor
3
+ from .cve_extractor import CVEExtractor
4
+ from .email_extractor import EmailAddressExtractor
5
+ from .filename_extractor import FileNameExtractor
6
+ from .iban_extractor import IBANExtractor
7
+ from .mac_address_extractor import MacAddressExtractor
8
+ from .phonenumber_extractor import PhoneNumberExtractor
9
+ from .user_agent_extractor import UserAgentBaseExtractor
10
+ from .windows_registry_key_extractor import WindowsRegistryKeyExtractor
11
+
12
# Extractor classes exported by this sub-package, in registration order.
OTHER_EXTRACTORS = [
    ASNExtractor,
    CPEExtractor,
    CVEExtractor,
    EmailAddressExtractor,
    FileNameExtractor,
    IBANExtractor,
    MacAddressExtractor,
    PhoneNumberExtractor,
    UserAgentBaseExtractor,
    WindowsRegistryKeyExtractor,
]
@@ -0,0 +1,14 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
+
4
class ASNExtractor(BaseExtractor):
    """
    Extractor for Autonomous System Numbers (ASNs) based on a regular expression.

    Attributes:
        name (str): The extractor identifier, "pattern_autonomous_system_number".
        extraction_regex (str): Matches the prefix "AS" or "ASN", an optional
            single space, and a captured run of 1 to 5 digits, bounded by
            word boundaries.
    """

    name = "pattern_autonomous_system_number"
    # NOTE(review): only 1-5 digit numbers match, so values with six or more
    # digits (e.g. 4-byte ASNs) are not extracted -- confirm this is intended.
    extraction_regex = r"\b(?:ASN?)(?: )?(\d{1,5})\b"
@@ -0,0 +1,29 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
+
4
class CPEExtractor(BaseExtractor):
    """
    Extractor for Common Platform Enumeration (CPE) strings.

    Attributes:
        name (str): The extractor identifier, "pattern_cpe_uri".
        extraction_function (function): Delegates to ``is_valid_cpe``.
    """

    name = "pattern_cpe_uri"
    extraction_function = lambda x: CPEExtractor.is_valid_cpe(x)

    @staticmethod
    def is_valid_cpe(cpe_string):
        """
        Check whether ``cpe_string`` has the shape of a CPE string.

        The check is purely structural: the string must start with ``cpe:``
        and contain exactly 12 ``:`` characters.

        Args:
            cpe_string (str): The string to be checked.

        Returns:
            bool: True if the string passes both checks, False otherwise.
        """
        # Return an explicit bool; the previous implementation fell off the
        # end and returned None for non-matching input.
        return cpe_string.startswith('cpe:') and cpe_string.count(':') == 12
@@ -0,0 +1,14 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
+
4
class CVEExtractor(BaseExtractor):
    """
    Extractor for Common Vulnerabilities and Exposures (CVE) identifiers.

    Attributes:
        name (str): The extractor identifier, "pattern_cve_id".
        extraction_regex (str): Matches ``CVE-<4-digit year>-<sequence>`` where
            the sequence number has four or more digits.
    """

    name = "pattern_cve_id"
    # The sequence part of a CVE ID has a minimum of four digits but no upper
    # bound; capping it at five made the trailing \b fail on longer IDs
    # (e.g. CVE-2019-1010234), so they were not matched at all.
    extraction_regex = r'\bCVE-\d{4}-\d{4,}\b'
@@ -0,0 +1,21 @@
1
+ from ..base_extractor import BaseExtractor
2
+ from ..helper import TLDs
3
+
4
+
5
class EmailAddressExtractor(BaseExtractor):
    """
    Extractor for email addresses: a regular expression finds candidates and
    ``filter_function`` keeps those whose last domain label is a known TLD.

    Attributes:
        name (str): The extractor identifier, "pattern_email_address".
        extraction_regex (str): The regular expression used to find candidate
            email addresses.
    """
    name = "pattern_email_address"
    # The domain part must end on a word character. Previously a trailing "."
    # or "-" (e.g. the full stop closing a sentence) was swallowed into the
    # match, which left an empty/invalid last label and made filter_function
    # reject an otherwise valid address.
    extraction_regex = r'[\w.+-]+@[\w-]+\.[\w.-]*\w'

    @staticmethod
    def filter_function(email):
        """
        Tell whether the text after the last "." of the domain is in ``TLDs``.

        Args:
            email (str): Candidate email address.

        Returns:
            bool: True when the last domain label is contained in ``TLDs``,
            False otherwise.
        """
        domain_part = email.split("@")[-1]
        last_label = domain_part.split(".")[-1]
        return last_label in TLDs
@@ -0,0 +1,17 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
class FileNameExtractor(BaseExtractor):
    """
    Extractor for file names that end in one of a fixed set of extensions.

    Attributes:
        name (str): The extractor identifier, "pattern_file_name".
        file_extensions (str): Regex alternation of the accepted extensions;
            each is listed in an all-uppercase and an all-lowercase spelling.
        extraction_regex (str): Captures the file stem (characters other than
            whitespace and ``\\ / : * ? " < > |``) and the extension, and
            refuses a match that is immediately followed by ``(``.
    """

    name = "pattern_file_name"
    # Generated alternation -- keep on one line and edit with care.
    file_extensions = "(?:(?:7(?:Z|z))|(?:AP(?:K|P))|(?:B(?:AT|IN|MP))|(?:C(?:LASS|AB|ER|GI|HM|MD|RX))|(?:D(?:OCX?|EB|LL))|EXE|FLV|(?:G(?:ADGET|IF|Z))|INF|(?:J(?:A(?:VA|R)|PG|S))|(?:L(?:NK|OG))|(?:M(?:O(?:F|V)|P(?:4|G)|S(?:G|I)|4V))|ODT|(?:P(?:LUGIN|PTX?|7S|DF|HP|NG|SD|F|Y))|(?:R(?:AR|PM))|(?:S(?:VG|WF|YS|O))|(?:T(?:IFF?|AR|GZ|MP|XT))|(?:V(?:BS|IR))|(?:W(?:MV|SF))|XLSX?|ZIPX?|(?:ap(?:k|p))|(?:b(?:at|in|mp))|(?:c(?:lass|ab|er|gi|hm|md|rx))|(?:d(?:ocx?|eb|ll))|exe|flv|(?:g(?:adget|if|z))|inf|(?:j(?:a(?:va|r)|pg|s))|(?:l(?:nk|og))|(?:m(?:o(?:f|v)|p(?:4|g)|s(?:g|i)|4v))|odt|(?:p(?:lugin|ptx?|7s|df|hp|ng|sd|f|y))|(?:r(?:ar|pm))|(?:s(?:vg|wf|ys|o))|(?:t(?:iff?|ar|gz|mp|xt))|(?:v(?:bs|ir))|(?:w(?:mv|sf))|xlsx?|zipx?)"
    # The leading (?!['\"]) keeps a match from starting on a quote character.
    extraction_regex = rf"(?!['\"])([^\\/:\*\?\"\<\>\|\s]*)\.({file_extensions})(?!\()"
16
+
17
+
@@ -0,0 +1,15 @@
1
+ from validators import iban
2
+ from ..base_extractor import BaseExtractor
3
+
4
+
5
class IBANExtractor(BaseExtractor):
    """
    Extractor for International Bank Account Numbers (IBANs).

    Attributes:
        name (str): The extractor identifier, "pattern_iban_number".
        extraction_function (function): Passes the candidate to
            ``validators.iban`` and returns whatever that call returns.
    """

    name = "pattern_iban_number"
    # All validation is delegated to the validators package.
    extraction_function = lambda x: iban(x)
@@ -0,0 +1,13 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
class MacAddressExtractor(BaseExtractor):
    """
    Extractor for MAC addresses based on a regular expression.

    Attributes:
        name (str): The extractor identifier, "pattern_mac_address".
        extraction_regex (str): Six pairs of hexadecimal digits; each of the
            first five pairs is followed by ":" or "-".
    """

    name = "pattern_mac_address"
    # NOTE(review): the pattern has no boundary anchors and does not require
    # one consistent separator -- confirm that is acceptable for callers.
    extraction_regex = r'([0-9A-Fa-f]{2}[:-]){5}([0-9A-Fa-f]{2})'
@@ -0,0 +1,41 @@
1
+ import re
2
+
3
+ import phonenumbers
4
+ from ..base_extractor import BaseExtractor
5
+
6
+
7
class PhoneNumberExtractor(BaseExtractor):
    """
    Extractor for international phone numbers: a regular expression finds
    candidates and ``filter_function`` validates them with ``phonenumbers``.

    Attributes:
        name (str): The extractor identifier, "pattern_phone_number".
        extraction_regex (str): Matches a "+" or "00" prefix followed by up to
            four digit groups, optionally separated by a space or a hyphen.
    """

    name = "pattern_phone_number"
    extraction_regex = r'((\+|00)\d{1,3}[ \-]?\d{1,5}[ \-]?\d{1,5}[ \-]?\d{1,5})'

    @staticmethod
    def validate_phone_number(regex, phone_number):
        """Return the ``re.fullmatch`` result of ``regex`` against ``phone_number``."""
        return re.fullmatch(regex, phone_number)

    @staticmethod
    def filter_function(input_string):
        """
        Decide whether a regex candidate is a real phone number.

        Args:
            input_string (str): Candidate text matched by ``extraction_regex``.

        Returns:
            False when the separator-free candidate is 7 characters or
            shorter, or 15 characters or longer; otherwise the result of
            ``parse_phone_number`` (a parsed number, or None when invalid).
        """
        # extraction_regex accepts both spaces and hyphens as separators, so
        # both must be removed before measuring the length; stripping only
        # spaces rejected most hyphen-formatted numbers as "too long".
        input_string = input_string.replace(" ", "").replace("-", "")
        if len(input_string) >= 15 or len(input_string) <= 7:
            return False
        return PhoneNumberExtractor.parse_phone_number(input_string)

    @staticmethod
    def parse_phone_number(phone_number: str):
        """
        Parse ``phone_number`` with ``phonenumbers`` and validate it.

        Spaces are removed and a leading "00" or "+" is normalised to "+"
        before parsing.

        Returns:
            The parsed number object when ``phonenumbers`` considers it valid,
            otherwise None.
        """
        normalised = '+' + phone_number.replace(' ', '').removeprefix('00').removeprefix('+')
        try:
            phone = phonenumbers.parse(normalised, None)
        except phonenumbers.NumberParseException:
            # Unparseable input is simply "not a phone number"; a bare
            # ``except:`` here would also swallow KeyboardInterrupt/SystemExit.
            return None
        if not phonenumbers.is_valid_number(phone):
            return None
        return phone
41
+
@@ -0,0 +1,20 @@
1
+ from ..base_extractor import BaseExtractor
2
+
3
+
4
class UserAgentBaseExtractor(BaseExtractor):
    """
    Extractor for browser user agent strings based on a regular expression.

    Attributes:
        name (str): The extractor identifier, "pattern_user_agent".
        platforms (str): Regex for an alphabetic product/platform name.
        user_agent_details (str): Regex for a parenthesised detail section.
        user_agent (str): Composite regex built from ``platforms`` and
            ``user_agent_details``; it is not referenced by
            ``extraction_regex``.
        extraction_regex (str): Matches ``Mozilla/<major>.<minor>`` optionally
            followed by whitespace and one parenthesised section.
    """

    name = "pattern_user_agent"

    # Building blocks for the composite ``user_agent`` pattern.
    platforms = r"([a-zA-Z]+)"
    user_agent_details = r"\([\w;\s\,.:-]+\)"
    user_agent = rf"((User-Agent: )|(user-agent: ))?Mozilla/5.0([ ](({user_agent_details})|(({platforms}/)[^\s\"\',]+)))+"

    # Pattern for the extractor, written independently of the pieces above.
    extraction_regex = r"Mozilla\/\d+\.\d+(\s+\([^\)]+\))?"
@@ -0,0 +1,18 @@
1
+ import re
2
+ from ..base_extractor import BaseExtractor
3
+
4
+
5
class WindowsRegistryKeyExtractor(BaseExtractor):
    """
    Extractor for Windows Registry keys based on a regular expression.

    Attributes:
        name (str): The extractor identifier, "pattern_windows_registry_key".
        valid_root_keys (list): Accepted root key names, long and short forms.
        prefix_regex (str): Non-capturing alternation of the escaped root keys.
        extraction_regex (str): A root key followed by one or more backslash
            or word characters, captured as a whole between word boundaries.
    """

    name = "pattern_windows_registry_key"
    valid_root_keys = [
        'HKEY_CLASSES_ROOT', 'HKCR',
        'HKEY_CURRENT_USER', 'HKCU',
        'HKEY_LOCAL_MACHINE', 'HKLM',
        'HKEY_USERS', 'HKU',
        'HKEY_CURRENT_CONFIG', 'HKCC',
        'HKEY_PERFORMANCE_DATA',
        'HKEY_DYN_DATA',
    ]
    # Escape every root key so it is matched literally inside the alternation.
    prefix_regex = r'(?:' + '|'.join(map(re.escape, valid_root_keys)) + r')'
    extraction_regex = rf'\b({prefix_regex}[\\\w]+)\b'
@@ -0,0 +1,7 @@
1
+ from .url_file_extractor import URLFileExtractor
2
+ from .url_path_extractor import URLPathExtractor
3
+ from .url_extractor import URLExtractor
4
+
5
# Extractor classes exported by this sub-package.
# NOTE(review): URLExtractor is imported above but not listed here --
# confirm whether that omission is intentional.
URL_EXTRACTORS = [
    URLFileExtractor,
    URLPathExtractor,
]
@@ -0,0 +1,22 @@
1
+
2
+ from .url_path_extractor import URLPathExtractor
3
+
4
+
5
+
6
class URLExtractor(URLPathExtractor):
    """
    Extractor for URLs that carry no path component.

    Attributes:
        name (str): The extractor identifier, "pattern_url".
        filter_function (function): Accepts a URL when
            ``URLPathExtractor.is_path`` is false for it and
            ``URLPathExtractor.validate_host`` accepts its host.
    """

    name = "pattern_url"
    filter_function = lambda url: (
        not URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url)
    )
17
+
18
+
19
class HostnameUrlExtractor(URLExtractor):
    """Variant of ``URLExtractor`` that validates the host with ``validate_tld=False``."""

    name = "pattern_host_name_url"

    filter_function = lambda url: (
        not URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
    )
@@ -0,0 +1,21 @@
1
+
2
+ from ..helper import validate_file_extension
3
+ from .url_path_extractor import URLPathExtractor
4
+
5
+
6
class URLFileExtractor(URLPathExtractor):
    """
    Extractor for URLs whose path passes the file-extension check.

    Attributes:
        name (str): The extractor identifier, "pattern_url_file".
        filter_function (function): Accepts a URL when it has a path, its host
            passes ``URLPathExtractor.validate_host`` and
            ``validate_file_extension`` accepts it.
    """

    name = "pattern_url_file"
    filter_function = lambda url: (
        URLFileExtractor.is_path(url)
        and URLPathExtractor.validate_host(url)
        and validate_file_extension(url)
    )
17
+
18
+
19
class HostnameFileExtractor(URLFileExtractor):
    """Variant of ``URLFileExtractor`` that validates the host with ``validate_tld=False``."""

    name = "pattern_host_name_file"
    filter_function = lambda url: (
        URLFileExtractor.is_path(url)
        and validate_file_extension(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
    )
@@ -0,0 +1,74 @@
1
+ import tldextract
2
+ import validators
3
+
4
+ from txt2stix import utils
5
+
6
+ from ..base_extractor import BaseExtractor
7
+ from ..helper import check_false_positive_domain, validate_file_extension
8
+ from urllib.parse import urlparse
9
+ from ipaddress import ip_address
10
+
11
+
12
class URLPathExtractor(BaseExtractor):
    """
    Extractor for URLs that have a path which does not pass the
    file-extension check.

    Attributes:
        name (str): The extractor identifier, "pattern_url_path".
        extraction_function (function): Delegates to ``is_valid_url``.
        filter_function (function): Accepts a URL when it has a path, its host
            passes ``validate_host`` and ``validate_file_extension`` rejects it.
    """
    name = "pattern_url_path"
    extraction_function = lambda url: URLPathExtractor.is_valid_url(url)
    filter_function = lambda url: URLPathExtractor.is_path(url) and URLPathExtractor.validate_host(url) and not validate_file_extension(url)

    @classmethod
    def is_path(cls, url):
        """Return True when the URL has a path component other than "/"."""
        path = urlparse(url).path
        return bool(path) and path != "/"

    @classmethod
    def validate_host(cls, url, validate_tld=True):
        """
        Validate the host part of ``url``.

        Args:
            url (str): The URL whose hostname is checked.
            validate_tld (bool): When true and the host is not an IP address,
                the result of ``utils.validate_tld`` decides the outcome.

        Returns:
            False when ``validators.hostname`` rejects the host; the result of
            ``utils.validate_tld`` when TLD validation applies; True otherwise.
        """
        uri = urlparse(url)
        if not validators.hostname(uri.hostname):
            return False
        if validate_tld and not cls.is_ip_address(uri.hostname):
            return utils.validate_tld(uri.hostname)
        return True

    @staticmethod
    def is_ip_address(address):
        """Return True when ``address`` parses as an IPv4 or IPv6 address."""
        try:
            ip_address(address)
        except ValueError:
            # ipaddress.ip_address signals unparseable input with ValueError;
            # a bare ``except:`` would also hide KeyboardInterrupt/SystemExit.
            return False
        return True

    @staticmethod
    def is_valid_url(url):
        """
        Check whether ``url`` is a well-formed URL with an acceptable domain.

        Args:
            url (str): The URL to be validated.

        Returns:
            bool: True if ``validators.url`` accepts the URL and
            ``check_false_positive_domain`` returns a truthy value for its
            registered domain; False otherwise, including when any step raises.
        """
        try:
            if validators.url(url):
                extracted_domain = tldextract.extract(url)
                if check_false_positive_domain(extracted_domain.domain):
                    return True
        except Exception:
            # Best effort: any failure while validating means "not a URL".
            return False

        # Default case: URL is not valid or doesn't meet the conditions.
        return False
70
+
71
+
72
class HostnamePathExtractor(URLPathExtractor):
    """Variant of ``URLPathExtractor`` that validates the host with ``validate_tld=False``."""

    name = "pattern_host_name_path"
    filter_function = lambda url: (
        URLPathExtractor.is_path(url)
        and URLPathExtractor.validate_host(url, validate_tld=False)
        and not validate_file_extension(url)
    )
txt2stix/retriever.py ADDED
@@ -0,0 +1,126 @@
1
+ import logging
2
+ from urllib.parse import urljoin
3
+ import dotenv, os
4
+ import stix2
5
+ import requests
6
+
7
+ dotenv.load_dotenv()
8
+
9
+
10
class STIXObjectRetriever:
    """
    Small HTTP client that fetches objects from a "ctibutler" or "vulmatch" API.

    The API root and key are read from the environment:
    ``CTIBUTLER_BASE_URL`` / ``CTIBUTLER_API_KEY`` or
    ``VULMATCH_BASE_URL`` / ``VULMATCH_API_KEY``.
    """

    def __init__(self, host="ctibutler") -> None:
        """
        Args:
            host (str): Either "ctibutler" or "vulmatch".

        Raises:
            KeyError: If the base-URL environment variable for ``host`` is unset.
            NotImplementedError: If ``host`` is not a supported backend.
        """
        if host == "ctibutler":
            self.api_root = os.environ['CTIBUTLER_BASE_URL'] + '/'
            self.api_key = os.environ.get('CTIBUTLER_API_KEY')
        elif host == "vulmatch":
            self.api_root = os.environ['VULMATCH_BASE_URL'] + '/'
            self.api_key = os.environ.get('VULMATCH_API_KEY')
        else:
            # Exceptions do not apply %-formatting to their arguments, so the
            # message has to be formatted before it is raised.
            raise NotImplementedError(f"The type `{host}` is not supported")

    def get_attack_object(self, matrix, attack_id):
        """Fetch the ATT&CK object ``attack_id`` from the given ``matrix``."""
        endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/{attack_id}/")
        return self._retrieve_objects(endpoint)

    def get_attack_objects(self, matrix, attack_ids):
        """Fetch several ATT&CK objects of ``matrix`` in one (paginated) query."""
        endpoint = urljoin(self.api_root, f"v1/attack-{matrix}/objects/")
        return self._retrieve_objects(endpoint, params={"attack_id": ','.join(attack_ids)})

    def get_objects_by_id(self, id, type):
        """Fetch the object ``id`` from the ``type`` collection."""
        return self._retrieve_objects(urljoin(self.api_root, f"v1/{type}/objects/{id}/"))

    def get_location_objects(self, id):
        """Fetch location objects filtered by ``alpha2_code``."""
        endpoint = urljoin(self.api_root, "v1/location/objects/")
        return self._retrieve_objects(endpoint, params={"alpha2_code": id})

    def get_objects_by_name(self, name, type):
        """Fetch objects of the ``type`` collection filtered by ``name``."""
        endpoint = urljoin(self.api_root, f"v1/{type}/objects/")
        return self._retrieve_objects(endpoint, params={"name": name})

    def get_objects_by_alias(self, alias, type):
        """Fetch objects of the ``type`` collection filtered by ``alias``."""
        endpoint = urljoin(self.api_root, f"v1/{type}/objects/")
        return self._retrieve_objects(endpoint, params={"alias": alias})

    def _retrieve_objects(self, endpoint, key='objects', params=None):
        """
        GET every page of ``endpoint`` and collect the items found under ``key``.

        Args:
            endpoint (str): Absolute URL to query.
            key (str): Key of the JSON response holding the page's items.
            params (dict | None): Extra query parameters. They are handed to
                ``requests`` so values containing characters such as "&", "#"
                or spaces are URL-encoded instead of corrupting the query.

        Returns:
            list: Items of all pages, in response order.

        Raises:
            requests.HTTPError: If a response has an error status.
        """
        data = []
        page = 1
        # The context manager closes the session's connections when done.
        with requests.Session() as s:
            s.headers.update({
                "API-KEY": self.api_key,
            })
            while True:
                query = dict(params or {})
                query.update(page=page, page_size=50)
                resp = s.get(endpoint, params=query)
                resp.raise_for_status()
                d = resp.json()
                if len(d[key]) == 0:
                    break
                data.extend(d[key])
                page += 1
                # A short page means there is nothing left to fetch.
                if d['page_results_count'] < d['page_size']:
                    break
        return data
59
+
60
def retrieve_stix_objects(stix_mapping: str, id, host=None):
    """
    Look up remote objects for ``id`` according to ``stix_mapping``.

    When ``host`` is not given, ``stix_mapping`` is split at its first "-"
    into the host name and the object path (the mapping "location" always
    uses the "ctibutler" host). The object path then selects which
    ``STIXObjectRetriever`` query is issued.

    Returns:
        The list returned by the retriever, or None when the mapping is not
        supported or the lookup fails (failures are logged, not raised).
    """
    try:
        object_path = stix_mapping
        if stix_mapping in ['location']:
            host = 'ctibutler'
        if not host:
            host, object_path = stix_mapping.split('-', 1)
        retriever = STIXObjectRetriever(host)

        # ATT&CK by ID: object path -> matrix.
        attack_matrix_by_id_path = {
            'mitre-attack-ics-id': 'ics',
            'mitre-attack-mobile-id': 'mobile',
            'mitre-attack-enterprise-id': 'enterprise',
        }
        # Others by ID: object path -> collection.
        collection_by_id_path = {
            "mitre-capec-id": 'capec',
            "mitre-atlas-id": 'atlas',
            "disarm-id": 'disarm',
            "mitre-cwe-id": 'cwe',
            "cve-id": 'cve',
            "cpe-id": 'cpe',
        }
        # Lookups by name: object path -> collection.
        collection_by_name_path = {
            "mitre-attack-enterprise-name": 'attack-enterprise',
            "mitre-attack-mobile-name": 'attack-mobile',
            "mitre-attack-ics-name": 'attack-ics',
            "mitre-capec-name": 'capec',
            "mitre-cwe-name": 'cwe',
            "mitre-atlas-name": 'atlas',
            "disarm-name": 'disarm',
        }
        # ATT&CK by alias: object path -> collection.
        collection_by_alias_path = {
            "mitre-attack-enterprise-aliases": 'attack-enterprise',
            "mitre-attack-mobile-aliases": 'attack-mobile',
            "mitre-attack-ics-aliases": 'attack-ics',
        }

        if object_path in attack_matrix_by_id_path:
            return retriever.get_attack_object(attack_matrix_by_id_path[object_path], id)
        if object_path in collection_by_id_path:
            return retriever.get_objects_by_id(id, collection_by_id_path[object_path])
        if object_path == "location":
            return retriever.get_location_objects(id)
        if object_path in collection_by_name_path:
            return retriever.get_objects_by_name(id, collection_by_name_path[object_path])
        if object_path in collection_by_alias_path:
            return retriever.get_objects_by_alias(id, collection_by_alias_path[object_path])
        raise NotImplementedError(f"pair {(host, object_path)=} not implemented")
    except (NotImplementedError, ValueError):
        # Unsupported or malformed mappings are deliberately ignored.
        pass
    except Exception:
        msg = f"failed to get {object_path} for {id} from {host}"
        logging.info(msg)
        logging.debug(msg, exc_info=True)
    return None
txt2stix/stix.py ADDED
@@ -0,0 +1 @@
1
+ from txt2stix.bundler import *