surblclient 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- surblclient/__init__.py +47 -0
- surblclient/blacklist.py +138 -0
- surblclient/spamhausdbl.py +89 -0
- surblclient/surbl-three-level-tlds +4684 -0
- surblclient/surbl-two-level-tlds +9574 -0
- surblclient/surbl.py +58 -0
- surblclient/uribl.py +33 -0
- surblclient-0.2.0.dist-info/METADATA +157 -0
- surblclient-0.2.0.dist-info/RECORD +10 -0
- surblclient-0.2.0.dist-info/WHEEL +4 -0
surblclient/__init__.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""SURBL checker (http://www.surbl.org/)
|
|
3
|
+
|
|
4
|
+
Example usage:
|
|
5
|
+
>>> from surblclient import surbl
|
|
6
|
+
>>> domain = "foo.bar.test.surbl.org"
|
|
7
|
+
>>> domain in surbl
|
|
8
|
+
True
|
|
9
|
+
>>> surbl.lookup(domain)
|
|
10
|
+
('test.surbl.org', ['ph', 'mw', 'abuse', 'cr'])
|
|
11
|
+
>>> if domain in surbl:
|
|
12
|
+
... base, lists = surbl.lookup(domain)
|
|
13
|
+
... print(f"{base} blacklisted in {lists}")
|
|
14
|
+
...
|
|
15
|
+
test.surbl.org blacklisted in ['ph', 'mw', 'abuse', 'cr']
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
# Copyright (c) 2026 Filip Salo
|
|
19
|
+
#
|
|
20
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
21
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
22
|
+
# in the Software without restriction, including without limitation the rights
|
|
23
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
24
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
25
|
+
# furnished to do so, subject to the following conditions:
|
|
26
|
+
#
|
|
27
|
+
# The above copyright notice and this permission notice shall be included in
|
|
28
|
+
# all copies or substantial portions of the Software.
|
|
29
|
+
#
|
|
30
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
31
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
32
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
33
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
34
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
35
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
36
|
+
# THE SOFTWARE.
|
|
37
|
+
|
|
38
|
+
from .blacklist import Blacklist # noqa: F401
|
|
39
|
+
from .spamhausdbl import SpamhausDBL
|
|
40
|
+
from .surbl import SURBL
|
|
41
|
+
from .uribl import URIBL
|
|
42
|
+
|
|
43
|
+
# Version string of this package.
VERSION = "0.2.0"

# Ready-to-use client instances, created at import time: one per blacklist
# class imported above.
surbl = SURBL()
uribl = URIBL()
spamhausdbl = SpamhausDBL()
|
surblclient/blacklist.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Filip Salo
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
# furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
# THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
"""Main class for the blacklists"""
|
|
24
|
+
|
|
25
|
+
import socket
|
|
26
|
+
from typing import Literal
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def is_ip_address(domain) -> bool:
    """Return True if `domain` looks like a dotted-numeric IP address.

    The check is purely lexical: every dot-separated label must consist of
    digits only. Neither the number of labels nor their numeric range is
    validated, and an empty label (including the empty string) yields False.
    """
    for label in domain.split("."):
        if not label.isdigit():
            return False
    return True
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Blacklist:
|
|
35
|
+
"""An RBL blacklist"""
|
|
36
|
+
|
|
37
|
+
domain = ""
|
|
38
|
+
flags = []
|
|
39
|
+
|
|
40
|
+
def __init__(self) -> None:
|
|
41
|
+
self._cache = (None, None)
|
|
42
|
+
|
|
43
|
+
def get_base_domain(self, domain: str) -> str:
|
|
44
|
+
"""Return the base domain to use for RBL lookup"""
|
|
45
|
+
return domain
|
|
46
|
+
|
|
47
|
+
def _lookup_exact(
|
|
48
|
+
self, domain: str
|
|
49
|
+
) -> tuple[str, list[str]] | Literal[False] | None:
|
|
50
|
+
"""Like 'lookup', but checks the exact domain name given.
|
|
51
|
+
Not for direct use.
|
|
52
|
+
"""
|
|
53
|
+
cached_domain, ip_addresses = self._cache
|
|
54
|
+
if cached_domain != domain:
|
|
55
|
+
try:
|
|
56
|
+
lookup_domain = domain
|
|
57
|
+
if is_ip_address(domain):
|
|
58
|
+
lookup_domain = ".".join(reversed(domain.split(".")))
|
|
59
|
+
# An RBL may return several A records (e.g. one per list); read
|
|
60
|
+
# them all rather than just the first, which gethostbyname does.
|
|
61
|
+
_, _, ip_addresses = socket.gethostbyname_ex(
|
|
62
|
+
lookup_domain + "." + self.domain
|
|
63
|
+
)
|
|
64
|
+
# NOTE: False (not listed) and None (unknown) are both falsy but
|
|
65
|
+
# mean very different things -- None must never be treated as clean.
|
|
66
|
+
# Keep the two return paths distinct; don't collapse them.
|
|
67
|
+
except socket.gaierror as err:
|
|
68
|
+
if err.errno in (socket.EAI_NONAME, socket.EAI_NODATA):
|
|
69
|
+
# No record found
|
|
70
|
+
self._cache = (domain, None)
|
|
71
|
+
return False
|
|
72
|
+
# Unhandled error, pass test for now
|
|
73
|
+
return None
|
|
74
|
+
except OSError:
|
|
75
|
+
# Not sure if this can happen. Timeouts?
|
|
76
|
+
return None
|
|
77
|
+
self._cache = (domain, ip_addresses)
|
|
78
|
+
if ip_addresses is None:
|
|
79
|
+
return False
|
|
80
|
+
return self._decode(domain, ip_addresses)
|
|
81
|
+
|
|
82
|
+
def _decode(
|
|
83
|
+
self, domain: str, ip_addresses: list[str]
|
|
84
|
+
) -> tuple[str, list[str]] | Literal[False] | None:
|
|
85
|
+
"""Interpret the 127.0.0.x answer(s) for `domain`.
|
|
86
|
+
|
|
87
|
+
A DNSxL may return one A record per sublist, and a client "MUST
|
|
88
|
+
interpret any returned A record as meaning that an address or domain is
|
|
89
|
+
listed" -- hence we consider every record, not just the first
|
|
90
|
+
(RFC 5782 sections 2.3 and 6).
|
|
91
|
+
|
|
92
|
+
The default OR-combines the last octet of every returned record into a
|
|
93
|
+
single bitmask over `self.flags` (the "bit masks" approach of RFC 5782
|
|
94
|
+
section 6), with bit 0x1 meaning the query was refused (reported as
|
|
95
|
+
unknown/None). Subclasses whose service uses a different encoding
|
|
96
|
+
override this.
|
|
97
|
+
"""
|
|
98
|
+
flags = 0
|
|
99
|
+
for ip_address in ip_addresses:
|
|
100
|
+
flags |= int(ip_address.split(".")[-1])
|
|
101
|
+
if not flags:
|
|
102
|
+
return False
|
|
103
|
+
if flags & 1:
|
|
104
|
+
# Blocked from making queries
|
|
105
|
+
return None
|
|
106
|
+
return (domain, [s for (n, s) in self.flags if flags & n])
|
|
107
|
+
|
|
108
|
+
def lookup(self, domain: str) -> tuple[str, list[str]] | Literal[False] | None:
|
|
109
|
+
"""Extract the base domain and check it against this blacklist.
|
|
110
|
+
|
|
111
|
+
Returns one of three outcomes:
|
|
112
|
+
- ``(basedomain, lists)`` -- listed; ``lists`` names the sublists hit.
|
|
113
|
+
- ``False`` -- confirmed *not* listed.
|
|
114
|
+
- ``None`` -- unknown: a temporary DNS error, or the service refusing
|
|
115
|
+
the query (e.g. via a public resolver -- see the README).
|
|
116
|
+
|
|
117
|
+
WARNING: ``False`` and ``None`` are *both* falsy, so ``if not
|
|
118
|
+
lookup(...)`` lumps "unknown" together with "not listed". Unknown must
|
|
119
|
+
NOT be treated as clean, so test ``is None`` explicitly before any
|
|
120
|
+
truthiness check.
|
|
121
|
+
"""
|
|
122
|
+
# Remove userinfo
|
|
123
|
+
if "@" in domain:
|
|
124
|
+
domain = domain[domain.index("@") + 1 :]
|
|
125
|
+
|
|
126
|
+
# Remove port
|
|
127
|
+
if ":" in domain:
|
|
128
|
+
domain = domain[: domain.index(":")]
|
|
129
|
+
|
|
130
|
+
if not is_ip_address(domain):
|
|
131
|
+
domain = self.get_base_domain(domain)
|
|
132
|
+
return self._lookup_exact(domain)
|
|
133
|
+
|
|
134
|
+
def __contains__(self, domain: str) -> bool:
|
|
135
|
+
"""Return True if base domain is listed in this blacklist;
|
|
136
|
+
False otherwise.
|
|
137
|
+
"""
|
|
138
|
+
return bool(self.lookup(domain))
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
#
|
|
3
|
+
# Copyright (c) 2026 Filip Salo
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
# furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
# THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
"""Spamhaus DBL domain blocklist."""
|
|
24
|
+
|
|
25
|
+
from typing import Literal
|
|
26
|
+
|
|
27
|
+
from .surbl import SURBL
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SpamhausDBL(SURBL):
    """Client for the Spamhaus DBL (dbl.spamhaus.org) domain blocklist.

    Unlike SURBL/URIBL, the DBL does not pack independent lists into a bitmask.
    It returns a single enumerated code in the 127.0.1.x range:

      * 127.0.1.2 - 127.0.1.99 -> listed; "inherently bad" / safe to block
      * 127.0.1.102 - 127.0.1.199 -> "abused-legit": otherwise-good domains
        observed in abuse (e.g. compromised sites); meant for scoring, not
        outright blocking.

    Any other 127.0.1.x code is not a listing. In particular 127.0.1.255 is
    Spamhaus's "IP queries prohibited" answer, returned when an IP address
    is queried against the DBL. Such codes are reported as unknown (None),
    never as "not listed".

    Error answers live in the 127.255.255.x range and mean the query could not
    be served -- reported as unknown (None):

      * 127.255.255.252 -> typing error / direct test
      * 127.255.255.254 -> query came via a public/open resolver
      * 127.255.255.255 -> excessive number of queries

    Like SURBL/URIBL, Spamhaus blocks queries from public/open resolvers and
    rate-limits; see the README's "Resolver requirements" section.

    Spamhaus documents only the two listing *ranges* as stable, so this client
    classifies by range rather than by individual code.

    Refs: https://www.spamhaus.org/faqs/dnsbl-usage/ and
    https://docs.spamhaus.com/datasets/docs/source/10-data-type-documentation/datasets/030-datasets.html
    """

    domain = "dbl.spamhaus.org."
    # DBL's test point (dbltest.com) is a normal registered domain, so the
    # inherited base-domain reduction already resolves subdomains to it; no
    # pseudo-TLD entries are needed.
    test_domains: set[str] = set()
    flags: list[tuple[int, str]] = []

    def _decode(
        self, domain: str, ip_addresses: list[str]
    ) -> tuple[str, list[str]] | Literal[False] | None:
        """Classify the DBL answer(s) for `domain` by value range.

        Returns ``(domain, labels)`` where ``labels`` holds "bad" and/or
        "abused-legit" (each at most once, in order of first appearance),
        ``False`` when `ip_addresses` is empty, or ``None`` as soon as any
        record is not a recognised listing code.
        """
        # The DBL's codes are enumerated, so decode with value-range tests
        # rather than a bitmask (both are sanctioned by RFC 5782 section 6).
        labels: list[str] = []
        for ip_address in ip_addresses:
            octets = ip_address.split(".")
            # Listings are 127.0.1.x. Anything else (the 127.255.255.x error
            # range, or anything unexpected) means we couldn't get a real
            # answer for this query at all.
            if octets[:3] != ["127", "0", "1"]:
                return None
            code = int(octets[3])
            if 2 <= code <= 99:
                label = "bad"
            elif 102 <= code <= 199:
                label = "abused-legit"
            else:
                # A record came back, so this is not "no such entry", yet
                # the code is outside both listing ranges (e.g. 127.0.1.255,
                # "IP queries prohibited"). Unknown must not be reported as
                # clean.
                return None
            if label not in labels:
                labels.append(label)
        if not labels:
            return False
        return (domain, labels)
|