-
Notifications
You must be signed in to change notification settings - Fork 0
/
verify.py
130 lines (101 loc) · 3.78 KB
/
verify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
"""Verify the integrity of the domain blocklist and allowlist.
"""
import io
import sys
from collections import Counter
from publicsuffixlist import PublicSuffixList
from requests import get
# Names of the two list files that this script verifies.
blocklist = "disposable_email_blocklist.conf"
allowlist = "allowlist.conf"


def _read_lines(filename):
    """Return the lines of *filename* without their line terminators.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the content has been read, instead of being left for the garbage
    collector to release.
    """
    with open(filename) as handle:
        return handle.read().splitlines()


# Contents of both lists keyed by filename, read once when the module loads.
files = {filename: _read_lines(filename) for filename in [allowlist, blocklist]}
def download_suffixes():
    """Download the Public Suffix List to ``public_suffix_list.dat``.

    The response is fetched and validated before the destination file is
    opened, so a failed request neither truncates an existing copy nor
    stores an HTTP error page as if it were the list.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout a stalled connection would block forever.
    response = get(
        "https://publicsuffix.org/list/public_suffix_list.dat", timeout=60
    )
    response.raise_for_status()

    # Binary mode: store the payload exactly as it was served.
    with open("public_suffix_list.dat", "wb") as file:
        file.write(response.content)
def check_for_public_suffixes(filename):
    """Exit with status 1 if any entry of *filename* is a bare public suffix.

    Each entry of ``files[filename]`` is stripped of surrounding whitespace
    and compared with the public suffix that the list stored in
    ``public_suffix_list.dat`` yields for it. Every match is reported with
    its 1-based line number; once all entries have been examined a summary
    is printed and the process exits.

    Args:
        filename: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when at least one public suffix was found.
    """
    # The suffix list is saved as raw bytes and is UTF-8 encoded, so decode
    # it explicitly instead of relying on the platform's default encoding.
    with open("public_suffix_list.dat", "r", encoding="utf-8") as latest:
        psl = PublicSuffixList(latest)

    suffix_detected = False
    for i, line in enumerate(files[filename]):
        current_line = line.strip()
        if psl.publicsuffix(current_line) == current_line:
            print(
                f"The line number {i+1} contains just a public suffix: {current_line}"
            )
            # Keep scanning so every offending line is listed in one run.
            suffix_detected = True

    if suffix_detected:
        print(
            "At least one valid public suffix found in {!r}, please "
            "remove it. See https://publicsuffix.org for details on why this "
            "shouldn't be blocklisted.".format(filename)
        )
        sys.exit(1)
def check_for_third_level_domains(filename):
    """Exit with status 1 if any entry of *filename* carries extra labels.

    An entry is reported when ``psl.privateparts()`` of its stripped text
    has more than one element, i.e. there are labels in front of the
    private suffix. All offending entries are printed in sorted order
    before the process exits.

    Args:
        filename: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when at least one such entry was found.
    """
    # The suffix list is saved as raw bytes and is UTF-8 encoded, so decode
    # it explicitly instead of relying on the platform's default encoding.
    with open("public_suffix_list.dat", "r", encoding="utf-8") as latest:
        psl = PublicSuffixList(latest)

    invalid = set()
    for line in files[filename]:
        parts = psl.privateparts(line.strip())
        # privateparts() gives None when the entry has no private suffix
        # (for example a blank line or a bare public suffix); such entries
        # have no sub-labels to complain about, and calling len() on None
        # would raise TypeError.
        if parts is not None and len(parts) > 1:
            invalid.add(line)

    if invalid:
        print("The following domains contain a third or lower level domain in {!r}:".format(filename))
        for line in sorted(invalid):
            print("* {}".format(line))
        sys.exit(1)
def check_for_non_lowercase(filename):
    """Exit with status 1 if *filename* holds entries that are not lowercase.

    An entry is reported when it is absent from the set of lowercased
    entries of the same list. Offenders are printed in sorted order.

    Args:
        filename: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when at least one entry is reported.
    """
    entries = files[filename]
    lowered = {entry.lower() for entry in entries}
    offenders = sorted(set(entries) - lowered)

    # Nothing to report: return normally so the next check can run.
    if not offenders:
        return

    print("The following domains should be lowercased in {!r}:".format(filename))
    for entry in offenders:
        print("* {}".format(entry))
    sys.exit(1)
def check_for_duplicates(filename):
    """Exit with status 1 if any entry occurs more than once in *filename*.

    Repeated entries are printed once each, in sorted order.

    Args:
        filename: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when at least one entry is repeated.
    """
    occurrences = Counter(files[filename])
    repeated = sorted(entry for entry, times in occurrences.items() if times > 1)

    # Nothing to report: return normally so the next check can run.
    if not repeated:
        return

    print("The following domains appear twice in {!r}:".format(filename))
    for entry in repeated:
        print("* {}".format(entry))
    sys.exit(1)
def check_sort_order(filename):
    """Exit with status 1 if the entries of *filename* are not sorted.

    The list is compared position by position with its sorted copy; only
    the first mismatch is reported before the process exits.

    Args:
        filename: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when a mismatch is found.
    """
    entries = files[filename]
    mismatches = (
        (found, wanted)
        for found, wanted in zip(entries, sorted(entries))
        if found != wanted
    )
    first_mismatch = next(mismatches, None)

    # Every position agrees with the sorted copy: the list is in order.
    if first_mismatch is None:
        return

    found, wanted = first_mismatch
    print("The list is not sorted in {!r}:".format(filename))
    print("* {!r} should come before {!r}".format(wanted, found))
    sys.exit(1)
def check_for_intersection(filename_a, filename_b):
    """Exit with status 1 if the two lists have any entry in common.

    Shared entries are printed in sorted order.

    Args:
        filename_a: key into the module-level ``files`` mapping.
        filename_b: key into the module-level ``files`` mapping.

    Raises:
        SystemExit: with code 1 when the lists overlap.
    """
    shared = sorted(set(files[filename_a]).intersection(files[filename_b]))

    # The lists are disjoint: return normally.
    if not shared:
        return

    print("The following domains appear in both lists:")
    for domain in shared:
        print("* {}".format(domain))
    sys.exit(1)
if __name__ == "__main__":
    # Fetch the public suffix list first; the two suffix-based checks read
    # it back from disk.
    download_suffixes()

    # Public-suffix and third-level-domain checks run on the blocklist only.
    for suffix_check in (check_for_public_suffixes, check_for_third_level_domains):
        suffix_check(blocklist)

    # Lowercase, duplicate and ordering checks: each one runs on the
    # allowlist and then on the blocklist before the next check starts.
    for list_check in (check_for_non_lowercase, check_for_duplicates, check_sort_order):
        for list_name in (allowlist, blocklist):
            list_check(list_name)

    # Finally, no domain may be present in both lists.
    check_for_intersection(allowlist, blocklist)