-
Notifications
You must be signed in to change notification settings - Fork 244
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix and catch unintended uses of inline HTML (#1716)
* fix missing text in "KMS Provider" section of the Client Side Encryption spec due to less-than symbol
* manual audit of all less-than symbols followed by a letter
* reformat python scripts using "black"
* let scripts detect fenced code inside block-quotes
* add a pre-commit script to check HTML tags against allowed patterns
* replace HTTP links with equivalent HTTPS
- Loading branch information
Showing 13 changed files with 190 additions and 115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,29 @@ | ||
import sys | ||
import sys, re | ||
|
||
fname = sys.argv[-1] | ||
|
||
# Roughly detect fenced code even inside block quotes | ||
fenced_code = re.compile(r"^\s*(>\s+)*```") | ||
|
||
# Check for markdown links that got improperly line wrapped. | ||
in_code_block = False | ||
with open(fname) as fid: | ||
for line in fid: | ||
# Ignore code blocks. | ||
if line.strip().startswith('```'): | ||
if fenced_code.match(line): | ||
in_code_block = not in_code_block | ||
if in_code_block: | ||
continue | ||
id0 = line.index('[') if '[' in line else -1 | ||
id1 = line.index(']') if ']' in line else -1 | ||
id2 = line.index('(') if '(' in line else -1 | ||
id3 = line.index(')') if ')' in line else -1 | ||
id0 = line.index("[") if "[" in line else -1 | ||
id1 = line.index("]") if "]" in line else -1 | ||
id2 = line.index("(") if "(" in line else -1 | ||
id3 = line.index(")") if ")" in line else -1 | ||
if id1 == -1 or id2 == -1 or id3 == -1: | ||
continue | ||
if id2 < id1 or id3 < id2: | ||
continue | ||
if id0 == -1: | ||
print('*** Malformed link in line:', line, fname) | ||
sys.exit(1) | ||
print("*** Malformed link in line:", line, fname) | ||
sys.exit(1) | ||
|
||
assert not in_code_block |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import sys, re | ||
|
||
fname = sys.argv[-1] | ||
|
||
# Check for allowed HTML elements in markdown. | ||
# Ignores inline and fenced code, but intentionally doesn't ignore backslash | ||
# escaping. (For compatibility, we want to avoid unintentional inline HTML | ||
# even on markdown implementations where "\<" escapes are not supported.) | ||
|
||
disallowed_re = re.compile( | ||
r""" | ||
[^`]*(`[^`]+`)* | ||
<(?! | ||
- | | ||
/p> | | ||
/span> | | ||
/sub> | | ||
/sup> | | ||
/table> | | ||
/td> | | ||
/tr> | | ||
\d | | ||
\s | | ||
\w+@(\w+\.)+\w+> | # Cover email addresses in license files | ||
= | | ||
br> | | ||
https:// | # Cover HTTPS links but not HTTP | ||
p> | | ||
span[\s>] | | ||
sub> | | ||
sup> | | ||
table[\s>] | | ||
td[\s>] | | ||
tr> | | ||
!-- ) | ||
""", | ||
re.VERBOSE, | ||
) | ||
|
||
# Roughly detect fenced code even inside block quotes | ||
fenced_code = re.compile(r"^\s*(>\s+)*```") | ||
|
||
in_code_block = False | ||
with open(fname) as fid: | ||
for line in fid: | ||
# Ignore code blocks. | ||
if fenced_code.match(line): | ||
in_code_block = not in_code_block | ||
if in_code_block: | ||
continue | ||
if disallowed_re.match(line): | ||
print("*** Markdown contains unexpected HTML in line:", line, fname) | ||
sys.exit(1) | ||
|
||
assert not in_code_block |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.