Skip to content

Commit

Permalink
recreate media types file
Browse files (browse the repository at this point in the history)
  • Loading branch information
Archmonger committed Oct 2, 2024
1 parent f71e028 commit d67ed55
Showing 1 changed file with 98 additions and 0 deletions.
98 changes: 98 additions & 0 deletions scripts/generate_default_media_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# pragma: no cover
from __future__ import annotations

import argparse
import http.client
import re
from contextlib import closing
from pathlib import Path

# Absolute path of the directory that contains this script.
module_dir = Path(__file__).parent.resolve()
# The media types module whose ``default_types`` function main() rewrites.
# The path is built relative to this script's directory; the ".." segment is
# kept as written rather than normalised.
media_types_py = module_dir / "../src/servestatic/media_types.py"


def main() -> int:
    """Regenerate the ``default_types`` function inside the media types module.

    The freshly generated function source replaces everything from
    ``def default_types`` up to the last ``}`` in the target file.

    Command-line flags:
        --check: do not write anything; only report whether the file would
            change.

    Returns:
        ``1`` when ``--check`` is given and the file is out of date,
        otherwise ``0`` (used as the process exit status).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true")
    args = parser.parse_args()

    func_str = get_default_types_function()
    # Python source files are UTF-8 by default, so do not depend on the locale.
    text = media_types_py.read_text(encoding="utf-8")
    new_text = re.sub(
        # Greedy + DOTALL: spans from the ``def`` line to the final ``}``.
        r"def default_types.*\}",
        # A callable replacement is inserted verbatim; a plain string would
        # have its backslashes interpreted as escapes / group references.
        lambda _match: func_str,
        text,
        flags=re.DOTALL,
    )
    if new_text != text:
        if args.check:
            print("Would write changes")
            return 1
        print(f"Writing {media_types_py}")
        media_types_py.write_text(new_text, encoding="utf-8")
    return 0


# Additions and overrides merged on top of the map derived from nginx's
# mime.types. Keys beginning with "." are suffixes; the remaining keys look
# like complete file names (presumably matched as such by the consumer in
# media_types.py -- confirm there).
EXTRA_MIMETYPES = {
    # Nginx uses application/javascript, but HTML specification recommends text/javascript:
    ".js": "text/javascript",
    ".md": "text/markdown",
    ".mjs": "text/javascript",
    ".woff": "application/font-woff",
    ".woff2": "font/woff2",
    # "application/pkcs7-mime" is the registered media type name (RFC 8551);
    # the earlier "pkc7" spelling was a typo.
    "apple-app-site-association": "application/pkcs7-mime",
    # Adobe: https://www.adobe.com/devnet-docs/acrobatetk/tools/AppSec/xdomain.html#policy-file-host-basics
    "crossdomain.xml": "text/x-cross-domain-policy",
}


# Source text of the generated ``default_types`` function, rendered with
# ``str.format``. ``{entries}`` is the placeholder for the dict items (one per
# line); the doubled braces ``{{`` / ``}}`` produce literal ``{`` / ``}``.
# The function body is indented four spaces so the rendered text is valid
# Python when written into the target module.
FUNCTION_TEMPLATE = '''\
def default_types() -> dict[str, str]:
    """
    We use our own set of default media types rather than the system-supplied
    ones. This ensures consistent media type behaviour across varied
    environments. The defaults are based on those shipped with nginx, with
    some custom additions.
    (Auto-generated by scripts/generate_default_media_types.py)
    """
    return {{
{entries}
    }}'''


def get_default_types_function() -> str:
    """Render the source code of the generated ``default_types`` function.

    Each suffix/media-type pair from ``get_types_map()`` becomes one
    ``"suffix": "media_type",`` line, and the joined lines are substituted
    for the ``{entries}`` placeholder of ``FUNCTION_TEMPLATE``.

    Returns:
        The complete function definition as a string.
    """
    types_map = get_types_map()
    # Eight spaces: the depth of an item in a dict literal returned from a
    # function body written in four-space style.
    lines = [f'        "{suffix}": "{media_type}",' for suffix, media_type in types_map.items()]
    return FUNCTION_TEMPLATE.format(entries="\n".join(lines))


def get_types_map() -> dict[str, str]:
    """Build the suffix -> media type mapping written into the generated code.

    The nginx ``mime.types`` text is scanned for ``<type> <ext> [<ext> ...];``
    entries. Every extension is stored with a leading dot, the entries of
    ``EXTRA_MIMETYPES`` are then merged in (overriding nginx's values on a
    key clash), and the result is returned sorted by key.

    Returns:
        A dict ordered by key that maps each suffix or file name to its
        media type.
    """
    collected: dict[str, str] = {}
    for media_type, suffix_field in re.findall(r"(\w+/.*?)\s+(.*?);", get_nginx_data()):
        # This is the default media type anyway, so listing it adds nothing.
        if media_type == "application/octet-stream":
            continue
        collected.update({f".{ext}": media_type for ext in suffix_field.split()})

    collected.update(EXTRA_MIMETYPES)
    return dict(sorted(collected.items()))


def get_nginx_data() -> str:
    """Download nginx's ``conf/mime.types`` from raw.githubusercontent.com.

    Returns:
        The decoded response body.

    Raises:
        AssertionError: if the server answers with a status other than 200.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    conn = http.client.HTTPSConnection("raw.githubusercontent.com", timeout=30)
    with closing(conn):
        conn.request("GET", "/nginx/nginx/master/conf/mime.types")
        response = conn.getresponse()
        if response.status != 200:
            msg = (
                "Unexpected HTTP status while fetching nginx mime.types: "
                f"{response.status} {response.reason}"
            )
            raise AssertionError(msg)
        # Read the body before the connection is closed by the ``with`` block.
        return response.read().decode()


# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

0 comments on commit d67ed55

Please sign in to comment.