# pragma: no cover
# Recovered from patch d67ed550285094fa43dfdde3ec9263917cdb703e
# ("recreate media types file"), which adds this script as
# scripts/generate_default_media_types.py.
from __future__ import annotations

import argparse
import http.client
import re
import sys
from contextlib import closing
from pathlib import Path

module_dir = Path(__file__).parent.resolve()
media_types_py = module_dir / "../src/servestatic/media_types.py"

# Matches from the ``def default_types`` line through the LAST ``}`` in the
# target file (greedy + DOTALL), i.e. the whole generated function when it is
# the final brace-bearing construct in that module.
DEFAULT_TYPES_PATTERN = re.compile(r"def default_types.*\}", flags=re.DOTALL)

# Seconds to wait on the upstream mime.types download before giving up.
FETCH_TIMEOUT = 30


def main() -> int:
    """Regenerate ``default_types()`` in the media types module.

    With ``--check`` nothing is written; the exit status reports whether the
    file is out of date.

    Returns:
        0 when the file is already up to date or was rewritten, 1 when
        ``--check`` found pending changes or when no ``default_types``
        function could be located in the target file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--check", action="store_true")
    args = parser.parse_args()

    func_str = get_default_types_function()
    text = media_types_py.read_text(encoding="utf-8")
    # Use a function replacement so the generated source is inserted
    # literally; a plain string would be parsed as a regex replacement
    # template (backslashes and \g<...> references would be interpreted).
    new_text, count = DEFAULT_TYPES_PATTERN.subn(lambda _match: func_str, text)
    if count == 0:
        # Without this guard a renamed/missing function looks like "up to date".
        print(
            f"No default_types() function found in {media_types_py}",
            file=sys.stderr,
        )
        return 1
    if new_text == text:
        return 0
    if args.check:
        print("Would write changes")
        return 1
    print(f"Writing {media_types_py}")
    media_types_py.write_text(new_text, encoding="utf-8")
    return 0


# Entries applied on top of the nginx-derived map; they override any nginx
# entry with the same key. Keys without a leading dot are whole file names.
EXTRA_MIMETYPES = {
    # Nginx uses application/javascript, but HTML specification recommends text/javascript:
    ".js": "text/javascript",
    ".md": "text/markdown",
    ".mjs": "text/javascript",
    ".woff": "application/font-woff",
    ".woff2": "font/woff2",
    # NOTE(review): the registered type is "application/pkcs7-mime"; "pkc7"
    # looks like a typo. Left as-is because changing it alters the generated
    # output -- confirm before fixing.
    "apple-app-site-association": "application/pkc7-mime",
    # Adobe: https://www.adobe.com/devnet-docs/acrobatetk/tools/AppSec/xdomain.html#policy-file-host-basics
    "crossdomain.xml": "text/x-cross-domain-policy",
}


# Source text of the generated function. Doubled braces are literal braces
# for str.format; ``{entries}`` receives one pre-indented line per mapping.
FUNCTION_TEMPLATE = '''\
def default_types() -> dict[str, str]:
    """
    We use our own set of default media types rather than the system-supplied
    ones. This ensures consistent media type behaviour across varied
    environments. The defaults are based on those shipped with nginx, with
    some custom additions.

    (Auto-generated by scripts/generate_default_media_types.py)
    """
    return {{
{entries}
    }}'''


def get_default_types_function(types_map: dict[str, str] | None = None) -> str:
    """Render the source code of ``default_types()``.

    Args:
        types_map: Mapping of suffix/file name to media type. When omitted it
            is built by ``get_types_map()``, which downloads the nginx data.

    Returns:
        The function source, without a trailing newline.
    """
    if types_map is None:
        types_map = get_types_map()
    lines = [
        f'        "{suffix}": "{media_type}",'
        for suffix, media_type in types_map.items()
    ]
    return FUNCTION_TEMPLATE.format(entries="\n".join(lines))


def get_types_map(nginx_data: str | None = None) -> dict[str, str]:
    """Build the suffix -> media type map from nginx's ``mime.types`` text.

    Args:
        nginx_data: Contents of an nginx ``mime.types`` file. When omitted it
            is downloaded via ``get_nginx_data()``.

    Returns:
        A dict sorted by key: nginx extensions prefixed with ``"."``, then
        ``EXTRA_MIMETYPES`` applied on top.
    """
    if nginx_data is None:
        nginx_data = get_nginx_data()
    # Each entry is "<type>/<subtype>  ext [ext ...];" and \s+ also spans the
    # line break nginx uses after very long media type names.
    matches = re.findall(r"(\w+/.*?)\s+(.*?);", nginx_data)
    types_map: dict[str, str] = {}
    for media_type, extensions in matches:
        # This is the default media type anyway, no point specifying it explicitly
        if media_type == "application/octet-stream":
            continue
        for extension in extensions.split():
            types_map[f".{extension}"] = media_type
    types_map.update(EXTRA_MIMETYPES)
    return dict(sorted(types_map.items()))


def get_nginx_data() -> str:
    """Download nginx's ``conf/mime.types`` from GitHub and return its text.

    Raises:
        AssertionError: If the server answers with a status other than 200.
    """
    conn = http.client.HTTPSConnection(
        "raw.githubusercontent.com", timeout=FETCH_TIMEOUT
    )
    with closing(conn):
        conn.request("GET", "/nginx/nginx/master/conf/mime.types")
        response = conn.getresponse()
        if response.status != 200:
            raise AssertionError(
                f"Unexpected HTTP status {response.status} {response.reason} "
                "while fetching nginx mime.types"
            )
        return response.read().decode("utf-8")


if __name__ == "__main__":
    raise SystemExit(main())