From 1f4b9e177b8009d600e1cf051665b0a89d00404e Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Fri, 8 Dec 2023 15:34:43 +0100 Subject: [PATCH] performance: pre-render html fully instead of at runtime We still parsed the raw html at runtime and fed that back into the jinja template. We now do that as the ssg build phase, which gives much better performance. --- .../solara_enterprise/ssg.py | 89 +++++++++++-------- solara/__main__.py | 6 +- solara/server/server.py | 25 +++--- 3 files changed, 68 insertions(+), 52 deletions(-) diff --git a/packages/solara-enterprise/solara_enterprise/ssg.py b/packages/solara-enterprise/solara_enterprise/ssg.py index f1d9852a9..89f54c7df 100644 --- a/packages/solara-enterprise/solara_enterprise/ssg.py +++ b/packages/solara-enterprise/solara_enterprise/ssg.py @@ -3,6 +3,7 @@ import threading import time import typing +import urllib from pathlib import Path from typing import List, Optional @@ -124,10 +125,18 @@ def ssg_crawl_route(base_url: str, route: solara.Route, build_path: Path, thread page.locator("#kernel-busy-indicator").wait_for(state="hidden") # page.wait_ time.sleep(0.5) - html = page.content() + raw_html = page.content() except Exception: logger.exception("Failure retrieving content for url: %s", url) raise + request_path = urllib.parse.urlparse(url).path + + import solara.server.server + + # the html from playwright is not what we want, pass it through the jinja template again + html = solara.server.server.read_root(request_path, ssg_data=_ssg_data(raw_html)) + if html is None: + raise Exception(f"Failed to render {url}") path.write_text(html, encoding="utf-8") rprint(f"Wrote to {path}") page.goto("about:blank") @@ -140,12 +149,8 @@ def ssg_crawl_route(base_url: str, route: solara.Route, build_path: Path, thread return results -def ssg_data(path: str) -> Optional[SSGData]: +def ssg_content(path: str) -> Optional[str]: license.check("SSG") - html = "" - # pre_rendered_css = "" - styles = [] - title = "Solara ☀️" # still not sure why we sometimes end with a double slash if path.endswith("//"): path = path[:-2] @@ -164,38 +169,46 @@ def ssg_data(path: str) -> Optional[SSGData]: html_path = html_path.with_suffix(".html") if html_path.exists() and html_path.is_file(): logger.info("Using pre-rendered html at %r", html_path) - - from bs4 import BeautifulSoup, Tag - - soup = BeautifulSoup(html_path.read_text("utf8"), "html.parser") - node = soup.find(id="app") - # TODO: add classes... - if node and isinstance(node, Tag): - # only render children - html = "".join(str(x) for x in node.contents) - title_tag = soup.find("title") - if title_tag: - title = title_tag.text - - # include all meta tags - rendered_metas = soup.find_all("meta") - metas = [] - for meta in rendered_metas: - # but only the ones added by solara - if meta.attrs.get("data-solara-head-key"): - metas.append(str(meta)) - - # include all styles - rendered_styles = soup.find_all("style") - for style in rendered_styles: - style_html = str(style) - # in case we want to skip the mathjax css - # if "MJXZERO" in style_html: - # continue - # pre_rendered_css += style_html - styles.append(style_html) - logger.debug("Include style (size is %r mb):\n\t%r", len(style_html) / 1024**2, style_html[:200]) - return SSGData(title=title, html=html, styles=styles, metas=metas) + return html_path.read_text("utf8") else: logger.error("Count not find html at %r", html_path) return None + + +def _ssg_data(html: str) -> Optional[SSGData]: + license.check("SSG") + from bs4 import BeautifulSoup, Tag + + # pre_rendered_css = "" + styles = [] + title = "Solara ☀️" + + soup = BeautifulSoup(html, "html.parser") + node = soup.find(id="app") + # TODO: add classes... + if node and isinstance(node, Tag): + # only render children + html = "".join(str(x) for x in node.contents) + title_tag = soup.find("title") + if title_tag: + title = title_tag.text + + # include all meta tags + rendered_metas = soup.find_all("meta") + metas = [] + for meta in rendered_metas: + # but only the ones added by solara + if meta.attrs.get("data-solara-head-key"): + metas.append(str(meta)) + + # include all styles + rendered_styles = soup.find_all("style") + for style in rendered_styles: + style_html = str(style) + # in case we want to skip the mathjax css + # if "MJXZERO" in style_html: + # continue + # pre_rendered_css += style_html + styles.append(style_html) + logger.debug("Include style (size is %r mb):\n\t%r", len(style_html) / 1024**2, style_html[:200]) + return SSGData(title=title, html=html, styles=styles, metas=metas) diff --git a/solara/__main__.py b/solara/__main__.py index 8f04e2f8c..bad44b4a8 100644 --- a/solara/__main__.py +++ b/solara/__main__.py @@ -11,12 +11,11 @@ import rich import rich_click as click +import solara import uvicorn from rich import print as rprint -from uvicorn.main import LEVEL_CHOICES, LOOP_CHOICES - -import solara from solara.server import settings +from uvicorn.main import LEVEL_CHOICES, LOOP_CHOICES from .server import telemetry @@ -450,6 +449,7 @@ def ssg(app: str, port: int, host: str, headed: bool): """Static site generation""" settings.ssg.headed = headed settings.ssg.enabled = True + settings.main.mode = "production" # always override this os.environ["SOLARA_APP"] = app from solara.server.starlette import ServerStarlette diff --git a/solara/server/server.py b/solara/server/server.py index 528a03e24..cf2450c57 100644 --- a/solara/server/server.py +++ b/solara/server/server.py @@ -12,7 +12,6 @@ import ipywidgets import jinja2 import requests - import solara import solara.routing import solara.settings @@ -248,7 +247,15 @@ def busy_idle(parent): return False -def read_root(path: str, root_path: str = "", render_kwargs={}, use_nbextensions=True) -> Optional[str]: +def read_root(path: str, root_path: str = "", render_kwargs={}, use_nbextensions=True, ssg_data=None) -> Optional[str]: + if settings.ssg.enabled and ssg_data is None: + # simply return the pre-rendered html + from solara_enterprise import ssg + + content = ssg.ssg_content(path) + if content is not None: + return content + default_app = app.apps["__default__"] routes = default_app.routes router = solara.routing.Router(path, routes) @@ -324,15 +331,11 @@ def include_js(path: str, module=False) -> Markup: pre_rendered_css = "" pre_rendered_metas = "" title = "Solara ☀️" - if settings.ssg.enabled: - from solara_enterprise import ssg - - ssg_data = ssg.ssg_data(path) - if ssg_data is not None: - pre_rendered_html = ssg_data["html"] - pre_rendered_css = "\n".join(ssg_data["styles"]) - pre_rendered_metas = "\n ".join(ssg_data["metas"]) - title = ssg_data["title"] + if ssg_data is not None: + pre_rendered_html = ssg_data["html"] + pre_rendered_css = "\n".join(ssg_data["styles"]) + pre_rendered_metas = "\n ".join(ssg_data["metas"]) + title = ssg_data["title"] if solara.settings.assets.proxy: # solara acts as a proxy