From 766c6d4660a97a006e7b939b9cd01b696dea8eff Mon Sep 17 00:00:00 2001 From: "Maarten A. Breddels" Date: Fri, 8 Dec 2023 15:34:43 +0100 Subject: [PATCH] performance: pre-render html fully instead of at runtime We still parsed the raw html at runtime and fed that back into the jinja template. We now do that during the ssg build phase, which gives much better performance. --- .../solara_enterprise/ssg.py | 89 +++++++++++-------- solara/__main__.py | 6 +- solara/server/server.py | 25 +++--- 3 files changed, 68 insertions(+), 52 deletions(-) diff --git a/packages/solara-enterprise/solara_enterprise/ssg.py b/packages/solara-enterprise/solara_enterprise/ssg.py index f1d9852a9..89f54c7df 100644 --- a/packages/solara-enterprise/solara_enterprise/ssg.py +++ b/packages/solara-enterprise/solara_enterprise/ssg.py @@ -3,6 +3,7 @@ import threading import time import typing +import urllib from pathlib import Path from typing import List, Optional @@ -124,10 +125,18 @@ def ssg_crawl_route(base_url: str, route: solara.Route, build_path: Path, thread page.locator("#kernel-busy-indicator").wait_for(state="hidden") # page.wait_ time.sleep(0.5) - html = page.content() + raw_html = page.content() except Exception: logger.exception("Failure retrieving content for url: %s", url) raise + request_path = urllib.parse.urlparse(url).path + + import solara.server.server + + # the html from playwright is not what we want, pass it through the jinja template again + html = solara.server.server.read_root(request_path, ssg_data=_ssg_data(raw_html)) + if html is None: + raise Exception(f"Failed to render {url}") path.write_text(html, encoding="utf-8") rprint(f"Wrote to {path}") page.goto("about:blank") @@ -140,12 +149,8 @@ def ssg_crawl_route(base_url: str, route: solara.Route, build_path: Path, thread return results -def ssg_data(path: str) -> Optional[SSGData]: +def ssg_content(path: str) -> Optional[str]: license.check("SSG") - html = "" - # pre_rendered_css = "" - styles = [] - title = "Solara ☀️" # 
still not sure why we sometimes end with a double slash if path.endswith("//"): path = path[:-2] @@ -164,38 +169,46 @@ def ssg_data(path: str) -> Optional[SSGData]: html_path = html_path.with_suffix(".html") if html_path.exists() and html_path.is_file(): logger.info("Using pre-rendered html at %r", html_path) - - from bs4 import BeautifulSoup, Tag - - soup = BeautifulSoup(html_path.read_text("utf8"), "html.parser") - node = soup.find(id="app") - # TODO: add classes... - if node and isinstance(node, Tag): - # only render children - html = "".join(str(x) for x in node.contents) - title_tag = soup.find("title") - if title_tag: - title = title_tag.text - - # include all meta tags - rendered_metas = soup.find_all("meta") - metas = [] - for meta in rendered_metas: - # but only the ones added by solara - if meta.attrs.get("data-solara-head-key"): - metas.append(str(meta)) - - # include all styles - rendered_styles = soup.find_all("style") - for style in rendered_styles: - style_html = str(style) - # in case we want to skip the mathjax css - # if "MJXZERO" in style_html: - # continue - # pre_rendered_css += style_html - styles.append(style_html) - logger.debug("Include style (size is %r mb):\n\t%r", len(style_html) / 1024**2, style_html[:200]) - return SSGData(title=title, html=html, styles=styles, metas=metas) + return html_path.read_text("utf8") else: logger.error("Count not find html at %r", html_path) return None + + +def _ssg_data(html: str) -> Optional[SSGData]: + license.check("SSG") + from bs4 import BeautifulSoup, Tag + + # pre_rendered_css = "" + styles = [] + title = "Solara ☀️" + + soup = BeautifulSoup(html, "html.parser") + node = soup.find(id="app") + # TODO: add classes... 
+ if node and isinstance(node, Tag): + # only render children + html = "".join(str(x) for x in node.contents) + title_tag = soup.find("title") + if title_tag: + title = title_tag.text + + # include all meta tags + rendered_metas = soup.find_all("meta") + metas = [] + for meta in rendered_metas: + # but only the ones added by solara + if meta.attrs.get("data-solara-head-key"): + metas.append(str(meta)) + + # include all styles + rendered_styles = soup.find_all("style") + for style in rendered_styles: + style_html = str(style) + # in case we want to skip the mathjax css + # if "MJXZERO" in style_html: + # continue + # pre_rendered_css += style_html + styles.append(style_html) + logger.debug("Include style (size is %r mb):\n\t%r", len(style_html) / 1024**2, style_html[:200]) + return SSGData(title=title, html=html, styles=styles, metas=metas) diff --git a/solara/__main__.py b/solara/__main__.py index 8f04e2f8c..bad44b4a8 100644 --- a/solara/__main__.py +++ b/solara/__main__.py @@ -11,12 +11,11 @@ import rich import rich_click as click +import solara import uvicorn from rich import print as rprint -from uvicorn.main import LEVEL_CHOICES, LOOP_CHOICES - -import solara from solara.server import settings +from uvicorn.main import LEVEL_CHOICES, LOOP_CHOICES from .server import telemetry @@ -450,6 +449,7 @@ def ssg(app: str, port: int, host: str, headed: bool): """Static site generation""" settings.ssg.headed = headed settings.ssg.enabled = True + settings.main.mode = "production" # always override this os.environ["SOLARA_APP"] = app from solara.server.starlette import ServerStarlette diff --git a/solara/server/server.py b/solara/server/server.py index a8806a8c3..c501aac6b 100644 --- a/solara/server/server.py +++ b/solara/server/server.py @@ -12,7 +12,6 @@ import ipywidgets import jinja2 import requests - import solara import solara.routing @@ -247,7 +246,15 @@ def busy_idle(parent): return False -def read_root(path: str, root_path: str = "", render_kwargs={}, 
use_nbextensions=True) -> Optional[str]: +def read_root(path: str, root_path: str = "", render_kwargs={}, use_nbextensions=True, ssg_data=None) -> Optional[str]: + if settings.ssg.enabled and ssg_data is None: + # simply return the pre-rendered html + from solara_enterprise import ssg + + content = ssg.ssg_content(path) + if content is not None: + return content + default_app = app.apps["__default__"] routes = default_app.routes router = solara.routing.Router(path, routes) @@ -267,15 +274,11 @@ def read_root(path: str, root_path: str = "", render_kwargs={}, use_nbextensions pre_rendered_css = "" pre_rendered_metas = "" title = "Solara ☀️" - if settings.ssg.enabled: - from solara_enterprise import ssg - - ssg_data = ssg.ssg_data(path) - if ssg_data is not None: - pre_rendered_html = ssg_data["html"] - pre_rendered_css = "\n".join(ssg_data["styles"]) - pre_rendered_metas = "\n ".join(ssg_data["metas"]) - title = ssg_data["title"] + if ssg_data is not None: + pre_rendered_html = ssg_data["html"] + pre_rendered_css = "\n".join(ssg_data["styles"]) + pre_rendered_metas = "\n ".join(ssg_data["metas"]) + title = ssg_data["title"] if settings.assets.proxy: # solara acts as a proxy