From 44552693b2fe073b7963933fe830296a99e16a87 Mon Sep 17 00:00:00 2001 From: Sid Vishnoi <8426945+sidvishnoi@users.noreply.github.com> Date: Sun, 7 Jul 2024 17:32:34 +0530 Subject: [PATCH] wip: 1 --- profiles/w3c.js | 1 + src/core/unicode.js | 216 + tests/spec/core/simple.html | 40253 ++++++++++++++++++++++++++++++ tests/spec/core/unicode-spec.js | 89 + tests/test-main.js | 2 +- 5 files changed, 40560 insertions(+), 1 deletion(-) create mode 100644 src/core/unicode.js create mode 100644 tests/spec/core/unicode-spec.js diff --git a/profiles/w3c.js b/profiles/w3c.js index 50537315c9..6d8627c7d6 100644 --- a/profiles/w3c.js +++ b/profiles/w3c.js @@ -34,6 +34,7 @@ const modules = [ import("../src/core/data-cite.js"), import("../src/core/render-biblio.js"), import("../src/core/dfn-index.js"), + import("../src/core/unicode.js"), import("../src/core/contrib.js"), import("../src/core/sections.js"), import("../src/core/fix-headers.js"), diff --git a/src/core/unicode.js b/src/core/unicode.js new file mode 100644 index 0000000000..8d39026049 --- /dev/null +++ b/src/core/unicode.js @@ -0,0 +1,216 @@ +/** + * @module core/unicode + * + * Expand char markup (.hx, .ch) to .codepoint spans + * https://github.com/speced/respec/issues/4462 + * Based on https://github.com/r12a/scripts/blob/gh-pages/common29/functions.js + */ + +import { showError } from "./utils.js"; + +export const name = "core/unicode"; + +/** + * @param {Conf} conf + */ +export async function run(conf) { + expandCharMarkup(); +} + +function expandCharMarkup() { + // convert char markup to .codepoint spans (has to be done before the indexing) + // the .ch and .hx classes should only be used for characters in the + // spreadsheet. 
For other characters, generate the markup in a picker + // if the svg class is appended, use an svg image to display the char + // if the split class is used, the characters will be separated by + + // split puts + signs between the characters in a sequence + // init, medi, fina produce positional forms of cursive text using zwj + // skip puts a circle before a mark, and zwj between it and the following consonant + // circle puts a dotted circle before the item - used for combining marks + // coda puts a dotted circle after the item - used for closed syllables + // noname prevents the production of the Unicode name + + expandCharMarkupHx(); + expandCharMarkupCh(); +} + +// convert .hx markup (one or more hex codes) +function expandCharMarkupHx() { + /** @type {NodeListOf} */ + const elements = document.querySelectorAll(".hx"); + for (const elem of elements) { + const split = elem.classList.contains("split"); + const svg = elem.classList.contains("svg"); + const img = elem.classList.contains("img"); + const initial = elem.classList.contains("init"); + const medial = elem.classList.contains("medi"); + const final = elem.classList.contains("fina"); + const skipDiacritic = elem.classList.contains("skip"); + const circle = elem.classList.contains("circle"); + const coda = elem.classList.contains("coda") ? 
"◌" : ""; + const noname = elem.classList.contains("noname"); + const lang = window.langTag || elem.lang; + + const charlist = elem.textContent.trim().split(/\s+/); + if (charlist[0] === "") { + continue; + } + + let unicodeNames = ""; + let unicodeChars = ""; + let out = ""; + + if (final || medial) { + unicodeChars += "\u200D"; // the space is needed for Safari to work (NOTE(review): no space is added here, unlike the .ch path which uses " \u200D" - confirm) + } + if (circle) { + unicodeChars = `\u25CC${unicodeChars}`; + } + for (let i = 0; i < charlist.length; i++) { + const hex = charlist[i]; + const dec = parseInt(hex, 16); + if (Number.isNaN(dec) || dec < 0 || dec > 0x10ffff) { + showError( + `The link text "${elem.textContent}" is not a number!`, + name, + { elements: [elem] } + ); + continue; + } + const ch = String.fromCodePoint(dec); + + if (!charData[ch]) { + showError( + `The character "${ch}" (U+${hex}) is not in the database!`, + name, + { elements: [elem] } + ); + unicodeChars += ch; + continue; + } + + if (hex !== "25CC") { + if (i > 0) { + unicodeNames += " + "; + } + unicodeNames += `U+${hex} `; + unicodeNames += charData[ch].replace(/:/, ""); + } + + if (split && i > 0) { + unicodeChars += ` + `; + } + if (svg) { + // block = getScriptGroup(dec, false); + // unicodeChars += `${ch}`; + } else if (img) { + // block = getScriptGroup(dec, false); + // unicodeChars += `${ch}`; + } else { + unicodeChars += `&#x${hex};`; + } + if (skipDiacritic && i === 0) { + unicodeChars += "‍"; + } + } + + if (initial || medial) { + unicodeChars += "\u200D "; + } + + out += `${unicodeChars}${coda}`; + if (noname) { + // ok + } else { + out += `${unicodeNames}`; + } + out += ""; + + elem.outerHTML = out; + } +} + +// convert .ch markup (one or more characters using Unicode code points) +function expandCharMarkupCh() { + /** @type {NodeListOf} */ + const elements = document.querySelectorAll(".ch"); + for (const elem of elements) { + const split = elem.classList.contains("split"); + const svg = elem.classList.contains("svg"); + const img = elem.classList.contains("img"); + const initial = 
elem.classList.contains("init"); + const medial = elem.classList.contains("medi"); + const final = elem.classList.contains("fina"); + const circle = elem.classList.contains("circle"); + const coda = elem.classList.contains("coda") ? "◌" : ""; + const noname = elem.classList.contains("noname"); + const language = window.langTag || elem.lang; + + const charlist = [...elem.textContent]; + let unicodeNames = ""; + let unicodeChars = ""; + let out = ""; + + if (final || medial) { + unicodeChars += " \u200D"; + } + for (let i = 0; i < charlist.length; i++) { + const dec = charlist[i].codePointAt(0); + const hex = dec.toString(16).toUpperCase().padStart(4, "0"); + + if (!charData[charlist[i]]) { + unicodeChars += charlist[i]; + unicodeNames += ` ${charlist[i]} NOT IN DB! `; + continue; + } + + if (i > 0) { + unicodeNames += " + "; + } + unicodeNames += `U+${hex} `; + unicodeNames += charData[charlist[i]].replace(/:/, ""); + + if (split && i > 0) { + unicodeChars += ` + `; + } + + if (svg) { + // block = getScriptGroup(dec, false); + // unicodeChars += `${charlist[i]}`; + } else if (img) { + // block = getScriptGroup(dec, false); + // unicodeChars += `${charlist[i]}`; + } else { + unicodeChars += charlist[i]; + } + } + + if (initial || medial) { + unicodeChars += "\u200D "; + } + if (circle) { + unicodeChars = `\u25CC${unicodeChars}`; + } + + out += `${unicodeChars}${coda}`; + if (noname) { + // ok + } else { + out += `${unicodeNames}`; + } + out += ""; + + elem.outerHTML = out; + } +} diff --git a/tests/spec/core/simple.html b/tests/spec/core/simple.html index acf7ef230d..d81f0b72b3 100644 --- a/tests/spec/core/simple.html +++ b/tests/spec/core/simple.html @@ -3,6 +3,40259 @@ Simple Spec