Skip to content

Commit

Permalink
Better UTF8 support - Terminal can now display UTF8 chars it receives.
Browse files Browse the repository at this point in the history
Can also upload and paste code containing UTF8

Editing UTF8 characters in the REPL is still broken - Espruino doesn't realise that you just need one right-arrow to skip over N UTF8 characters
Still - being able to even display them is still much better than before
  • Loading branch information
gfwilliams committed Jun 21, 2024
1 parent 14d7fe0 commit 0af9555
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 20 deletions.
71 changes: 52 additions & 19 deletions core/terminal.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,10 @@
text+="\x08"; // backspace
text+=thisValue.substr(commonChars);
lastValue = terminalfocus.value;
if (text.length)
if (text.length) {
text = Espruino.Core.Utils.asUTF8Bytes(text); // convert UTF8 to constituent bytes
onInputData(Espruino.Core.Utils.fixBrokenCode(text));
}
}
terminalfocus.addEventListener("input", changeListener);
terminalfocus.addEventListener("keydown", function(e) {
Expand Down Expand Up @@ -294,6 +296,7 @@
e.preventDefault();
terminalfocus.value = "";
lastValue = "";
ch = Espruino.Core.Utils.asUTF8Bytes(ch); // convert UTF8 to constituent bytes
onInputData(ch);
}
});
Expand Down Expand Up @@ -483,10 +486,33 @@
return str.substr(0,s+1);
}



var handleReceivedCharacter = function (/*char*/ch) {
/* Is this byte value one that can begin a multi-byte UTF8 sequence?
   Returns true for 0xC2..0xF4 inclusive, false for anything else
   (including undefined). */
function isUTF8StartChar(ch) {
  var FIRST_LEAD_BYTE = 0xC2; // lowest accepted lead byte
  var LAST_LEAD_BYTE = 0xF4;  // highest accepted lead byte
  var inRange = (FIRST_LEAD_BYTE <= ch) && (ch <= LAST_LEAD_BYTE);
  return inRange;
}
/* Write the character with code `ch` into the current terminal line at the
   cursor position (overwriting whatever was there), move the cursor one
   column right, and fire 'terminalPrompt' if the line is now a bare prompt. */
function newCharacter(ch) {
  var row = termCursorY;
  var line = termText[row];
  if (line === undefined) line = "";
  // Text before the cursor, and text after the cell being overwritten
  var before = Espruino.Core.Utils.getSubString(line, 0, termCursorX);
  var after = Espruino.Core.Utils.getSubString(line, termCursorX+1);
  // NOTE(review): String.fromCharCode keeps only the low 16 bits, so values
  // above 0xFFFF will not produce the intended character - confirm whether
  // callers can pass such code points.
  line = trimRight(before + String.fromCharCode(ch) + after);
  termText[row] = line;
  termCursorX++;
  // Only a '>' can complete a prompt, eg '>' or 'debug>'
  if (ch != ">".charCodeAt(0)) return;
  // if the whole line is just the prompt, send a 'terminalPrompt' message
  if (line==">" || line=="debug>")
    Espruino.callProcessor("terminalPrompt", line);
}

//console.log("IN = "+ch);
if (termControlChars.length==0) {
switch (ch) {
if (isUTF8StartChar(ch)) { // UTF8
termControlChars = [ ch ];
} else switch (ch) {
case 8 : {
if (termCursorX>0) termCursorX--;
} break;
Expand All @@ -503,24 +529,31 @@
} break;
case 19 : break; // XOFF
case 17 : break; // XON
case 0xC2 : break; // UTF8 for <255 - ignore this
default : {
// Else actually add character
if (termText[termCursorY]===undefined) termText[termCursorY]="";
termText[termCursorY] = trimRight(
Espruino.Core.Utils.getSubString(termText[termCursorY],0,termCursorX) +
String.fromCharCode(ch) +
Espruino.Core.Utils.getSubString(termText[termCursorY],termCursorX+1));
termCursorX++;
// check for the 'prompt', eg '>' or 'debug>'
// if we have it, send a 'terminalPrompt' message
if (ch == ">".charCodeAt(0)) {
var prompt = termText[termCursorY];
if (prompt==">" || prompt=="debug>")
Espruino.callProcessor("terminalPrompt", prompt);
}
}
default : newCharacter(ch); // Else actually add character
}
} else if (isUTF8StartChar(termControlChars[0])) { // decode UTF8 chars
// termControlChars[0] is the UTF8 lead byte; collect the bytes that follow it
termControlChars.push(ch);
if ((ch&0xC0) == 0x80) { // it's valid - continuation bytes are 0b10xxxxxx
  var c = termControlChars[0], cp=c, ra=0;
  // work out first byte's value and how many continuation bytes (ra) this codepoint needs
  if ((c&0xE0)==0xC0) { // 2-byte code starts with 0b110xxxxx
    cp=c&0x1F;ra=1;
  } else if ((c&0xF0)==0xE0) { // 3-byte code starts with 0b1110xxxx
    cp=c&0x0F;ra=2;
  } else if ((c&0xF8)==0xF0) { // 4-byte code starts with 0b11110xxx
    cp=c&0x07;ra=3;
  }
  // If we have enough data, decode it. termControlChars holds the lead byte
  // PLUS the continuation bytes, so we need ra+1 entries - comparing with
  // '>= ra' decoded 3 and 4 byte sequences one byte too early.
  if (termControlChars.length > ra) {
    for (var i=1;i<=ra;i++)
      cp = (cp<<6) | (termControlChars[i] & 0x3F);
    newCharacter(cp);
    termControlChars = [];
  }
} else { // invalid! drop the partial sequence (including this byte)
  console.warn(`Invalid UTF8 sequence (${termControlChars.join(",")})`);
  termControlChars = [];
}
} else if (termControlChars[0]==27) { // Esc
if (termControlChars[1]==91) { // Esc [
if (termControlChars[2]==63) {
Expand Down
27 changes: 27 additions & 0 deletions core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,32 @@ while (d!==undefined) {console.log(btoa(d));d=f.read(${CHUNKSIZE});}
return js;
}

/* Convert a normal JS string (UTF-16 code units) to a string of UTF8 bytes,
   where every character of the result has a char code in the range 0..255.

   - char codes below 0x80 are passed through unchanged
   - valid surrogate pairs are combined into one code point (0x10000..0x10FFFF)
     and written as a 4 byte sequence
   - an unpaired surrogate cannot be represented in UTF8, so it is written as
     U+FFFD (the replacement character), the same as TextEncoder does

   str     : the string to convert
   returns : a string containing one character per UTF8 byte */
function asUTF8Bytes(str) {
  var result = "";
  for (var i=0; i < str.length; i++) {
    var charcode = str.charCodeAt(i);
    if (charcode >= 0xd800 && charcode < 0xdc00) { // high surrogate
      var low = str.charCodeAt(i+1); // NaN if we're at the end of the string
      if (low >= 0xdc00 && low < 0xe000) {
        i++; // consume the low surrogate as well
        // UTF-16 stores (codepoint - 0x10000) split across the two 10 bit
        // halves, so the offset must be added back on
        charcode = 0x10000 + (((charcode&0x3ff)<<10) | (low&0x3ff));
      } else {
        charcode = 0xfffd; // unpaired high surrogate
      }
    } else if (charcode >= 0xdc00 && charcode < 0xe000) {
      charcode = 0xfffd; // low surrogate with no preceding high surrogate
    }
    if (charcode < 0x80) {
      result += String.fromCharCode(charcode);
    } else if (charcode < 0x800) {
      result += String.fromCharCode(
                0xc0 | (charcode >> 6),
                0x80 | (charcode & 0x3f));
    } else if (charcode < 0x10000) {
      result += String.fromCharCode(
                0xe0 | (charcode >> 12),
                0x80 | ((charcode>>6) & 0x3f),
                0x80 | (charcode & 0x3f));
    } else {
      result += String.fromCharCode(
                0xf0 | (charcode >> 18),
                0x80 | ((charcode>>12) & 0x3f),
                0x80 | ((charcode>>6) & 0x3f),
                0x80 | (charcode & 0x3f));
    }
  }
  return result;
}

// Does the given string contain only ASCII characters?
function isASCII(str) {
for (var i=0;i<str.length;i++) {
Expand Down Expand Up @@ -965,6 +991,7 @@ while (d!==undefined) {console.log(btoa(d));d=f.read(${CHUNKSIZE});}
arrayBufferToString : arrayBufferToString,
parseJSONish : parseJSONish,
toJSONishString : toJSONishString,
asUTF8Bytes : asUTF8Bytes,
isASCII : isASCII,
btoa : btoa,
atob : atob
Expand Down
4 changes: 3 additions & 1 deletion plugins/saveOnSend.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@
}
}

//
// convert any charcodes >= 0x80 to their UTF8 byte encoding (so every resulting char is 0..255)
code = Espruino.Core.Utils.asUTF8Bytes(code);
// Now create the commands to do the upload
console.log("Uploading "+code.length+" bytes to flash");
if (!hasStorage) { // old style
if (isStorageUpload) {
Expand Down

0 comments on commit 0af9555

Please sign in to comment.