From b5eda297af9b7984cf29c3c89ca0fa8fa433c093 Mon Sep 17 00:00:00 2001 From: Blaise Date: Fri, 28 Oct 2022 20:59:02 -0500 Subject: Update tokens for prose, cleanup --- proselight.js | 112 +++++++++++++--------------------------------------------- 1 file changed, 24 insertions(+), 88 deletions(-) diff --git a/proselight.js b/proselight.js index 73222b1..c79b8ce 100644 --- a/proselight.js +++ b/proselight.js @@ -26,7 +26,7 @@ i, microlighted, - el; // current microlighted element to run through + el; // current microlighted element to run through @@ -36,129 +36,65 @@ for (i = 0; el = microlighted[i++];) { var text = el.textContent, - pos = 0, // current position - next1 = text[0], // next character - chr = 1, // current character - prev1, // previous character - prev2, // the one before the previous - token = // current token content - el.innerHTML = '', // (and cleaning the node) + pos = 0, // current position + next1 = text[0], // next character + chr = 1, // current character + prev1, // previous character + prev2, // the one before the previous + token = // current token content + el.innerHTML = '', // (and cleaning the node) // current token type: - // 0: anything else (whitespaces / newlines) - // 1: operator or brace - // 2: closing braces (after which '/' is division not regex) - // 3: (key)word - // 4: regex - // 5: string starting with " - // 6: string starting with ' - // 7: xml comment - // 8: multiline comment /* */ - // 9: single-line comment starting with two slashes // - // 10: single-line comment starting with hash # tokenType = 0, - // kept to determine between regex and division - lastTokenType, // flag determining if token is multi-character multichar, node; // running through characters and highlighting - while (prev2 = prev1, - // escaping if needed (with except for comments) - // pervious character will not be therefore - // recognized as a token finalize condition - prev1 = tokenType < 7 && prev1 == '\\' ? 
1 : chr - ) { + while (prev2 = prev1, prev1 = chr) { chr = next1; next1=text[++pos]; multichar = token.length > 1; // checking if current token should be finalized if (!chr || // end of content - // types 9-10 (single-line comments) end with a - // newline - (tokenType > 8 && chr == '\n') || [ // finalize conditions for other token types - // 0: whitespaces - /\S/[test](chr), // merged together - // 1: operators + // 0: unformatted + /[":;,\\.?!\])\/{}[(|]/[test](chr), + // 1: parentheses or braces 1, // consist of a single character - // 2: braces + // 2: terminators 1, // consist of a single character - // 3: (key)word - !/[$\w]/[test](chr), - // 4: regex - (prev1 == '/' || prev1 == '\n') && multichar, - // 5: string with " + // 3: separators + 1, // consist of a single character + // 4: quotes prev1 == '"' && multichar, - // 6: string with ' - prev1 == "'" && multichar, - // 7: xml comment - text[pos-4]+prev2+prev1 == '-->', - // 8: multiline comment - prev2+prev1 == '*/' ][tokenType] ) { // appending the token to the result if (token) { - // remapping token type into style - // (some types are highlighted similarly) + // map token type into class el[appendChild]( node = _document.createElement('span') - ).setAttribute('class', 'ph'+( - // not formatted - !tokenType ? 0 : - // punctuation - tokenType < 3 ? 2 : - // comments - tokenType > 6 ? 4 : - // regex and strings - tokenType > 3 ? 
3 : - // otherwise tokenType == 3, (key)word - // (1 if regexp matches, 0 otherwise) - + /^(a(bstract|lias|nd|rguments|rray|s(m|sert)?|uto)|b(ase|egin|ool(ean)?|reak|yte)|c(ase|atch|har|hecked|lass|lone|ompl|onst|ontinue)|de(bugger|cimal|clare|f(ault|er)?|init|l(egate|ete)?)|do|double|e(cho|ls?if|lse(if)?|nd|nsure|num|vent|x(cept|ec|p(licit|ort)|te(nds|nsion|rn)))|f(allthrough|alse|inal(ly)?|ixed|loat|or(each)?|riend|rom|unc(tion)?)|global|goto|guard|i(f|mp(lements|licit|ort)|n(it|clude(_once)?|line|out|stanceof|t(erface|ernal)?)?|s)|l(ambda|et|ock|ong)|m(icrolight|odule|utable)|NaN|n(amespace|ative|ext|ew|il|ot|ull)|o(bject|perator|r|ut|verride)|p(ackage|arams|rivate|rotected|rotocol|ublic)|r(aise|e(adonly|do|f|gister|peat|quire(_once)?|scue|strict|try|turn))|s(byte|ealed|elf|hort|igned|izeof|tatic|tring|truct|ubscript|uper|ynchronized|witch)|t(emplate|hen|his|hrows?|ransient|rue|ry|ype(alias|def|id|name|of))|u(n(checked|def(ined)?|ion|less|signed|til)|se|sing)|v(ar|irtual|oid|olatile)|w(char_t|hen|here|hile|ith)|xor|yield)$/[test](token) - )); + ).setAttribute('class', 'ph'+tokenType); node[appendChild](_document.createTextNode(token)); } - // saving the previous token type - // (skipping whitespaces and comments) - lastTokenType = - (tokenType && tokenType < 7) ? 
- tokenType : lastTokenType; - // initializing a new token token = ''; // determining the new token type (going up the // list until matching a token type start // condition) - tokenType = 11; + tokenType = 5; while (![ - 1, // 0: whitespace - // 1: operator or braces - /[\/{}[(\-+*=<>:;|\\.,?!&@~]/[test](chr), - /[\])]/[test](chr), // 2: closing brace - /[$\w]/[test](chr), // 3: (key)word - chr == '/' && // 4: regex - // previous token was an - // opening brace or an - // operator (otherwise - // division, not a regex) - (lastTokenType < 2) && - // workaround for xml - // closing tags - prev1 != '<', - chr == '"', // 5: string with " - chr == "'", // 6: string with ' - // 7: xml comment - chr+next1+text[pos+1]+text[pos+2] == '