summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Blaise 2022-10-28 20:59:02 -0500
committer Blaise 2022-10-28 20:59:02 -0500
commit b5eda297af9b7984cf29c3c89ca0fa8fa433c093 (patch)
tree 2c6733a5e83467c095c151066c6a43fa7722c4da
parent a5105ae3d6a252f16eded4d281c4b0adfad4ccd7 (diff)
Update tokens for prose, cleanup
-rw-r--r-- proselight.js 112
1 files changed, 24 insertions, 88 deletions
diff --git a/proselight.js b/proselight.js
index 73222b1..c79b8ce 100644
--- a/proselight.js
+++ b/proselight.js
@@ -26,7 +26,7 @@
26 26
27 i, 27 i,
28 microlighted, 28 microlighted,
29 el; // current microlighted element to run through 29 el; // current microlighted element to run through
30 30
31 31
32 32
@@ -36,129 +36,65 @@
36 36
37 for (i = 0; el = microlighted[i++];) { 37 for (i = 0; el = microlighted[i++];) {
38 var text = el.textContent, 38 var text = el.textContent,
39 pos = 0, // current position 39 pos = 0, // current position
40 next1 = text[0], // next character 40 next1 = text[0], // next character
41 chr = 1, // current character 41 chr = 1, // current character
42 prev1, // previous character 42 prev1, // previous character
43 prev2, // the one before the previous 43 prev2, // the one before the previous
44 token = // current token content 44 token = // current token content
45 el.innerHTML = '', // (and cleaning the node) 45 el.innerHTML = '', // (and cleaning the node)
46 46
47 // current token type: 47 // current token type:
48 // 0: anything else (whitespaces / newlines)
49 // 1: operator or brace
50 // 2: closing braces (after which '/' is division not regex)
51 // 3: (key)word
52 // 4: regex
53 // 5: string starting with "
54 // 6: string starting with '
55 // 7: xml comment <!-- -->
56 // 8: multiline comment /* */
57 // 9: single-line comment starting with two slashes //
58 // 10: single-line comment starting with hash #
59 tokenType = 0, 48 tokenType = 0,
60 49
61 // kept to determine between regex and division
62 lastTokenType,
63 // flag determining if token is multi-character 50 // flag determining if token is multi-character
64 multichar, 51 multichar,
65 node; 52 node;
66 53
67 // running through characters and highlighting 54 // running through characters and highlighting
68 while (prev2 = prev1, 55 while (prev2 = prev1, prev1 = chr) {
69 // escaping if needed (except inside comments):
70 // the previous character will therefore not be
71 // recognized as a token finalize condition
72 prev1 = tokenType < 7 && prev1 == '\\' ? 1 : chr
73 ) {
74 chr = next1; 56 chr = next1;
75 next1=text[++pos]; 57 next1=text[++pos];
76 multichar = token.length > 1; 58 multichar = token.length > 1;
77 59
78 // checking if current token should be finalized 60 // checking if current token should be finalized
79 if (!chr || // end of content 61 if (!chr || // end of content
80 // types 9-10 (single-line comments) end with a
81 // newline
82 (tokenType > 8 && chr == '\n') ||
83 [ // finalize conditions for other token types 62 [ // finalize conditions for other token types
84 // 0: whitespaces 63 // 0: unformatted
85 /\S/[test](chr), // merged together 64 /[":;,\\.?!\])\/{}[(|]/[test](chr),
86 // 1: operators 65 // 1: parenthesis or braces
87 1, // consist of a single character 66 1, // consist of a single character
88 // 2: braces 67 // 2: terminators
89 1, // consist of a single character 68 1, // consist of a single character
90 // 3: (key)word 69 // 3: separators
91 !/[$\w]/[test](chr), 70 1, // consist of a single character
92 // 4: regex 71 // 4: quotes
93 (prev1 == '/' || prev1 == '\n') && multichar,
94 // 5: string with "
95 prev1 == '"' && multichar, 72 prev1 == '"' && multichar,
96 // 6: string with '
97 prev1 == "'" && multichar,
98 // 7: xml comment
99 text[pos-4]+prev2+prev1 == '-->',
100 // 8: multiline comment
101 prev2+prev1 == '*/'
102 ][tokenType] 73 ][tokenType]
103 ) { 74 ) {
104 // appending the token to the result 75 // appending the token to the result
105 if (token) { 76 if (token) {
106 // remapping token type into style 77 // map token type into class
107 // (some types are highlighted similarly)
108 el[appendChild]( 78 el[appendChild](
109 node = _document.createElement('span') 79 node = _document.createElement('span')
110 ).setAttribute('class', 'ph'+( 80 ).setAttribute('class', 'ph'+tokenType);
111 // not formatted
112 !tokenType ? 0 :
113 // punctuation
114 tokenType < 3 ? 2 :
115 // comments
116 tokenType > 6 ? 4 :
117 // regex and strings
118 tokenType > 3 ? 3 :
119 // otherwise tokenType == 3, (key)word
120 // (1 if regexp matches, 0 otherwise)
121 + /^(a(bstract|lias|nd|rguments|rray|s(m|sert)?|uto)|b(ase|egin|ool(ean)?|reak|yte)|c(ase|atch|har|hecked|lass|lone|ompl|onst|ontinue)|de(bugger|cimal|clare|f(ault|er)?|init|l(egate|ete)?)|do|double|e(cho|ls?if|lse(if)?|nd|nsure|num|vent|x(cept|ec|p(licit|ort)|te(nds|nsion|rn)))|f(allthrough|alse|inal(ly)?|ixed|loat|or(each)?|riend|rom|unc(tion)?)|global|goto|guard|i(f|mp(lements|licit|ort)|n(it|clude(_once)?|line|out|stanceof|t(erface|ernal)?)?|s)|l(ambda|et|ock|ong)|m(icrolight|odule|utable)|NaN|n(amespace|ative|ext|ew|il|ot|ull)|o(bject|perator|r|ut|verride)|p(ackage|arams|rivate|rotected|rotocol|ublic)|r(aise|e(adonly|do|f|gister|peat|quire(_once)?|scue|strict|try|turn))|s(byte|ealed|elf|hort|igned|izeof|tatic|tring|truct|ubscript|uper|ynchronized|witch)|t(emplate|hen|his|hrows?|ransient|rue|ry|ype(alias|def|id|name|of))|u(n(checked|def(ined)?|ion|less|signed|til)|se|sing)|v(ar|irtual|oid|olatile)|w(char_t|hen|here|hile|ith)|xor|yield)$/[test](token)
122 ));
123 81
124 node[appendChild](_document.createTextNode(token)); 82 node[appendChild](_document.createTextNode(token));
125 } 83 }
126 84
127 // saving the previous token type
128 // (skipping whitespaces and comments)
129 lastTokenType =
130 (tokenType && tokenType < 7) ?
131 tokenType : lastTokenType;
132
133 // initializing a new token 85 // initializing a new token
134 token = ''; 86 token = '';
135 87
136 // determining the new token type (going up the 88 // determining the new token type (going up the
137 // list until matching a token type start 89 // list until matching a token type start
138 // condition) 90 // condition)
139 tokenType = 11; 91 tokenType = 5;
140 while (![ 92 while (![
141 1, // 0: whitespace 93 1, // 0: unformatted
142 // 1: operator or braces 94 /[\])\/{}[(|]/[test](chr), // 1: parenthesis or braces
143 /[\/{}[(\-+*=<>:;|\\.,?!&@~]/[test](chr), 95 /[\\.?!]/[test](chr), // 2: terminators
144 /[\])]/[test](chr), // 2: closing brace 96 /[:;,]/[test](chr), // 3: separators
145 /[$\w]/[test](chr), // 3: (key)word 97 chr == '"', // 4: quotes
146 chr == '/' && // 4: regex
147 // previous token was an
148 // opening brace or an
149 // operator (otherwise
150 // division, not a regex)
151 (lastTokenType < 2) &&
152 // workaround for xml
153 // closing tags
154 prev1 != '<',
155 chr == '"', // 5: string with "
156 chr == "'", // 6: string with '
157 // 7: xml comment
158 chr+next1+text[pos+1]+text[pos+2] == '<!--',
159 chr+next1 == '/*', // 8: multiline comment
160 chr+next1 == '//', // 9: single-line comment
161 chr == '#' // 10: hash-style comment
162 ][--tokenType]); 98 ][--tokenType]);
163 } 99 }
164 100