diff options
Diffstat (limited to 'proselight.js')
-rw-r--r-- | proselight.js | 112 |
1 files changed, 24 insertions, 88 deletions
diff --git a/proselight.js b/proselight.js index 73222b1..c79b8ce 100644 --- a/proselight.js +++ b/proselight.js | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | i, | 27 | i, |
28 | microlighted, | 28 | microlighted, |
29 | el; // current microlighted element to run through | 29 | el; // current microlighted element to run through |
30 | 30 | ||
31 | 31 | ||
32 | 32 | ||
@@ -36,129 +36,65 @@ | |||
36 | 36 | ||
37 | for (i = 0; el = microlighted[i++];) { | 37 | for (i = 0; el = microlighted[i++];) { |
38 | var text = el.textContent, | 38 | var text = el.textContent, |
39 | pos = 0, // current position | 39 | pos = 0, // current position |
40 | next1 = text[0], // next character | 40 | next1 = text[0], // next character |
41 | chr = 1, // current character | 41 | chr = 1, // current character |
42 | prev1, // previous character | 42 | prev1, // previous character |
43 | prev2, // the one before the previous | 43 | prev2, // the one before the previous |
44 | token = // current token content | 44 | token = // current token content |
45 | el.innerHTML = '', // (and cleaning the node) | 45 | el.innerHTML = '', // (and cleaning the node) |
46 | 46 | ||
47 | // current token type: | 47 | // current token type: |
48 | // 0: anything else (whitespaces / newlines) | ||
49 | // 1: operator or brace | ||
50 | // 2: closing braces (after which '/' is division not regex) | ||
51 | // 3: (key)word | ||
52 | // 4: regex | ||
53 | // 5: string starting with " | ||
54 | // 6: string starting with ' | ||
55 | // 7: xml comment <!-- --> | ||
56 | // 8: multiline comment /* */ | ||
57 | // 9: single-line comment starting with two slashes // | ||
58 | // 10: single-line comment starting with hash # | ||
59 | tokenType = 0, | 48 | tokenType = 0, |
60 | 49 | ||
61 | // kept to determine between regex and division | ||
62 | lastTokenType, | ||
63 | // flag determining if token is multi-character | 50 | // flag determining if token is multi-character |
64 | multichar, | 51 | multichar, |
65 | node; | 52 | node; |
66 | 53 | ||
67 | // running through characters and highlighting | 54 | // running through characters and highlighting |
68 | while (prev2 = prev1, | 55 | while (prev2 = prev1, prev1 = chr) { |
69 | // escaping if needed (except for comments) | |
70 | // previous character will therefore not be | |
71 | // recognized as a token finalize condition | |
72 | prev1 = tokenType < 7 && prev1 == '\\' ? 1 : chr | ||
73 | ) { | ||
74 | chr = next1; | 56 | chr = next1; |
75 | next1=text[++pos]; | 57 | next1=text[++pos]; |
76 | multichar = token.length > 1; | 58 | multichar = token.length > 1; |
77 | 59 | ||
78 | // checking if current token should be finalized | 60 | // checking if current token should be finalized |
79 | if (!chr || // end of content | 61 | if (!chr || // end of content |
80 | // types 9-10 (single-line comments) end with a | ||
81 | // newline | ||
82 | (tokenType > 8 && chr == '\n') || | ||
83 | [ // finalize conditions for other token types | 62 | [ // finalize conditions for other token types |
84 | // 0: whitespaces | 63 | // 0: unformatted |
85 | /\S/[test](chr), // merged together | 64 | /[":;,\\.?!\])\/{}[(|]/[test](chr), |
86 | // 1: operators | 65 | // 1: parenthesis or braces |
87 | 1, // consist of a single character | 66 | 1, // consist of a single character |
88 | // 2: braces | 67 | // 2: terminators |
89 | 1, // consist of a single character | 68 | 1, // consist of a single character |
90 | // 3: (key)word | 69 | // 3: separators |
91 | !/[$\w]/[test](chr), | 70 | 1, // consist of a single character |
92 | // 4: regex | 71 | // 4: quotes |
93 | (prev1 == '/' || prev1 == '\n') && multichar, | ||
94 | // 5: string with " | ||
95 | prev1 == '"' && multichar, | 72 | prev1 == '"' && multichar, |
96 | // 6: string with ' | ||
97 | prev1 == "'" && multichar, | ||
98 | // 7: xml comment | ||
99 | text[pos-4]+prev2+prev1 == '-->', | ||
100 | // 8: multiline comment | ||
101 | prev2+prev1 == '*/' | ||
102 | ][tokenType] | 73 | ][tokenType] |
103 | ) { | 74 | ) { |
104 | // appending the token to the result | 75 | // appending the token to the result |
105 | if (token) { | 76 | if (token) { |
106 | // remapping token type into style | 77 | // map token type into class |
107 | // (some types are highlighted similarly) | ||
108 | el[appendChild]( | 78 | el[appendChild]( |
109 | node = _document.createElement('span') | 79 | node = _document.createElement('span') |
110 | ).setAttribute('class', 'ph'+( | 80 | ).setAttribute('class', 'ph'+tokenType); |
111 | // not formatted | ||
112 | !tokenType ? 0 : | ||
113 | // punctuation | ||
114 | tokenType < 3 ? 2 : | ||
115 | // comments | ||
116 | tokenType > 6 ? 4 : | ||
117 | // regex and strings | ||
118 | tokenType > 3 ? 3 : | ||
119 | // otherwise tokenType == 3, (key)word | ||
120 | // (1 if regexp matches, 0 otherwise) | ||
121 | + /^(a(bstract|lias|nd|rguments|rray|s(m|sert)?|uto)|b(ase|egin|ool(ean)?|reak|yte)|c(ase|atch|har|hecked|lass|lone|ompl|onst|ontinue)|de(bugger|cimal|clare|f(ault|er)?|init|l(egate|ete)?)|do|double|e(cho|ls?if|lse(if)?|nd|nsure|num|vent|x(cept|ec|p(licit|ort)|te(nds|nsion|rn)))|f(allthrough|alse|inal(ly)?|ixed|loat|or(each)?|riend|rom|unc(tion)?)|global|goto|guard|i(f|mp(lements|licit|ort)|n(it|clude(_once)?|line|out|stanceof|t(erface|ernal)?)?|s)|l(ambda|et|ock|ong)|m(icrolight|odule|utable)|NaN|n(amespace|ative|ext|ew|il|ot|ull)|o(bject|perator|r|ut|verride)|p(ackage|arams|rivate|rotected|rotocol|ublic)|r(aise|e(adonly|do|f|gister|peat|quire(_once)?|scue|strict|try|turn))|s(byte|ealed|elf|hort|igned|izeof|tatic|tring|truct|ubscript|uper|ynchronized|witch)|t(emplate|hen|his|hrows?|ransient|rue|ry|ype(alias|def|id|name|of))|u(n(checked|def(ined)?|ion|less|signed|til)|se|sing)|v(ar|irtual|oid|olatile)|w(char_t|hen|here|hile|ith)|xor|yield)$/[test](token) | ||
122 | )); | ||
123 | 81 | ||
124 | node[appendChild](_document.createTextNode(token)); | 82 | node[appendChild](_document.createTextNode(token)); |
125 | } | 83 | } |
126 | 84 | ||
127 | // saving the previous token type | ||
128 | // (skipping whitespaces and comments) | ||
129 | lastTokenType = | ||
130 | (tokenType && tokenType < 7) ? | ||
131 | tokenType : lastTokenType; | ||
132 | |||
133 | // initializing a new token | 85 | // initializing a new token |
134 | token = ''; | 86 | token = ''; |
135 | 87 | ||
136 | // determining the new token type (going up the | 88 | // determining the new token type (going up the |
137 | // list until matching a token type start | 89 | // list until matching a token type start |
138 | // condition) | 90 | // condition) |
139 | tokenType = 11; | 91 | tokenType = 5; |
140 | while (![ | 92 | while (![ |
141 | 1, // 0: whitespace | 93 | 1, // 0: unformatted |
142 | // 1: operator or braces | 94 | /[\])\/{}[(|]/[test](chr), // 1: parenthesis or braces |
143 | /[\/{}[(\-+*=<>:;|\\.,?!&@~]/[test](chr), | 95 | /[\\.?!]/[test](chr), // 2: terminators |
144 | /[\])]/[test](chr), // 2: closing brace | 96 | /[:;,]/[test](chr), // 3: separators |
145 | /[$\w]/[test](chr), // 3: (key)word | 97 | chr == '"', // 4: quotes |
146 | chr == '/' && // 4: regex | ||
147 | // previous token was an | ||
148 | // opening brace or an | ||
149 | // operator (otherwise | ||
150 | // division, not a regex) | ||
151 | (lastTokenType < 2) && | ||
152 | // workaround for xml | ||
153 | // closing tags | ||
154 | prev1 != '<', | ||
155 | chr == '"', // 5: string with " | ||
156 | chr == "'", // 6: string with ' | ||
157 | // 7: xml comment | ||
158 | chr+next1+text[pos+1]+text[pos+2] == '<!--', | ||
159 | chr+next1 == '/*', // 8: multiline comment | ||
160 | chr+next1 == '//', // 9: single-line comment | ||
161 | chr == '#' // 10: hash-style comment | ||
162 | ][--tokenType]); | 98 | ][--tokenType]); |
163 | } | 99 | } |
164 | 100 | ||