parse.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. 'use strict';
  2. var SyntaxParseError = require('./error').SyntaxParseError;
  // Character codes the tokenizer compares against.

  // whitespace
  var TAB = 0x09; // \t
  var N = 0x0A; // \n
  var F = 0x0C; // \f
  var R = 0x0D; // \r
  var SPACE = 0x20;

  // punctuation
  var EXCLAMATIONMARK = 0x21; // !
  var NUMBERSIGN = 0x23; // #
  var PERCENTSIGN = 0x25; // %
  var AMPERSAND = 0x26; // &
  var APOSTROPHE = 0x27; // '
  var LEFTPARENTHESIS = 0x28; // (
  var RIGHTPARENTHESIS = 0x29; // )
  var ASTERISK = 0x2A; // *
  var PLUSSIGN = 0x2B; // +
  var COMMA = 0x2C; // ,
  var SOLIDUS = 0x2F; // /
  var LESSTHANSIGN = 0x3C; // <
  var GREATERTHANSIGN = 0x3E; // >
  var QUESTIONMARK = 0x3F; // ?
  var LEFTSQUAREBRACKET = 0x5B; // [
  var RIGHTSQUAREBRACKET = 0x5D; // ]
  var LEFTCURLYBRACKET = 0x7B; // {
  var VERTICALLINE = 0x7C; // |
  var RIGHTCURLYBRACKET = 0x7D; // }
  // Rank of every combinator, numbered from 1 in the order listed here.
  // regroupTerms() sorts the combinators it has seen by this rank (ascending).
  var COMBINATOR_PRECEDENCE = [' ', '&&', '||', '|'].reduce(function(ranks, combinator, index) {
      ranks[combinator] = index + 1;
      return ranks;
  }, {});
  // Builds a multiplier descriptor: `min`/`max` repetition counts plus a flag
  // telling whether repetitions are comma separated.
  function createMultiplier(min, max, comma) {
      return {
          comma: comma,
          min: min,
          max: max
      };
  }

  // Shared descriptors for the fixed multipliers. The "or more" variants carry
  // `max: 0`, i.e. a zero maximum is how an open upper bound is written here.
  // They are compared by identity elsewhere, so each one is a distinct object.
  var MULTIPLIER_DEFAULT = createMultiplier(1, 1, false); // no multiplier
  var MULTIPLIER_ZERO_OR_MORE = createMultiplier(0, 0, false); // *
  var MULTIPLIER_ONE_OR_MORE = createMultiplier(1, 0, false); // +
  var MULTIPLIER_ONE_OR_MORE_COMMA_SEPARATED = createMultiplier(1, 0, true); // #
  var MULTIPLIER_ZERO_OR_ONE = createMultiplier(0, 1, false); // ?
  // Lookup table indexed by char code (0..127): 1 for characters allowed in a
  // name (ASCII letters, digits and the hyphen), 0 for everything else.
  var NAME_CHAR = (function() {
      var table = typeof Uint32Array === 'function' ? new Uint32Array(128) : new Array(128);

      for (var code = 0; code < 128; code++) {
          var isDigit = code >= 48 && code <= 57; // 0-9
          var isUpper = code >= 65 && code <= 90; // A-Z
          var isLower = code >= 97 && code <= 122; // a-z
          var isHyphen = code === 45; // -

          table[code] = isDigit || isUpper || isLower || isHyphen ? 1 : 0;
      }

      return table;
  })();
  // Cursor over a syntax string: `str` is the input, `pos` the current offset.
  var Tokenizer = function(str) {
      this.str = str;
      this.pos = 0;
  };

  Tokenizer.prototype = {
      // Char code at the cursor, or 0 when the cursor is past the end of input.
      charCode: function() {
          if (this.pos < this.str.length) {
              return this.str.charCodeAt(this.pos);
          }

          return 0;
      },

      // Char code right after the cursor, or 0 when there is no such character.
      nextCharCode: function() {
          var lookahead = this.pos + 1;

          if (lookahead < this.str.length) {
              return this.str.charCodeAt(lookahead);
          }

          return 0;
      },

      // Returns the text between the cursor and `end`, and moves the cursor to `end`.
      substringToPos: function(end) {
          var start = this.pos;

          this.pos = end;

          return this.str.substring(start, end);
      },

      // Consumes one character that must have the given char code;
      // reports an error at the cursor otherwise.
      eat: function(code) {
          var matches = this.charCode() === code;

          if (!matches) {
              error(this, this.pos, 'Expect `' + String.fromCharCode(code) + '`');
          }

          this.pos++;
      }
  };
  // Consumes the character at the cursor plus every whitespace character
  // (CR, LF, FF, space, tab) that follows it, and returns the consumed text.
  // The first character is not inspected here.
  function scanSpaces(tokenizer) {
      var source = tokenizer.str;
      var end = tokenizer.pos + 1;

      while (end < source.length) {
          var code = source.charCodeAt(end);
          var isWhitespace = code === R || code === N || code === F || code === SPACE || code === TAB;

          if (!isWhitespace) {
              break;
          }

          end++;
      }

      return tokenizer.substringToPos(end);
  }
  // Consumes a run of name characters (see NAME_CHAR) starting at the cursor
  // and returns it; reports an error when the run is empty.
  function scanWord(tokenizer) {
      var source = tokenizer.str;
      var start = tokenizer.pos;
      var end = start;

      while (end < source.length) {
          var code = source.charCodeAt(end);

          // codes outside the table range can never be name characters
          if (code >= 128 || NAME_CHAR[code] === 0) {
              break;
          }

          end++;
      }

      if (end === start) {
          error(tokenizer, tokenizer.pos, 'Expect a keyword');
      }

      return tokenizer.substringToPos(end);
  }
  // Consumes a run of decimal digits starting at the cursor and returns it as
  // a string; reports an error when there is no digit at the cursor.
  function scanNumber(tokenizer) {
      var source = tokenizer.str;
      var start = tokenizer.pos;
      var end = start;

      while (end < source.length) {
          var code = source.charCodeAt(end);
          var isDigit = code >= 48 && code <= 57; // 0-9

          if (!isDigit) {
              break;
          }

          end++;
      }

      if (end === start) {
          error(tokenizer, tokenizer.pos, 'Expect a number');
      }

      return tokenizer.substringToPos(end);
  }
  // Consumes a quoted string: everything from the cursor up to and including
  // the next apostrophe after it. Reports an error at the end of input when
  // no closing apostrophe is found.
  function scanString(tokenizer) {
      var source = tokenizer.str;
      var closingQuote = source.indexOf("'", tokenizer.pos + 1);

      if (closingQuote === -1) {
          error(tokenizer, source.length, 'Expect a quote');
      }

      return tokenizer.substringToPos(closingQuote + 1);
  }
  // Reads a `{min}`, `{min,}` or `{min,max}` range at the cursor and returns a
  // multiplier descriptor. `comma` is copied into the result as is.
  // When the upper bound is omitted (`{min,}`) the resulting `max` is 0.
  function readMultiplierRange(tokenizer, comma) {
      var lower;
      var upper = null;

      tokenizer.eat(LEFTCURLYBRACKET);
      lower = scanNumber(tokenizer);

      if (tokenizer.charCode() !== COMMA) {
          // `{n}` - exact count
          upper = lower;
      } else {
          tokenizer.pos++;

          if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
              upper = scanNumber(tokenizer);
          }
      }

      tokenizer.eat(RIGHTCURLYBRACKET);

      var range = {
          comma: comma,
          min: Number(lower),
          max: 0
      };

      if (upper) {
          range.max = Number(upper);
      }

      return range;
  }
  // Reads an optional multiplier (`*`, `+`, `?`, `#`, `#{..}` or `{..}`) at the
  // cursor. Returns MULTIPLIER_DEFAULT, without consuming anything, when the
  // cursor is not at a multiplier.
  function readMultiplier(tokenizer) {
      var code = tokenizer.charCode();

      if (code === ASTERISK) {
          tokenizer.pos++;
          return MULTIPLIER_ZERO_OR_MORE;
      }

      if (code === PLUSSIGN) {
          tokenizer.pos++;
          return MULTIPLIER_ONE_OR_MORE;
      }

      if (code === QUESTIONMARK) {
          tokenizer.pos++;
          return MULTIPLIER_ZERO_OR_ONE;
      }

      if (code === NUMBERSIGN) {
          tokenizer.pos++;

          // `#` may be followed by an explicit range: `#{1,4}`
          return tokenizer.charCode() === LEFTCURLYBRACKET
              ? readMultiplierRange(tokenizer, true)
              : MULTIPLIER_ONE_OR_MORE_COMMA_SEPARATED;
      }

      if (code === LEFTCURLYBRACKET) {
          return readMultiplierRange(tokenizer, false);
      }

      return MULTIPLIER_DEFAULT;
  }
  // Reads an optional multiplier after `node`. Without a multiplier the node is
  // returned untouched; otherwise it is wrapped into an implicit single-term
  // group that carries the multiplier.
  function maybeMultiplied(tokenizer, node) {
      var multiplier = readMultiplier(tokenizer);

      if (multiplier === MULTIPLIER_DEFAULT) {
          return node;
      }

      return {
          type: 'Group',
          terms: [node],
          combinator: '|', // `|` combinator is simplest in implementation (and therefore faster)
          disallowEmpty: false,
          multiplier: multiplier,
          explicit: false
      };
  }
  // Reads a property reference of the form `<'name'>`, followed by an optional
  // multiplier, and returns the resulting node.
  function readProperty(tokenizer) {
      tokenizer.eat(LESSTHANSIGN);
      tokenizer.eat(APOSTROPHE);

      var propertyName = scanWord(tokenizer);

      tokenizer.eat(APOSTROPHE);
      tokenizer.eat(GREATERTHANSIGN);

      var node = {
          type: 'Property',
          name: propertyName
      };

      return maybeMultiplied(tokenizer, node);
  }
  // Reads a type reference of the form `<name>` or `<name()>`, followed by an
  // optional multiplier, and returns the resulting node. For the second form
  // the `()` suffix is kept as a part of the name.
  function readType(tokenizer) {
      tokenizer.eat(LESSTHANSIGN);

      var typeName = scanWord(tokenizer);
      var hasEmptyParentheses =
          tokenizer.charCode() === LEFTPARENTHESIS &&
          tokenizer.nextCharCode() === RIGHTPARENTHESIS;

      if (hasEmptyParentheses) {
          tokenizer.pos += 2;
          typeName += '()';
      }

      tokenizer.eat(GREATERTHANSIGN);

      return maybeMultiplied(tokenizer, {
          type: 'Type',
          name: typeName
      });
  }
  // Reads a word at the cursor. A word immediately followed by `(` is a
  // function whose arguments are read as an implicit group up to the closing
  // `)`; any other word is a keyword. An optional multiplier may follow both.
  function readKeywordOrFunction(tokenizer) {
      var word = scanWord(tokenizer);

      if (tokenizer.charCode() !== LEFTPARENTHESIS) {
          return maybeMultiplied(tokenizer, {
              type: 'Keyword',
              name: word
          });
      }

      tokenizer.pos++;

      var args = readImplicitGroup(tokenizer);

      tokenizer.eat(RIGHTPARENTHESIS);

      return maybeMultiplied(tokenizer, {
          type: 'Function',
          name: word,
          children: args
      });
  }
  // Rewrites `terms` in place so that it no longer contains Combinator entries.
  //
  // `combinators` is an object whose keys are the combinators present in
  // `terms`. They are processed in COMBINATOR_PRECEDENCE order. For every
  // combinator except the last one, each run of terms joined by it is replaced
  // with an implicit Group. Terms joined by the last combinator stay flat.
  //
  // Returns the last processed combinator, or undefined when there was none.
  function regroupTerms(terms, combinators) {
      var pending = Object.keys(combinators).sort(function(left, right) {
          return COMBINATOR_PRECEDENCE[left] - COMBINATOR_PRECEDENCE[right];
      });
      var current;

      // replaces terms[from..to) with a single group joined by the current combinator
      function collapse(from, to) {
          terms.splice(from, to - from, {
              type: 'Group',
              terms: terms.slice(from, to),
              combinator: current,
              disallowEmpty: false,
              multiplier: MULTIPLIER_DEFAULT,
              explicit: false
          });
      }

      while (pending.length > 0) {
          current = pending.shift();

          // start of the run being collected, -1 when no run is open;
          // a run is considered open at the very beginning of the list
          var runStart = 0;
          var index = 0;

          while (index < terms.length) {
              var term = terms[index];

              if (term.type !== 'Combinator') {
                  index++;
                  continue;
              }

              if (term.value === current) {
                  if (runStart === -1) {
                      // the run starts at the term preceding the combinator
                      runStart = index - 1;
                  }

                  // drop the combinator; the next term moves into this slot,
                  // so the index stays where it is
                  terms.splice(index, 1);
                  continue;
              }

              // a different combinator closes the open run
              if (runStart !== -1 && index - runStart > 1) {
                  collapse(runStart, index);
                  index = runStart + 1; // now points to the closing combinator
              }

              runStart = -1;
              index++;
          }

          // close a run that reaches the end of the list, unless this is the
          // last combinator (its terms are left for the enclosing group)
          if (runStart !== -1 && pending.length) {
              collapse(runStart, index);
          }
      }

      return current;
  }
  // Reads a sequence of terms until peek() yields nothing (end of input or a
  // character that does not start a token) and returns it as an implicit Group.
  //
  // Spaces tokens are dropped. Two adjacent terms with no explicit combinator
  // between them are joined by a juxtaposition combinator (' '). The collected
  // terms are then regrouped by combinator precedence; the group's combinator
  // is the one regroupTerms() returns, or ' ' when there were no combinators.
  //
  // Reports 'Unexpected combinator' for a combinator at the beginning of the
  // group, for two combinators in a row, and for a combinator at the end.
  function readImplicitGroup(tokenizer) {
      var terms = [];
      var combinators = {};
      var token;
      var prevToken = null;
      // offset right after the last non-space token (initially the group start)
      var prevTokenPos = tokenizer.pos;

      while (token = peek(tokenizer)) {
          if (token.type !== 'Spaces') {
              if (token.type === 'Combinator') {
                  // check for combinator in group beginning and double combinator sequence
                  if (prevToken === null || prevToken.type === 'Combinator') {
                      error(tokenizer, prevTokenPos, 'Unexpected combinator');
                  }

                  combinators[token.value] = true;
              } else if (prevToken !== null && prevToken.type !== 'Combinator') {
                  combinators[' '] = true; // a b
                  terms.push({
                      type: 'Combinator',
                      value: ' '
                  });
              }

              terms.push(token);
              prevToken = token;
              prevTokenPos = tokenizer.pos;
          }
      }

      // check for combinator in group ending
      if (prevToken !== null && prevToken.type === 'Combinator') {
          // Report the offset right after the dangling combinator, the same kind
          // of position the check above uses. The difference between the cursor
          // and that offset is only the length of the trailing whitespace, not
          // a position in the input.
          error(tokenizer, prevTokenPos, 'Unexpected combinator');
      }

      return {
          type: 'Group',
          terms: terms,
          combinator: regroupTerms(terms, combinators) || ' ',
          disallowEmpty: false,
          multiplier: MULTIPLIER_DEFAULT,
          explicit: false
      };
  }
  // Reads an explicit group: `[ ... ]`, then an optional multiplier, then an
  // optional `!` which sets `disallowEmpty` on the group.
  function readGroup(tokenizer) {
      tokenizer.eat(LEFTSQUAREBRACKET);

      var group = readImplicitGroup(tokenizer);

      tokenizer.eat(RIGHTSQUAREBRACKET);

      group.explicit = true;
      group.multiplier = readMultiplier(tokenizer);

      var hasExclamation = tokenizer.charCode() === EXCLAMATIONMARK;

      if (hasExclamation) {
          tokenizer.pos++;
          group.disallowEmpty = true;
      }

      return group;
  }
  // Reads the next token or node at the cursor and advances the cursor past it.
  // Returns undefined, leaving the cursor in place, when the character at the
  // cursor does not start anything known (that includes the end of input).
  function peek(tokenizer) {
      var code = tokenizer.charCode();

      if (code < 128 && NAME_CHAR[code] === 1) {
          return readKeywordOrFunction(tokenizer);
      }

      if (code === LEFTSQUAREBRACKET) {
          return readGroup(tokenizer);
      }

      if (code === LESSTHANSIGN) {
          // `<'name'>` is a property reference, `<name>` is a type reference
          return tokenizer.nextCharCode() === APOSTROPHE
              ? readProperty(tokenizer)
              : readType(tokenizer);
      }

      if (code === VERTICALLINE) {
          // `||` or `|`
          var width = tokenizer.nextCharCode() === VERTICALLINE ? 2 : 1;

          return {
              type: 'Combinator',
              value: tokenizer.substringToPos(tokenizer.pos + width)
          };
      }

      if (code === AMPERSAND) {
          // only `&&` is valid, a single `&` is an error
          tokenizer.pos++;
          tokenizer.eat(AMPERSAND);

          return {
              type: 'Combinator',
              value: '&&'
          };
      }

      if (code === COMMA) {
          tokenizer.pos++;

          return {
              type: 'Comma',
              value: ','
          };
      }

      if (code === SOLIDUS) {
          tokenizer.pos++;

          return {
              type: 'Slash',
              value: '/'
          };
      }

      if (code === PERCENTSIGN) { // looks like exception, needs for attr()'s <type-or-unit>
          tokenizer.pos++;

          return {
              type: 'Percent',
              value: '%'
          };
      }

      if (code === LEFTPARENTHESIS) {
          tokenizer.pos++;

          var children = readImplicitGroup(tokenizer);

          tokenizer.eat(RIGHTPARENTHESIS);

          return {
              type: 'Parentheses',
              children: children
          };
      }

      if (code === APOSTROPHE) {
          return {
              type: 'String',
              value: scanString(tokenizer)
          };
      }

      if (code === SPACE || code === TAB || code === N || code === R || code === F) {
          return {
              type: 'Spaces',
              value: scanSpaces(tokenizer)
          };
      }

      // anything else: no token, the result is undefined
  }
  // Throws a SyntaxParseError for the tokenizer's input at offset `pos`.
  // `msg` falls back to 'Unexpected input' when omitted (or empty).
  function error(tokenizer, pos, msg) {
      var message = msg ? msg : 'Unexpected input';

      throw new SyntaxParseError(message, tokenizer.str, pos);
  }
  // Parses a syntax definition string and returns the root Group node.
  // Reports an error at the point where reading stopped when the input was
  // not consumed completely.
  function parse(str) {
      var tokenizer = new Tokenizer(str);
      var root = readImplicitGroup(tokenizer);

      if (tokenizer.pos !== str.length) {
          error(tokenizer, tokenizer.pos);
      }

      // reduce redundant groups with single group term
      var terms = root.terms;
      var wrapsSingleGroup = terms.length === 1 && terms[0].type === 'Group';

      return wrapsSingleGroup ? terms[0] : root;
  }
  // Warm up the parser: run it once over a sample so that code branches that
  // never execute are eliminated and soft deoptimizations (insufficient type
  // feedback) are fixed.
  parse("[a&&<b>#|<'c'>*||e(){2,} f{2} /,(% g#{1,2})]!");

  module.exports = parse;