// preprocessor.js
  1. 'use strict';
  2. var UNICODE = require('../common/unicode');
  3. //Aliases
  4. var $ = UNICODE.CODE_POINTS;
//Utils

//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are located in another module, because of the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
  9. function isSurrogatePair(cp1, cp2) {
  10. return cp1 >= 0xD800 && cp1 <= 0xDBFF && cp2 >= 0xDC00 && cp2 <= 0xDFFF;
  11. }
  12. function getSurrogatePairCodePoint(cp1, cp2) {
  13. return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2;
  14. }
  15. //Const
  16. var DEFAULT_BUFFER_WATERLINE = 1 << 16;
  17. //Preprocessor
  18. //NOTE: HTML input preprocessing
  19. //(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
  20. var Preprocessor = module.exports = function () {
  21. this.html = null;
  22. this.pos = -1;
  23. this.lastGapPos = -1;
  24. this.lastCharPos = -1;
  25. this.gapStack = [];
  26. this.skipNextNewLine = false;
  27. this.lastChunkWritten = false;
  28. this.endOfChunkHit = false;
  29. this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
  30. };
  31. Preprocessor.prototype.dropParsedChunk = function () {
  32. if (this.pos > this.bufferWaterline) {
  33. this.lastCharPos -= this.pos;
  34. this.html = this.html.substring(this.pos);
  35. this.pos = 0;
  36. this.lastGapPos = -1;
  37. this.gapStack = [];
  38. }
  39. };
  40. Preprocessor.prototype._addGap = function () {
  41. this.gapStack.push(this.lastGapPos);
  42. this.lastGapPos = this.pos;
  43. };
  44. Preprocessor.prototype._processHighRangeCodePoint = function (cp) {
  45. //NOTE: try to peek a surrogate pair
  46. if (this.pos !== this.lastCharPos) {
  47. var nextCp = this.html.charCodeAt(this.pos + 1);
  48. if (isSurrogatePair(cp, nextCp)) {
  49. //NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
  50. this.pos++;
  51. cp = getSurrogatePairCodePoint(cp, nextCp);
  52. //NOTE: add gap that should be avoided during retreat
  53. this._addGap();
  54. }
  55. }
  56. // NOTE: we've hit the end of chunk, stop processing at this point
  57. else if (!this.lastChunkWritten) {
  58. this.endOfChunkHit = true;
  59. return $.EOF;
  60. }
  61. return cp;
  62. };
  63. Preprocessor.prototype.write = function (chunk, isLastChunk) {
  64. if (this.html)
  65. this.html += chunk;
  66. else
  67. this.html = chunk;
  68. this.lastCharPos = this.html.length - 1;
  69. this.endOfChunkHit = false;
  70. this.lastChunkWritten = isLastChunk;
  71. };
  72. Preprocessor.prototype.insertHtmlAtCurrentPos = function (chunk) {
  73. this.html = this.html.substring(0, this.pos + 1) +
  74. chunk +
  75. this.html.substring(this.pos + 1, this.html.length);
  76. this.lastCharPos = this.html.length - 1;
  77. this.endOfChunkHit = false;
  78. };
  79. Preprocessor.prototype.advance = function () {
  80. this.pos++;
  81. if (this.pos > this.lastCharPos) {
  82. if (!this.lastChunkWritten)
  83. this.endOfChunkHit = true;
  84. return $.EOF;
  85. }
  86. var cp = this.html.charCodeAt(this.pos);
  87. //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
  88. //must be ignored.
  89. if (this.skipNextNewLine && cp === $.LINE_FEED) {
  90. this.skipNextNewLine = false;
  91. this._addGap();
  92. return this.advance();
  93. }
  94. //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
  95. if (cp === $.CARRIAGE_RETURN) {
  96. this.skipNextNewLine = true;
  97. return $.LINE_FEED;
  98. }
  99. this.skipNextNewLine = false;
  100. //OPTIMIZATION: first perform check if the code point in the allowed range that covers most common
  101. //HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points.
  102. return cp >= 0xD800 ? this._processHighRangeCodePoint(cp) : cp;
  103. };
  104. Preprocessor.prototype.retreat = function () {
  105. if (this.pos === this.lastGapPos) {
  106. this.lastGapPos = this.gapStack.pop();
  107. this.pos--;
  108. }
  109. this.pos--;
  110. };