parser_feedback_simulator.js 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. 'use strict';
  2. var Tokenizer = require('../tokenizer'),
  3. foreignContent = require('../common/foreign_content'),
  4. UNICODE = require('../common/unicode'),
  5. HTML = require('../common/html');
  6. //Aliases
  7. var $ = HTML.TAG_NAMES,
  8. NS = HTML.NAMESPACES;
  9. //ParserFeedbackSimulator
  10. //Simulates the adjustments of the Tokenizer which are performed by the standard parser during tree construction.
  //Creates a simulator that wraps `tokenizer`.
  //The namespace stack starts out empty and the HTML namespace is entered right away
  //through _enterNamespace().
  var ParserFeedbackSimulator = module.exports = function (tokenizer) {
      this.tokenizer = tokenizer;

      this.namespaceStack = [];
      this.namespaceStackTop = -1;

      //NOTE: getNextToken() reads this flag before any start tag has had a chance to set it,
      //so give it an explicit boolean value instead of leaving it undefined.
      this.skipNextNewLine = false;

      this._enterNamespace(NS.HTML);
  };
  //Pulls the next token from the wrapped tokenizer and adjusts it before returning it:
  //  * start and end tag tokens are passed to the corresponding handlers;
  //  * a NULL character token seen in foreign content is turned into a character token
  //    holding the replacement character;
  //  * if the previous start tag requested it, a leading '\n' of a whitespace token is dropped
  //    (a token consisting of that single '\n' is skipped entirely).
  ParserFeedbackSimulator.prototype.getNextToken = function () {
      var token = this.tokenizer.getNextToken(),
          newLineSkipPending = this.skipNextNewLine;

      //NOTE: the request to skip a new line concerns only the token that directly follows
      //the start tag which made it, whatever kind of token that is. Previously the flag survived
      //start tags, end tags and NULL characters in foreign content, so a later, unrelated
      //new line was dropped (e.g. the one after `<pre></pre>`). Hibernation tokens keep
      //the request pending. The flag is cleared before dispatching, because the start tag
      //handler may raise it again for the current token.
      if (token.type !== Tokenizer.HIBERNATION_TOKEN)
          this.skipNextNewLine = false;

      if (token.type === Tokenizer.START_TAG_TOKEN)
          this._handleStartTagToken(token);

      else if (token.type === Tokenizer.END_TAG_TOKEN)
          this._handleEndTagToken(token);

      else if (token.type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
          token.type = Tokenizer.CHARACTER_TOKEN;
          token.chars = UNICODE.REPLACEMENT_CHARACTER;
      }

      else if (newLineSkipPending &&
               token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
          //NOTE: nothing is left of the token once the new line is removed, so hand out
          //the following token instead.
          if (token.chars.length === 1)
              return this.getNextToken();

          token.chars = token.chars.slice(1);
      }

      return token;
  };
  38. //Namespace stack mutations
  //Pushes `namespace` onto the namespace stack and makes it the current one.
  //Any namespace other than NS.HTML counts as foreign content; the tokenizer's
  //allowCDATA flag is kept equal to the foreign-content flag.
  ParserFeedbackSimulator.prototype._enterNamespace = function (namespace) {
      var isForeign = namespace !== NS.HTML;

      this.namespaceStack.push(namespace);
      this.namespaceStackTop += 1;

      this.currentNamespace = namespace;
      this.inForeignContent = isForeign;
      this.tokenizer.allowCDATA = isForeign;
  };
  //Pops the top of the namespace stack and makes the entry below it the current namespace.
  //The foreign-content flag and the tokenizer's allowCDATA flag are recomputed
  //from the namespace that becomes current.
  ParserFeedbackSimulator.prototype._leaveCurrentNamespace = function () {
      this.namespaceStack.pop();
      this.namespaceStackTop -= 1;

      var restoredNs = this.namespaceStack[this.namespaceStackTop],
          isForeign = restoredNs !== NS.HTML;

      this.currentNamespace = restoredNs;
      this.inForeignContent = isForeign;
      this.tokenizer.allowCDATA = isForeign;
  };
  53. //Token handlers
  //Sets the tokenizer state that corresponds to the start tag name `tn`:
  //RCDATA for <textarea> and <title>, PLAINTEXT for <plaintext>, SCRIPT_DATA for <script>
  //and RAWTEXT for <style>, <iframe>, <xmp>, <noembed>, <noframes> and <noscript>.
  //Any other tag name leaves the tokenizer state as it is.
  ParserFeedbackSimulator.prototype._ensureTokenizerMode = function (tn) {
      var MODE = Tokenizer.MODE;

      switch (tn) {
          case $.TEXTAREA:
          case $.TITLE:
              this.tokenizer.state = MODE.RCDATA;
              break;

          case $.PLAINTEXT:
              this.tokenizer.state = MODE.PLAINTEXT;
              break;

          case $.SCRIPT:
              this.tokenizer.state = MODE.SCRIPT_DATA;
              break;

          case $.STYLE:
          case $.IFRAME:
          case $.XMP:
          case $.NOEMBED:
          case $.NOFRAMES:
          case $.NOSCRIPT:
              this.tokenizer.state = MODE.RAWTEXT;
              break;
      }
  };
  //Adjusts a start tag token and updates the simulator state for it.
  //  * <svg> and <math> enter the SVG / MathML namespace;
  //  * in foreign content the token's tag name and attributes are adjusted through
  //    the foreignContent helpers, and an integration point enters the HTML namespace;
  //  * in HTML content <pre>, <textarea> and <listing> request skipping of the next new line,
  //    <image> is renamed to <img>, and the tokenizer mode is switched for the tag name.
  //The token is modified in place; nothing is returned.
  ParserFeedbackSimulator.prototype._handleStartTagToken = function (token) {
      var tn = token.tagName,
          enteredForeignRoot = false;

      if (tn === $.SVG || tn === $.MATH) {
          this._enterNamespace(tn === $.SVG ? NS.SVG : NS.MATHML);
          enteredForeignRoot = true;
      }

      //NOTE: a tag which causes exit from foreign content closes every nested foreign element
      //and is then processed as a regular HTML start tag. So all foreign namespaces are left here
      //(the stack depth check protects the bottom entry) and the token falls through
      //to the HTML branch below instead of being dropped from further handling.
      if (this.inForeignContent && foreignContent.causesExit(token)) {
          while (this.inForeignContent && this.namespaceStackTop > 0)
              this._leaveCurrentNamespace();
      }

      if (this.inForeignContent) {
          var currentNs = this.currentNamespace;

          if (currentNs === NS.MATHML)
              foreignContent.adjustTokenMathMLAttrs(token);

          else if (currentNs === NS.SVG) {
              foreignContent.adjustTokenSVGTagName(token);
              foreignContent.adjustTokenSVGAttrs(token);
          }

          foreignContent.adjustTokenXMLAttrs(token);

          //NOTE: the tag name may have been changed by the SVG adjustment above
          tn = token.tagName;

          if (token.selfClosing) {
              //NOTE: self-closing <svg/> or <math/> has no content and no matching end tag,
              //so the namespace entered for it above has to be left right away.
              if (enteredForeignRoot)
                  this._leaveCurrentNamespace();
          }

          else if (foreignContent.isIntegrationPoint(tn, currentNs, token.attrs))
              this._enterNamespace(NS.HTML);
      }

      else {
          if (tn === $.PRE || tn === $.TEXTAREA || tn === $.LISTING)
              this.skipNextNewLine = true;

          else if (tn === $.IMAGE)
              token.tagName = $.IMG;

          this._ensureTokenizerMode(tn);
      }
  };
  //Updates the namespace stack for an end tag token and adjusts its tag name.
  //  * in HTML content the end tag is checked against the namespace one level below the top of
  //    the stack: if it closes an integration point, the current (HTML) namespace is left;
  //  * in foreign content </svg> leaves the SVG namespace and </math> leaves the MathML one.
  //If the resulting current namespace is SVG, the token's tag name is adjusted in place.
  ParserFeedbackSimulator.prototype._handleEndTagToken = function (token) {
      var tn = token.tagName;

      if (!this.inForeignContent) {
          var previousNs = this.namespaceStack[this.namespaceStackTop - 1],
              svgTagNames = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP;

          //NOTE: the tag name comes from the input, so look it up as an own property only.
          //A bare `map[tn]` also finds inherited members: `</constructor>` would
          //turn the tag name into a function.
          if (previousNs === NS.SVG && Object.prototype.hasOwnProperty.call(svgTagNames, tn))
              tn = svgTagNames[tn];

          //NOTE: check for exit from integration point
          if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs))
              this._leaveCurrentNamespace();
      }

      else if (tn === $.SVG && this.currentNamespace === NS.SVG ||
               tn === $.MATH && this.currentNamespace === NS.MATHML)
          this._leaveCurrentNamespace();

      //NOTE: adjust end tag name as well for consistency
      if (this.currentNamespace === NS.SVG)
          foreignContent.adjustTokenSVGTagName(token);
  };