123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
- 'use strict';
- var Tokenizer = require('../tokenizer'),
- foreignContent = require('../common/foreign_content'),
- UNICODE = require('../common/unicode'),
- HTML = require('../common/html');
- //Aliases
- var $ = HTML.TAG_NAMES,
- NS = HTML.NAMESPACES;
- //ParserFeedbackSimulator
- //Simulates the Tokenizer adjustments that a standard parser performs during tree construction.
/**
 * Wraps a tokenizer and post-processes the tokens it produces (see `getNextToken`).
 *
 * The instance keeps a stack of namespaces, seeded with the HTML namespace, so that
 * token handling can differ between HTML and foreign (SVG / MathML) content.
 *
 * @param {Object} tokenizer - Object exposing `getNextToken()`; its `state` and
 *     `allowCDATA` properties are written by this simulator.
 */
var ParserFeedbackSimulator = module.exports = function (tokenizer) {
    this.tokenizer = tokenizer;

    //NOTE: getNextToken() reads this flag on its very first call, so it gets an explicit
    //initial value instead of relying on `undefined` being falsy.
    this.skipNextNewLine = false;

    this.namespaceStack = [];
    this.namespaceStackTop = -1;
    this._enterNamespace(NS.HTML);
};
/**
 * Fetches the next token from the wrapped tokenizer and applies parser feedback to it:
 *
 *  - start and end tag tokens are passed to the corresponding handlers;
 *  - a NULL character token seen while `inForeignContent` is set is converted into a
 *    character token holding the replacement character;
 *  - when the previous token requested it (`skipNextNewLine`), a leading '\n' is removed
 *    from a whitespace token; a token that consisted of only that '\n' is dropped and
 *    the following token is returned instead.
 *
 * @returns {Object} The (possibly modified) token.
 */
ParserFeedbackSimulator.prototype.getNextToken = function () {
    var token = this.tokenizer.getNextToken(),
        type = token.type;

    //NOTE: a hibernation token is not a real input token, so a pending new line skip
    //must survive it untouched.
    if (type === Tokenizer.HIBERNATION_TOKEN)
        return token;

    //NOTE: the skip applies only to the token that immediately follows the tag which
    //requested it. Consume the flag here, before the handlers run, so that any token
    //(including start/end tags) cancels a pending skip, while a start tag handler is
    //still able to request a fresh one.
    var skipNewLine = this.skipNextNewLine;

    this.skipNextNewLine = false;

    if (type === Tokenizer.START_TAG_TOKEN)
        this._handleStartTagToken(token);

    else if (type === Tokenizer.END_TAG_TOKEN)
        this._handleEndTagToken(token);

    else if (type === Tokenizer.NULL_CHARACTER_TOKEN && this.inForeignContent) {
        token.type = Tokenizer.CHARACTER_TOKEN;
        token.chars = UNICODE.REPLACEMENT_CHARACTER;
    }

    else if (skipNewLine && type === Tokenizer.WHITESPACE_CHARACTER_TOKEN && token.chars[0] === '\n') {
        //NOTE: nothing is left of the token once the new line is removed, so move on
        //to the next one.
        if (token.chars.length === 1)
            return this.getNextToken();

        token.chars = token.chars.slice(1);
    }

    return token;
};
- //Namespace stack mutations
/**
 * Pushes `namespace` onto the namespace stack and makes it the current one.
 * Also refreshes `inForeignContent` and the tokenizer's `allowCDATA` flag, both of which
 * are true exactly when the new namespace is not the HTML namespace.
 *
 * @param {String} namespace - Namespace being entered.
 */
ParserFeedbackSimulator.prototype._enterNamespace = function (namespace) {
    var isForeign = namespace !== NS.HTML;

    this.namespaceStack.push(namespace);
    this.namespaceStackTop += 1;

    this.inForeignContent = isForeign;
    this.currentNamespace = namespace;
    this.tokenizer.allowCDATA = isForeign;
};
/**
 * Pops the current namespace off the stack and restores the one beneath it as current.
 * `inForeignContent` and the tokenizer's `allowCDATA` flag are recomputed from the
 * restored namespace.
 */
ParserFeedbackSimulator.prototype._leaveCurrentNamespace = function () {
    var stack = this.namespaceStack;

    stack.pop();
    this.namespaceStackTop -= 1;

    var restoredNs = stack[this.namespaceStackTop];

    this.currentNamespace = restoredNs;
    this.inForeignContent = restoredNs !== NS.HTML;
    this.tokenizer.allowCDATA = this.inForeignContent;
};
- //Token handlers
/**
 * Switches the tokenizer state for elements whose content is not tokenized as regular
 * markup. Tag names not listed below leave the tokenizer state unchanged.
 *
 * @param {String} tn - Tag name of the start tag that has just been seen.
 */
ParserFeedbackSimulator.prototype._ensureTokenizerMode = function (tn) {
    switch (tn) {
        case $.TEXTAREA:
        case $.TITLE:
            this.tokenizer.state = Tokenizer.MODE.RCDATA;
            break;

        case $.PLAINTEXT:
            this.tokenizer.state = Tokenizer.MODE.PLAINTEXT;
            break;

        case $.SCRIPT:
            this.tokenizer.state = Tokenizer.MODE.SCRIPT_DATA;
            break;

        case $.STYLE:
        case $.IFRAME:
        case $.XMP:
        case $.NOEMBED:
        case $.NOFRAMES:
        case $.NOSCRIPT:
            this.tokenizer.state = Tokenizer.MODE.RAWTEXT;
            break;
    }
};
/**
 * Applies parser feedback for a start tag token.
 *
 * `<svg>` and `<math>` enter their namespace first. After that:
 *  - in HTML content, `pre` / `textarea` / `listing` request skipping of the next new
 *    line, `image` is renamed to `img`, and the tokenizer mode is adjusted for the
 *    original tag name;
 *  - in foreign content, a token for which `foreignContent.causesExit` is true leaves
 *    the current namespace; otherwise the token is adjusted for the current namespace
 *    and, if it is a non-self-closing integration point, the HTML namespace is entered.
 *
 * @param {Object} token - Start tag token; may be modified in place.
 */
ParserFeedbackSimulator.prototype._handleStartTagToken = function (token) {
    var tagName = token.tagName;

    if (tagName === $.SVG)
        this._enterNamespace(NS.SVG);

    else if (tagName === $.MATH)
        this._enterNamespace(NS.MATHML);

    //HTML content
    if (!this.inForeignContent) {
        switch (tagName) {
            case $.PRE:
            case $.TEXTAREA:
            case $.LISTING:
                this.skipNextNewLine = true;
                break;

            case $.IMAGE:
                token.tagName = $.IMG;
                break;
        }

        this._ensureTokenizerMode(tagName);
        return;
    }

    //Foreign content
    if (foreignContent.causesExit(token)) {
        this._leaveCurrentNamespace();
        return;
    }

    var ns = this.currentNamespace;

    if (ns === NS.SVG) {
        foreignContent.adjustTokenSVGTagName(token);
        foreignContent.adjustTokenSVGAttrs(token);
    }

    else if (ns === NS.MATHML)
        foreignContent.adjustTokenMathMLAttrs(token);

    foreignContent.adjustTokenXMLAttrs(token);

    //NOTE: the tag name is re-read from the token because the SVG adjustment above may
    //have changed it.
    var entersHtml = !token.selfClosing &&
                     foreignContent.isIntegrationPoint(token.tagName, ns, token.attrs);

    if (entersHtml)
        this._enterNamespace(NS.HTML);
};
/**
 * Applies parser feedback for an end tag token.
 *
 *  - In HTML content, checks whether the tag closes the integration point through which
 *    the HTML namespace was entered (judged against the namespace one level below the
 *    current one) and, if so, leaves the current namespace.
 *  - In foreign content, `</svg>` in the SVG namespace and `</math>` in the MathML
 *    namespace leave the current namespace.
 *
 * Finally, if the resulting current namespace is SVG, the token's tag name is adjusted.
 *
 * @param {Object} token - End tag token; its `tagName` may be modified in place.
 */
ParserFeedbackSimulator.prototype._handleEndTagToken = function (token) {
    var tn = token.tagName;

    if (!this.inForeignContent) {
        var previousNs = this.namespaceStack[this.namespaceStackTop - 1],
            svgNamesMap = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP;

        //NOTE: the tag name comes from the input, so only own entries of the map count.
        //A bare lookup would resolve names such as `constructor` or `__proto__` to
        //members inherited from Object.prototype and put a non-string into `tn`.
        if (previousNs === NS.SVG &&
            Object.prototype.hasOwnProperty.call(svgNamesMap, tn) && svgNamesMap[tn])
            tn = svgNamesMap[tn];

        //NOTE: check for exit from integration point
        if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs))
            this._leaveCurrentNamespace();
    }

    else if ((tn === $.SVG && this.currentNamespace === NS.SVG) ||
             (tn === $.MATH && this.currentNamespace === NS.MATHML))
        this._leaveCurrentNamespace();

    //NOTE: adjust end tag name as well for consistency
    if (this.currentNamespace === NS.SVG)
        foreignContent.adjustTokenSVGTagName(token);
};
|