12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065 |
- "use strict";
- const { isS, isChar, isNameStartChar, isNameChar, S_LIST, NAME_RE } =
- require("xmlchars/xml/1.0/ed5");
- const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = require("xmlchars/xmlns/1.0/ed3");
// Namespace URIs permanently associated with the "xml" and "xmlns" prefixes.
const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";

// Prefix bindings that are in effect before any document content is read.
// The object is created without a prototype so that a prefix lookup can never
// resolve to a member inherited from Object.prototype.
const rootNS = Object.assign(Object.create(null), {
  xml: XML_NAMESPACE,
  xmlns: XMLNS_NAMESPACE,
});

// Entity name -> replacement text for the predefined XML entities. Also
// prototype-less, for the same reason as rootNS.
const XML_ENTITIES = Object.assign(Object.create(null), {
  amp: "&",
  gt: ">",
  lt: "<",
  quot: "\"",
  apos: "'",
});
- const S_INITIAL = "sInitial"; // initial state; write() dispatches with this[this.state](), so each string value must name a handler method
- const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace
- const S_DOCTYPE = "sDoctype"; // <!DOCTYPE
- const S_DOCTYPE_QUOTE = "sDoctypeQuote"; // <!DOCTYPE "//blah
- const S_DTD = "sDTD"; // <!DOCTYPE "//blah" [ ...
- const S_DTD_QUOTED = "sDTDQuoted"; // <!DOCTYPE "//blah" [ "foo
- const S_DTD_OPEN_WAKA = "sDTDOpenWaka"; // presumably "<" seen inside the DTD; handler not visible here, confirm
- const S_DTD_OPEN_WAKA_BANG = "sDTDOpenWakaBang"; // presumably "<!" seen inside the DTD; handler not visible here, confirm
- const S_DTD_COMMENT = "sDTDComment"; // <!--
- const S_DTD_COMMENT_ENDING = "sDTDCommentEnding"; // <!-- blah -
- const S_DTD_COMMENT_ENDED = "sDTDCommentEnded"; // <!-- blah --
- const S_DTD_PI = "sDTDPI"; // <?
- const S_DTD_PI_ENDING = "sDTDPIEnding"; // <?hi "there" ?
- const S_TEXT = "sText"; // general stuff; also the start state when parsing fragments
- const S_ENTITY = "sEntity"; // & and such
- const S_OPEN_WAKA = "sOpenWaka"; // <
- const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!...
- const S_COMMENT = "sComment"; // <!--
- const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah -
- const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah --
- const S_CDATA = "sCData"; // <![CDATA[ something
- const S_CDATA_ENDING = "sCDataEnding"; // ]
- const S_CDATA_ENDING_2 = "sCDataEnding2"; // ]]
- const S_PI_FIRST_CHAR = "sPIFirstChar"; // <?hi, first char
- const S_PI_REST = "sPIRest"; // <?hi, rest of the name
- const S_PI_BODY = "sPIBody"; // <?hi there
- const S_PI_ENDING = "sPIEnding"; // <?hi "there" ?
- const S_OPEN_TAG = "sOpenTag"; // <strong
- const S_OPEN_TAG_SLASH = "sOpenTagSlash"; // <strong /
- const S_ATTRIB = "sAttrib"; // <a
- const S_ATTRIB_NAME = "sAttribName"; // <a foo
- const S_ATTRIB_NAME_SAW_WHITE = "sAttribNameSawWhite"; // <a foo _
- const S_ATTRIB_VALUE = "sAttribValue"; // <a foo=
- const S_ATTRIB_VALUE_QUOTED = "sAttribValueQuoted"; // <a foo="bar
- const S_ATTRIB_VALUE_CLOSED = "sAttribValueClosed"; // <a foo="bar"
- const S_ATTRIB_VALUE_UNQUOTED = "sAttribValueUnquoted"; // <a foo=bar
- const S_CLOSE_TAG = "sCloseTag"; // </a
- const S_CLOSE_TAG_SAW_WHITE = "sCloseTagSawWhite"; // </a >
- // These states are internal to sPIBody. They are plain numbers, not method
- // names, and are stored in ``xmlDeclState`` rather than ``state``.
- const S_XML_DECL_NAME_START = 1; // <?xml
- const S_XML_DECL_NAME = 2; // <?xml foo
- const S_XML_DECL_EQ = 3; // <?xml foo=
- const S_XML_DECL_VALUE_START = 4; // <?xml foo=
- const S_XML_DECL_VALUE = 5; // <?xml foo="bar"
- /**
- * The list of supported events.
- *
- * Each name ``x`` in this list has a matching ``on<x>`` handler method on
- * ``SaxesParser`` (``ontext``, ``onprocessinginstruction``, ...).
- */
- exports.EVENTS = [
- "text",
- "processinginstruction",
- "doctype",
- "comment",
- "opentagstart",
- "opentag",
- "closetag",
- "cdata",
- "error",
- "end",
- "ready",
- ];
- const NL = 0xA; // "\n" line feed
- const CR = 0xD; // "\r" carriage return
- const SPACE = 0x20; // " "
- const BANG = 0x21; // "!"
- const DQUOTE = 0x22; // double quote
- const AMP = 0x26; // "&"
- const SQUOTE = 0x27; // single quote
- const MINUS = 0x2D; // "-"
- const FORWARD_SLASH = 0x2F; // "/"
- const SEMICOLON = 0x3B; // ";"
- const LESS = 0x3C; // "<"
- const EQUAL = 0x3D; // "="
- const GREATER = 0x3E; // ">"
- const QUESTION = 0x3F; // "?"
- const OPEN_BRACKET = 0x5B; // "["
- const CLOSE_BRACKET = 0x5D; // "]"
/**
 * Tell whether a code point is one of the two quote characters.
 *
 * @param {number} c The code point to test.
 *
 * @returns {boolean} ``true`` for a double or single quote, ``false`` for
 * anything else.
 */
function isQuote(c) {
  switch (c) {
    case DQUOTE:
    case SQUOTE:
      return true;
    default:
      return false;
  }
}
- const QUOTES = [DQUOTE, SQUOTE]; // code points of both quote characters
- const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER]; // a quote, "[" or ">"; presumably a stop set for captureTo in the doctype state (usage not visible here)
- const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET]; // a quote, "<" or "]"; presumably a stop set for captureTo in the DTD state (usage not visible here)
- const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST]; // "=", "?" or any entry of S_LIST from xmlchars
- const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS]; // any entry of S_LIST, ">", "&" or "<"
/**
 * Check that a single prefix/URI binding does not violate the rules for the
 * reserved ``xml`` and ``xmlns`` prefixes and their namespaces. Violations
 * are reported through ``parser.fail``; nothing is returned or thrown here.
 *
 * Two independent checks are made, so one binding may produce two reports:
 *
 * 1. a reserved prefix must be bound to its own namespace URI;
 * 2. a reserved namespace URI may not be bound to another prefix (and the
 *    xmlns namespace may not be bound at all, not even to ``xmlns``).
 *
 * @param {{fail: function(string): *}} parser The object to report errors to.
 *
 * @param {string} prefix The prefix being bound. The empty string stands for
 * the default namespace.
 *
 * @param {string} uri The namespace URI the prefix is bound to.
 */
function nsPairCheck(parser, prefix, uri) {
  // Check 1: reserved prefixes must point at their reserved URI.
  switch (prefix) {
    case "xml":
      if (uri !== XML_NAMESPACE) {
        parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`);
      }
      break;
    case "xmlns":
      if (uri !== XMLNS_NAMESPACE) {
        parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`);
      }
      break;
    default:
  }

  // Check 2: reserved URIs may not be handed out to arbitrary prefixes.
  switch (uri) {
    case XMLNS_NAMESPACE: {
      // The message is built on a single logical line. A backslash line
      // continuation inside a template literal copies whatever starts the
      // next source line into the message.
      const message = prefix === "" ?
        `the default namespace may not be set to ${uri}.` :
        `may not assign a prefix (even "xmlns") to the URI ${XMLNS_NAMESPACE}.`;
      parser.fail(message);
      break;
    }
    case XML_NAMESPACE:
      switch (prefix) {
        case "xml":
          // Assigning the XML namespace to "xml" is fine.
          break;
        case "":
          parser.fail(`the default namespace may not be set to ${uri}.`);
          break;
        default:
          parser.fail("may not assign the xml namespace to another prefix.");
      }
      break;
    default:
  }
}
/**
 * Run ``nsPairCheck`` on every own enumerable binding of a prefix-to-URI map.
 *
 * @param {object} parser The parser to report errors to.
 *
 * @param {Object.<string, string>} mapping The prefix-to-URI bindings to
 * check.
 */
function nsMappingCheck(parser, mapping) {
  Object.keys(mapping).forEach((prefix) => {
    nsPairCheck(parser, prefix, mapping[prefix]);
  });
}
/**
 * Test a string against ``NC_NAME_RE``.
 *
 * @param {string} name The string to test.
 *
 * @returns {boolean} Whether ``NC_NAME_RE`` matches ``name``.
 */
function isNCName(name) {
  const matches = NC_NAME_RE.test(name);
  return matches;
}
/**
 * Test a string against ``NAME_RE``.
 *
 * @param {string} name The string to test.
 *
 * @returns {boolean} Whether ``NAME_RE`` matches ``name``.
 */
function isName(name) {
  const matches = NAME_RE.test(name);
  return matches;
}
- const FORBIDDEN_START = 0; // handleTextInRoot: no "]" pending
- const FORBIDDEN_BRACKET = 1; // handleTextInRoot: the previous character was a first "]"
- const FORBIDDEN_BRACKET_BRACKET = 2; // handleTextInRoot: "]]" (or more) just seen; a ">" now is an error
- /**
- * Data structure for an XML tag.
- *
- * @typedef {object} SaxesTag
- *
- * @property {string} name The tag's name. This is the combination of prefix and
- * local name. For instance ``<a:b>`` would have ``"a:b"`` for ``name``.
- *
- * @property {string} prefix The tag's prefix. For instance ``<a:b>`` would have
- * ``"a"`` for ``prefix``. Undefined if we do not track namespaces.
- *
- * @property {string} local The tag's local name. For instance ``<a:b>`` would
- * have ``"b"`` for ``local``. Undefined if we do not track namespaces.
- *
- * @property {string} uri The namespace URI of this tag. Undefined if we do not
- * track namespaces.
- *
- * @property {Object.<string, SaxesAttribute> | Object.<string, string>}
- * attributes A map of attribute name to attributes. If namespaces are tracked,
- * the values in the map are {@link SaxesAttribute SaxesAttribute}
- * objects. Otherwise, they are strings.
- *
- * @property {Object.<string, string>} ns The namespace bindings in effect.
- *
- * @property {boolean} isSelfClosing Whether the tag is
- * self-closing (e.g. ``<foo/>``).
- *
- */
- /**
- * Data structure for an XML attribute
- *
- * @typedef {object} SaxesAttribute
- *
- * @property {string} name The attribute's name. This is the combination of
- * prefix and local name. For instance ``a:b="c"`` would have ``a:b`` for name.
- *
- * @property {string} prefix The attribute's prefix. For instance ``a:b="c"``
- * would have ``"a"`` for ``prefix``.
- *
- * @property {string} local The attribute's local name. For instance ``a:b="c"``
- * would have ``"b"`` for ``local``.
- *
- * @property {string} uri The namespace URI of this attribute.
- *
- * @property {string} value The attribute's value.
- */
- /**
- * @typedef XMLDecl
- *
- * @property {string} [version] The version specified by the XML declaration.
- *
- * @property {string} [encoding] The encoding specified by the XML declaration.
- *
- * @property {string} [standalone] The value of the standalone parameter
- * specified by the XML declaration.
- */
- /**
- * @callback ResolvePrefix
- *
- * @param {string} prefix The prefix to check.
- *
- * @returns {string|undefined} The URI corresponding to the prefix, if any.
- */
- /**
- * @typedef SaxesOptions
- *
- * @property {boolean} [xmlns] Whether to track namespaces. Unset means
- * ``false``.
- *
- * @property {boolean} [fragment] Whether to accept XML fragments. Unset means
- * ``false``.
- *
- * @property {Object.<string, string>} [additionalNamespaces] A plain object whose key, value
- * pairs define namespaces known before parsing the XML file. It is not legal
- * to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``.
- *
- * @property {ResolvePrefix} [resolvePrefix] A function that will be used if the
- * parser cannot resolve a namespace prefix on its own.
- *
- * @property {boolean} [position] Whether to track positions. Unset means
- * ``true``.
- *
- * @property {string} [fileName] A file name to use for error reporting. Leaving
- * this unset will report a file name of "undefined". "File name" is a loose
- * concept. You could use a URL to some resource, or any descriptive name you
- * like.
- */
- class SaxesParser {
- /**
- * Create a parser. All state setup is delegated to ``_init``, which also
- * calls ``onready`` once the parser is set up.
- *
- * @param {SaxesOptions} opt The parser options.
- */
- constructor(opt) {
- this._init(opt);
- }
- /**
- * Reset the parser state.
- *
- * @private
- */
- _init(opt) {
- this.comment = "";
- this.openWakaBang = "";
- this.text = "";
- this.name = "";
- this.doctype = "";
- this.piTarget = "";
- this.piBody = "";
- this.entity = "";
- this.cdata = "";
- this.xmlDeclName = "";
- this.xmlDeclValue = "";
- /**
- * The options passed to the constructor of this parser.
- *
- * @type {SaxesOptions}
- */
- this.opt = opt || {};
- /**
- * Indicates whether or not the parser is closed. If ``true``, wait for
- * the ``ready`` event to write again.
- *
- * @type {boolean}
- */
- this.closed = false;
- /**
- * The XML declaration for this document.
- *
- * @type {XMLDecl}
- */
- this.xmlDecl = {
- version: undefined,
- encoding: undefined,
- standalone: undefined,
- };
- this.q = null;
- this.tags = [];
- this.tag = null;
- this.chunk = "";
- this.chunkPosition = 0;
- this.i = 0;
- this.trailingCR = false;
- this.forbiddenState = FORBIDDEN_START;
- /**
- * A map of entity name to expansion.
- *
- * @type {Object.<string, string>}
- */
- this.ENTITIES = Object.create(XML_ENTITIES);
- this.attribList = [];
- // The logic is organized so as to minimize the need to check
- // this.opt.fragment while parsing.
- const fragmentOpt = this.fragmentOpt = !!this.opt.fragment;
- this.state = fragmentOpt ? S_TEXT : S_INITIAL;
- // We want these to be all true if we are dealing with a fragment.
- this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot =
- this.sawRoot = fragmentOpt;
- // An XML declaration is intially possible only when parsing whole
- // documents.
- this.xmlDeclPossible = !fragmentOpt;
- this.piIsXMLDecl = false;
- this.xmlDeclState = S_XML_DECL_NAME_START;
- this.xmlDeclExpects = ["version"];
- this.requiredSeparator = false;
- this.entityReturnState = undefined;
- const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns;
- if (xmlnsOpt) {
- // This is the function we use to perform name checks on PIs and entities.
- // When namespaces are used, colons are not allowed in PI target names or
- // entity names. So the check depends on whether namespaces are used. See:
- //
- // https://www.w3.org/XML/xml-names-19990114-errata.html
- // NE08
- //
- this.nameStartCheck = isNCNameStartChar;
- this.nameCheck = isNCNameChar;
- this.isName = isNCName;
- this.processAttribs = this.processAttribsNS;
- this.pushAttrib = this.pushAttribNS;
- this.ns = Object.assign({ __proto__: null }, rootNS);
- const additional = this.opt.additionalNamespaces;
- if (additional) {
- nsMappingCheck(this, additional);
- Object.assign(this.ns, additional);
- }
- }
- else {
- this.nameStartCheck = isNameStartChar;
- this.nameCheck = isNameChar;
- this.isName = isName;
- this.processAttribs = this.processAttribsPlain;
- this.pushAttrib = this.pushAttribPlain;
- }
- this.trackPosition = this.opt.position !== false;
- /** The line number the parser is currently looking at. */
- this.line = 1;
- /** The column the parser is currently looking at. */
- this.column = 0;
- this.fileName = this.opt.fileName;
- this.onready();
- }
- /** The stream position the parser is currently looking at. */
- get position() {
- return this.chunkPosition + this.i;
- }
- /* eslint-disable class-methods-use-this */
- /**
- * Event handler for text data. The default implementation is a no-op.
- *
- * @param {string} text The text data encountered by the parser.
- *
- */
- ontext() {}
- /**
- * Event handler for processing instructions. The default implementation is a
- * no-op.
- *
- * @param {{target: string, body: string}} data The target and body of
- * the processing instruction.
- */
- onprocessinginstruction() {}
- /**
- * Event handler for doctype. The default implementation is a no-op.
- *
- * @param {string} doctype The doctype contents.
- */
- ondoctype() {}
- /**
- * Event handler for comments. The default implementation is a no-op.
- *
- * @param {string} comment The comment contents.
- */
- oncomment() {}
- /**
- * Event handler for the start of an open tag. This is called as soon as we
- * have a tag name. The default implementation is a no-op.
- *
- * @param {SaxesTag} tag The tag.
- */
- onopentagstart() {}
- /**
- * Event handler for an open tag. This is called when the open tag is
- * complete. (We've encountered the ">" that ends the open tag.) The default
- * implementation is a no-op.
- *
- * @param {SaxesTag} tag The tag.
- */
- onopentag() {}
- /**
- * Event handler for a close tag. Note that for self-closing tags, this is
- * called right after ``onopentag``. The default implementation is a no-op.
- *
- * @param {SaxesTag} tag The tag.
- */
- onclosetag() {}
- /**
- * Event handler for a CDATA section. This is called when ending the
- * CDATA section. The default implementation is a no-op.
- *
- * @param {string} cdata The contents of the CDATA section.
- */
- oncdata() {}
- /**
- * Event handler for the stream end. This is called when the stream has been
- * closed with ``close`` or by passing ``null`` to ``write``. The default
- * implementation is a no-op.
- */
- onend() {}
- /**
- * Event handler indicating parser readiness. This is called when the parser
- * is ready to parse a new document. The default implementation is a no-op.
- */
- onready() {}
- /**
- * Event handler indicating an error. The default implementation throws a
- * new ``Error`` constructed from the value it receives, so the thrown
- * object is a wrapper rather than ``err`` itself. Override with a no-op
- * handler if you don't want this.
- *
- * @param {Error} err The error that occurred.
- */
- onerror(err) {
- throw new Error(err);
- }
- /* eslint-enable class-methods-use-this */
- /**
- * Report a parsing error. This method is made public so that client code may
- * check for issues that are outside the scope of this project and can report
- * errors.
- *
- * @param {Error} er The error to report.
- *
- * @returns this
- */
- fail(er) {
- const message = (this.trackPosition) ?
- `${this.fileName}:${this.line}:${this.column}: ${er}` : er;
- this.onerror(new Error(message));
- return this;
- }
- /**
- * Write a XML data to the parser.
- *
- * @param {string} chunk The XML data to write.
- *
- * @returns this
- */
- write(chunk) {
- if (this.closed) {
- return this.fail("cannot write after close; assign an onready handler.");
- }
- let end = false;
- if (chunk === null) {
- end = true;
- chunk = "";
- }
- if (typeof chunk === "object") {
- chunk = chunk.toString();
- }
- // We checked if performing a pre-decomposition of the string into an array
- // of single complete characters (``Array.from(chunk)``) would be faster
- // than the current repeated calls to ``codePointAt``. As of August 2018, it
- // isn't. (There may be Node-specific code that would perform faster than
- // ``Array.from`` but don't want to be dependent on Node.)
- let limit = chunk.length;
- if (this.trailingCR) {
- // The previous chunk had a trailing cr. We need to handle it now.
- chunk = `\r${chunk}`;
- }
- if (!end && chunk[limit - 1] === CR) {
- // The chunk ends with a trailing CR. We cannot know how to handle it
- // until we get the next chunk or the end of the stream. So save it for
- // later.
- limit--;
- this.trailingCR = true;
- }
- this.limit = limit;
- this.chunk = chunk;
- this.i = 0;
- while (this.i < limit) {
- this[this.state]();
- }
- this.chunkPosition += limit;
- return end ? this.end() : this;
- }
- /**
- * Close the current stream. Perform final well-formedness checks and reset
- * the parser state. This is done by passing ``null`` to ``write``.
- *
- * @returns this
- */
- close() {
- return this.write(null);
- }
- /**
- * Get a single code point out of the current chunk. This updates the current
- * position if we do position tracking.
- *
- * @private
- *
- * @returns {number} The character read.
- */
- getCode() {
- const { chunk, i } = this;
- // Using charCodeAt and handling the surrogates ourselves is faster
- // than using codePointAt.
- let code = chunk.charCodeAt(i);
- let skip = 1;
- switch (code) {
- case CR:
- // We may get NaN if we read past the end of the chunk, which is
- // fine.
- if (chunk.charCodeAt(i + 1) === NL) {
- // A \r\n sequence is converted to \n so we have to skip over the next
- // character. We already know it has a size of 1 so ++ is fine here.
- skip++;
- }
- // Otherwise, a \r is just converted to \n, so we don't have to skip
- // ahead.
- // In either case, \r becomes \n.
- code = NL;
- /* yes, fall through */
- case NL:
- this.line++;
- this.column = 0;
- break;
- default:
- this.column++;
- if (code >= 0xD800 && code <= 0xDBFF) {
- code = 0x10000 + ((code - 0xD800) * 0x400) +
- (chunk.charCodeAt(i + 1) - 0xDC00);
- this.column++;
- skip++;
- }
- if (!isChar(code)) {
- this.fail("disallowed character.");
- }
- }
- this.i += skip;
- return code;
- }
- /**
- * @callback CharacterTest
- *
- * @private
- *
- * @param {string} c The character to test.
- *
- * @returns {boolean} ``true`` if the method should continue capturing text,
- * ``false`` otherwise.
- */
- /**
- * Capture characters into a buffer until encountering one of a set of
- * characters.
- *
- * @private
- *
- * @param {number[]} chars An array of codepoints. Encountering a character in
- * the array ends the capture.
- *
- * @param {string} buffer The name of the buffer to save into.
- *
- * @return {number|undefined} The character code that made the capture end, or
- * ``undefined`` if we hit the end of the chunk.
- */
- captureTo(chars, buffer) {
- const { chunk, limit, i: start } = this;
- while (this.i < limit) {
- const c = this.getCode();
- if (chars.includes(c)) {
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start,
- this.i - (c <= 0xFFFF ? 1 : 2));
- return c;
- }
- }
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start);
- return undefined;
- }
- /**
- * Capture characters into a buffer until encountering a character.
- *
- * @private
- *
- * @param {number} char The codepoint that ends the capture.
- *
- * @param {string} buffer The name of the buffer to save into.
- *
- * @return {boolean} ``true`` if we ran into the character. Otherwise, we ran
- * into the end of the current chunk.
- */
- captureToChar(char, buffer) {
- const { chunk, limit, i: start } = this;
- while (this.i < limit) {
- const c = this.getCode();
- if (c === char) {
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start,
- this.i - (c <= 0xFFFF ? 1 : 2));
- return true;
- }
- }
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start);
- return false;
- }
- /**
- * Capture characters that satisfy ``isNameChar`` into the ``name`` field of
- * this parser.
- *
- * @private
- *
- * @return {number|undefined} The character code that made the test fail, or
- * ``undefined`` if we hit the end of the chunk.
- */
- captureNameChars() {
- const { chunk, limit, i: start } = this;
- while (this.i < limit) {
- const c = this.getCode();
- if (!isNameChar(c)) {
- // This is faster than adding codepoints one by one.
- this.name += chunk.substring(start,
- this.i - (c <= 0xFFFF ? 1 : 2));
- return c;
- }
- }
- // This is faster than adding codepoints one by one.
- this.name += chunk.substring(start);
- return undefined;
- }
- /**
- * Capture characters into a buffer while ``this.nameCheck`` run on the
- * character read returns true.
- *
- * @private
- *
- * @param {string} buffer The name of the buffer to save into.
- *
- * @return {number|undefined} The character code that made the test fail, or
- * ``undefined`` if we hit the end of the chunk.
- */
- captureWhileNameCheck(buffer) {
- const { chunk, limit, i: start } = this;
- while (this.i < limit) {
- const c = this.getCode();
- if (!this.nameCheck(c)) {
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start,
- this.i - (c <= 0xFFFF ? 1 : 2));
- return c;
- }
- }
- // This is faster than adding codepoints one by one.
- this[buffer] += chunk.substring(start);
- return undefined;
- }
- /**
- * Skip white spaces.
- *
- * @private
- *
- * @return {string|undefined} The character that ended the skip, or
- * ``undefined`` if we hit the end of the chunk.
- */
- skipSpaces() {
- const { limit } = this;
- while (this.i < limit) {
- const c = this.getCode();
- if (!isS(c)) {
- return c;
- }
- }
- return undefined;
- }
- // STATE HANDLERS
- /** @private */
- sInitial() {
- // We are essentially peeking at the first character of the chunk. Since
- // S_INITIAL can be in effect only when we start working on the first chunk,
- // the index at which we must look is necessarily 0. Note also that the
- // following tests do not depend on decoding surrogates.
- const c = this.chunk.charCodeAt(0);
- // If the initial character is 0xFEFF, ignore it.
- if (c === 0xFEFF) {
- this.i++;
- this.column++;
- }
- else if (isS(c)) {
- this.i++;
- this.column++;
- // An XML declaration cannot appear after initial spaces.
- this.xmlDeclPossible = false;
- }
- this.state = S_BEGIN_WHITESPACE;
- }
- /** @private */
- sBeginWhitespace() {
- const c = this.skipSpaces();
- if (c === LESS) {
- this.state = S_OPEN_WAKA;
- }
- else if (c) {
- // have to process this as a text node.
- // weird, but happens.
- if (!this.reportedTextBeforeRoot) {
- this.fail("text data outside of root node.");
- this.reportedTextBeforeRoot = true;
- }
- this.text = String.fromCodePoint(c);
- this.state = S_TEXT;
- this.xmlDeclPossible = false;
- }
- }
- /** @private */
- sText() {
- //
- // We did try a version of saxes where the S_TEXT state was split in two
- // states: one for text inside the root element, and one for text
- // outside. This was avoiding having to test this.tags.length to decide what
- // implementation to actually use.
- //
- // Peformance testing on gigabyte-size files did not show any advantage to
- // using the two states solution instead of the current one. Conversely, it
- // made the code a bit more complicated elsewhere. For instance, a comment
- // can appear before the root element so when a comment ended it was
- // necessary to determine whether to return to the S_TEXT state or to the
- // new text-outside-root state.
- //
- if (this.tags.length !== 0) {
- this.handleTextInRoot();
- }
- else {
- this.handleTextOutsideRoot();
- }
- }
- /** @private */
- handleTextInRoot() {
- // This is essentially a specialized version of captureTo which is optimized
- // for performing the ]]> check. A previous version of this code, checked
- // ``this.text`` for the presence of ]]>. It simplified the code but was
- // very costly when character data contained a lot of entities to be parsed.
- //
- // Since we are using a specialized loop, we also keep track of the presence
- // of ]]> in text data. The sequence ]]> is forbidden to appear as-is.
- //
- const { chunk, limit, i: start } = this;
- let { forbiddenState } = this;
- let c;
- // eslint-disable-next-line no-labels, no-restricted-syntax
- scanLoop:
- while (this.i < limit) {
- const code = this.getCode();
- switch (code) {
- case LESS:
- this.state = S_OPEN_WAKA;
- c = code;
- forbiddenState = FORBIDDEN_START;
- // eslint-disable-next-line no-labels
- break scanLoop;
- case AMP:
- this.state = S_ENTITY;
- this.entityReturnState = S_TEXT;
- c = code;
- forbiddenState = FORBIDDEN_START;
- // eslint-disable-next-line no-labels
- break scanLoop;
- case CLOSE_BRACKET:
- switch (forbiddenState) {
- case FORBIDDEN_START:
- forbiddenState = FORBIDDEN_BRACKET;
- break;
- case FORBIDDEN_BRACKET:
- forbiddenState = FORBIDDEN_BRACKET_BRACKET;
- break;
- case FORBIDDEN_BRACKET_BRACKET:
- break;
- default:
- throw new Error("impossible state");
- }
- break;
- case GREATER:
- if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) {
- this.fail("the string \"]]>\" is disallowed in char data.");
- }
- forbiddenState = FORBIDDEN_START;
- break;
- default:
- forbiddenState = FORBIDDEN_START;
- }
- }
- this.forbiddenState = forbiddenState;
- // This is faster than adding codepoints one by one.
- this.text += chunk.substring(start,
- c === undefined ? undefined :
- (this.i - (c <= 0xFFFF ? 1 : 2)));
- }
- /** @private */
- handleTextOutsideRoot() {
- // This is essentially a specialized version of captureTo which is optimized
- // for performing the ]]> check. A previous version of this code, checked
- // ``this.text`` for the presence of ]]>. It simplified the code but was
- // very costly when character data contained a lot of entities to be parsed.
- //
- // Since we are using a specialized loop, we also keep track of the presence
- // of non-space characters in the text since these are errors when appearing
- // outside the document root element.
- //
- const { chunk, limit, i: start } = this;
- let nonSpace = false;
- let c;
- // eslint-disable-next-line no-labels, no-restricted-syntax
- outRootLoop:
- while (this.i < limit) {
- const code = this.getCode();
- switch (code) {
- case LESS:
- this.state = S_OPEN_WAKA;
- c = code;
- // eslint-disable-next-line no-labels
- break outRootLoop;
- case AMP:
- this.state = S_ENTITY;
- this.entityReturnState = S_TEXT;
- c = code;
- nonSpace = true;
- // eslint-disable-next-line no-labels
- break outRootLoop;
- default:
- if (!isS(code)) {
- nonSpace = true;
- }
- }
- }
- // This is faster than adding codepoints one by one.
- this.text += chunk.substring(start,
- c === undefined ? undefined :
- (this.i - (c <= 0xFFFF ? 1 : 2)));
- if (!nonSpace) {
- return;
- }
- // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags
- // to avoid reporting errors for every single character that is out of
- // place.
- if (!this.sawRoot && !this.reportedTextBeforeRoot) {
- this.fail("text data outside of root node.");
- this.reportedTextBeforeRoot = true;
- }
- if (this.closedRoot && !this.reportedTextAfterRoot) {
- this.fail("text data outside of root node.");
- this.reportedTextAfterRoot = true;
- }
- }
- /** @private */
- sOpenWaka() {
- const c = this.getCode();
- // either a /, ?, !, or text is coming next.
- if (isNameStartChar(c)) {
- this.state = S_OPEN_TAG;
- this.name = String.fromCodePoint(c);
- this.xmlDeclPossible = false;
- }
- else {
- switch (c) {
- case FORWARD_SLASH:
- this.state = S_CLOSE_TAG;
- this.xmlDeclPossible = false;
- break;
- case BANG:
- this.state = S_OPEN_WAKA_BANG;
- this.openWakaBang = "";
- this.xmlDeclPossible = false;
- break;
- case QUESTION:
- this.state = S_PI_FIRST_CHAR;
- break;
- default:
- this.fail("disallowed character in tag name.");
- this.state = S_TEXT;
- this.xmlDeclPossible = false;
- }
- }
- }
- /** @private */
- sOpenWakaBang() {
- this.openWakaBang += String.fromCodePoint(this.getCode());
- switch (this.openWakaBang) {
- case "[CDATA[":
- if (!this.sawRoot && !this.reportedTextBeforeRoot) {
- this.fail("text data outside of root node.");
- this.reportedTextBeforeRoot = true;
- }
- if (this.closedRoot && !this.reportedTextAfterRoot) {
- this.fail("text data outside of root node.");
- this.reportedTextAfterRoot = true;
- }
- this.state = S_CDATA;
- this.openWakaBang = "";
- break;
- case "--":
- this.state = S_COMMENT;
- this.openWakaBang = "";
- break;
- case "DOCTYPE":
- this.state = S_DOCTYPE;
- if (this.doctype || this.sawRoot) {
- this.fail("inappropriately located doctype declaration.");
- }
- this.openWakaBang = "";
- break;
- default:
- // 7 happens to be the maximum length of the string that can possibly
- // match one of the cases above.
- if (this.openWakaBang.length >= 7) {
- this.fail("incorrect syntax.");
- }
- }
- }
- /** @private */
- sDoctype() {
- const c = this.captureTo(DOCTYPE_TERMINATOR, "doctype");
- if (c === GREATER) {
- this.state = S_TEXT;
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.ondoctype(this.doctype);
- this.doctype = true; // just remember that we saw it.
- }
- else if (c) {
- this.doctype += String.fromCodePoint(c);
- if (c === OPEN_BRACKET) {
- this.state = S_DTD;
- }
- else if (isQuote(c)) {
- this.state = S_DOCTYPE_QUOTE;
- this.q = c;
- }
- }
- }
- /** @private */
- sDoctypeQuote() {
- const { q } = this;
- if (this.captureToChar(q, "doctype")) {
- this.doctype += String.fromCodePoint(q);
- this.q = null;
- this.state = S_DOCTYPE;
- }
- }
- /** @private */
- sDTD() {
- const c = this.captureTo(DTD_TERMINATOR, "doctype");
- if (!c) {
- return;
- }
- this.doctype += String.fromCodePoint(c);
- if (c === CLOSE_BRACKET) {
- this.state = S_DOCTYPE;
- }
- else if (c === LESS) {
- this.state = S_DTD_OPEN_WAKA;
- }
- else if (isQuote(c)) {
- this.state = S_DTD_QUOTED;
- this.q = c;
- }
- }
- /** @private */
- sDTDQuoted() {
- const { q } = this;
- if (this.captureToChar(q, "doctype")) {
- this.doctype += String.fromCodePoint(q);
- this.state = S_DTD;
- this.q = null;
- }
- }
- /** @private */
- sDTDOpenWaka() {
- const c = this.getCode();
- this.doctype += String.fromCodePoint(c);
- switch (c) {
- case BANG:
- this.state = S_DTD_OPEN_WAKA_BANG;
- this.openWakaBang = "";
- break;
- case QUESTION:
- this.state = S_DTD_PI;
- break;
- default:
- this.state = S_DTD;
- }
- }
- /** @private */
- sDTDOpenWakaBang() {
- const char = String.fromCodePoint(this.getCode());
- const owb = this.openWakaBang += char;
- this.doctype += char;
- if (owb !== "-") {
- this.state = owb === "--" ? S_DTD_COMMENT : S_DTD;
- this.openWakaBang = "";
- }
- }
- /** @private */
- sDTDComment() {
- if (this.captureToChar(MINUS, "doctype")) {
- this.doctype += "-";
- this.state = S_DTD_COMMENT_ENDING;
- }
- }
- /** @private */
- sDTDCommentEnding() {
- const c = this.getCode();
- this.doctype += String.fromCodePoint(c);
- this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT;
- }
- /** @private */
- sDTDCommentEnded() {
- const c = this.getCode();
- this.doctype += String.fromCodePoint(c);
- if (c === GREATER) {
- this.state = S_DTD;
- }
- else {
- this.fail("malformed comment.");
- // <!-- blah -- bloo --> will be recorded as
- // a comment of " blah -- bloo "
- this.state = S_DTD_COMMENT;
- }
- }
- /** @private */
- sDTDPI() {
- if (this.captureToChar(QUESTION, "doctype")) {
- this.doctype += "?";
- this.state = S_DTD_PI_ENDING;
- }
- }
- /** @private */
- sDTDPIEnding() {
- const c = this.getCode();
- this.doctype += String.fromCodePoint(c);
- if (c === GREATER) {
- this.state = S_DTD;
- }
- }
- /** @private */
- sComment() {
- if (this.captureToChar(MINUS, "comment")) {
- this.state = S_COMMENT_ENDING;
- }
- }
- /** @private */
- sCommentEnding() {
- const c = this.getCode();
- if (c === MINUS) {
- this.state = S_COMMENT_ENDED;
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.oncomment(this.comment);
- this.comment = "";
- }
- else {
- this.comment += `-${String.fromCodePoint(c)}`;
- this.state = S_COMMENT;
- }
- }
- /** @private */
- sCommentEnded() {
- const c = this.getCode();
- if (c !== GREATER) {
- this.fail("malformed comment.");
- // <!-- blah -- bloo --> will be recorded as
- // a comment of " blah -- bloo "
- this.comment += `--${String.fromCodePoint(c)}`;
- this.state = S_COMMENT;
- }
- else {
- this.state = S_TEXT;
- }
- }
- /** @private */
- sCData() {
- if (this.captureToChar(CLOSE_BRACKET, "cdata")) {
- this.state = S_CDATA_ENDING;
- }
- }
- /** @private */
- sCDataEnding() {
- const c = this.getCode();
- if (c === CLOSE_BRACKET) {
- this.state = S_CDATA_ENDING_2;
- }
- else {
- this.cdata += `]${String.fromCodePoint(c)}`;
- this.state = S_CDATA;
- }
- }
- /** @private */
- sCDataEnding2() {
- const c = this.getCode();
- switch (c) {
- case GREATER:
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.oncdata(this.cdata);
- this.cdata = "";
- this.state = S_TEXT;
- break;
- case CLOSE_BRACKET:
- this.cdata += "]";
- break;
- default:
- this.cdata += `]]${String.fromCodePoint(c)}`;
- this.state = S_CDATA;
- }
- }
- /** @private */
- sPIFirstChar() {
- const c = this.getCode();
- if (this.nameStartCheck(c)) {
- this.piTarget += String.fromCodePoint(c);
- this.state = S_PI_REST;
- }
- else if (c === QUESTION || isS(c)) {
- this.fail("processing instruction without a target.");
- this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
- }
- else {
- this.fail("disallowed character in processing instruction name.");
- this.piTarget += String.fromCodePoint(c);
- this.state = S_PI_REST;
- }
- }
- /** @private */
- sPIRest() {
- const c = this.captureWhileNameCheck("piTarget");
- if ((c === QUESTION || isS(c))) {
- this.piIsXMLDecl = this.piTarget === "xml";
- if (this.piIsXMLDecl && !this.xmlDeclPossible) {
- this.fail("an XML declaration must be at the start of the document.");
- }
- this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY;
- }
- else if (c) {
- this.fail("disallowed character in processing instruction name.");
- this.piTarget += String.fromCodePoint(c);
- }
- }
- /** @private */
- sPIBody() {
- let c;
- if (this.piIsXMLDecl) {
- switch (this.xmlDeclState) {
- case S_XML_DECL_NAME_START: {
- c = this.getCode();
- if (isS(c)) {
- c = this.skipSpaces();
- }
- else if (this.requiredSeparator && c !== QUESTION) {
- this.fail("whitespace required.");
- }
- this.requiredSeparator = false;
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- return;
- }
- if (c) {
- this.xmlDeclState = S_XML_DECL_NAME;
- this.xmlDeclName = String.fromCodePoint(c);
- }
- break;
- }
- case S_XML_DECL_NAME:
- c = this.captureTo(XML_DECL_NAME_TERMINATOR, "xmlDeclName");
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- return;
- }
- if (isS(c) || c === EQUAL) {
- if (!this.xmlDeclExpects.includes(this.xmlDeclName)) {
- switch (this.xmlDeclName.length) {
- case 0:
- this.fail("did not expect any more name/value pairs.");
- break;
- case 1:
- this.fail(`expected the name ${this.xmlDeclExpects[0]}.`);
- break;
- default:
- this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`);
- }
- }
- this.xmlDeclState = (c === EQUAL) ? S_XML_DECL_VALUE_START :
- S_XML_DECL_EQ;
- }
- break;
- case S_XML_DECL_EQ:
- c = this.getCode();
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- return;
- }
- if (!isS(c)) {
- if (c !== EQUAL) {
- this.fail("value required.");
- }
- this.xmlDeclState = S_XML_DECL_VALUE_START;
- }
- break;
- case S_XML_DECL_VALUE_START:
- c = this.getCode();
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- return;
- }
- if (!isS(c)) {
- if (!isQuote(c)) {
- this.fail("value must be quoted.");
- this.q = SPACE;
- }
- else {
- this.q = c;
- }
- this.xmlDeclState = S_XML_DECL_VALUE;
- }
- break;
- case S_XML_DECL_VALUE:
- c = this.captureTo([this.q, QUESTION], "xmlDeclValue");
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- return;
- }
- if (c) {
- switch (this.xmlDeclName) {
- case "version":
- if (!/^1\.[0-9]+$/.test(this.xmlDeclValue)) {
- this.fail("version number must match /^1\\.[0-9]+$/.");
- }
- this.xmlDeclExpects = ["encoding", "standalone"];
- this.xmlDecl.version = this.xmlDeclValue;
- break;
- case "encoding":
- if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) {
- this.fail("encoding value must match \
- /^[A-Za-z0-9][A-Za-z0-9._-]*$/.");
- }
- this.xmlDeclExpects = ["standalone"];
- this.xmlDecl.encoding = this.xmlDeclValue;
- break;
- case "standalone":
- if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") {
- this.fail("standalone value must match \"yes\" or \"no\".");
- }
- this.xmlDeclExpects = [];
- this.xmlDecl.standalone = this.xmlDeclValue;
- break;
- default:
- // We don't need to raise an error here since we've already
- // raised one when checking what name was expected.
- }
- this.xmlDeclName = this.xmlDeclValue = "";
- this.xmlDeclState = S_XML_DECL_NAME_START;
- this.requiredSeparator = true;
- }
- break;
- default:
- throw new Error(this,
- `Unknown XML declaration state: ${this.xmlDeclState}`);
- }
- }
- else if (this.piBody.length === 0) {
- c = this.getCode();
- if (c === QUESTION) {
- this.state = S_PI_ENDING;
- }
- else if (!isS(c)) {
- this.piBody = String.fromCodePoint(c);
- }
- }
- // The question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- else if (this.captureToChar(QUESTION, "piBody")) {
- this.state = S_PI_ENDING;
- }
- }
- /** @private */
- sPIEnding() {
- const c = this.getCode();
- if (this.piIsXMLDecl) {
- if (c === GREATER) {
- if (this.piTarget !== "xml") {
- this.fail("processing instructions are not allowed before root.");
- }
- else if (this.xmlDeclState !== S_XML_DECL_NAME_START) {
- this.fail("XML declaration is incomplete.");
- }
- else if (this.xmlDeclExpects.includes("version")) {
- this.fail("XML declaration must contain a version.");
- }
- this.xmlDeclName = this.xmlDeclValue = "";
- this.requiredSeparator = false;
- this.piTarget = this.piBody = "";
- this.state = S_TEXT;
- }
- else {
- // We got here because the previous character was a ?, but the
- // question mark character is not valid inside any of the XML
- // declaration name/value pairs.
- this.fail(
- "The character ? is disallowed anywhere in XML declarations.");
- }
- }
- else if (c === GREATER) {
- if (this.piTarget.trim().toLowerCase() === "xml") {
- this.fail("the XML declaration must appear at the start of the document.");
- }
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.onprocessinginstruction({
- target: this.piTarget,
- body: this.piBody,
- });
- this.piTarget = this.piBody = "";
- this.state = S_TEXT;
- }
- else if (c === QUESTION) {
- // We ran into ?? as part of a processing instruction. We initially
- // took the first ? as a sign that the PI was ending, but it is
- // not. So we have to add it to the body but we take the new ? as a
- // sign that the PI is ending.
- this.piBody += "?";
- }
- else {
- this.piBody += `?${String.fromCodePoint(c)}`;
- this.state = S_PI_BODY;
- }
- this.xmlDeclPossible = false;
- }
- /** @private */
- sOpenTag() {
- const c = this.captureNameChars();
- if (!c) {
- return;
- }
- const tag = this.tag = {
- name: this.name,
- attributes: Object.create(null),
- };
- if (this.xmlnsOpt) {
- tag.ns = Object.create(null);
- }
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.onopentagstart(tag);
- this.sawRoot = true;
- if (!this.fragmentOpt && this.closedRoot) {
- this.fail("documents may contain only one root.");
- }
- switch (c) {
- case GREATER:
- this.openTag();
- break;
- case FORWARD_SLASH:
- this.state = S_OPEN_TAG_SLASH;
- break;
- default:
- if (!isS(c)) {
- this.fail("disallowed character in tag name.");
- }
- this.state = S_ATTRIB;
- }
- }
- /** @private */
- sOpenTagSlash() {
- const c = this.getCode();
- if (c === GREATER) {
- this.openSelfClosingTag();
- }
- else {
- this.fail("forward-slash in opening tag not followed by >.");
- this.state = S_ATTRIB;
- }
- }
- /** @private */
- sAttrib() {
- const c = this.skipSpaces();
- if (!c) {
- return;
- }
- if (isNameStartChar(c)) {
- this.name = String.fromCodePoint(c);
- this.state = S_ATTRIB_NAME;
- }
- else if (c === GREATER) {
- this.openTag();
- }
- else if (c === FORWARD_SLASH) {
- this.state = S_OPEN_TAG_SLASH;
- }
- else {
- this.fail("disallowed character in attribute name.");
- }
- }
- /** @private */
- pushAttribNS(name, value) {
- const { prefix, local } = this.qname(name);
- this.attribList.push({ name, prefix, local, value, uri: undefined });
- if (prefix === "xmlns") {
- const trimmed = value.trim();
- this.tag.ns[local] = trimmed;
- nsPairCheck(this, local, trimmed);
- }
- else if (name === "xmlns") {
- const trimmed = value.trim();
- this.tag.ns[""] = trimmed;
- nsPairCheck(this, "", trimmed);
- }
- }
- /** @private */
- pushAttribPlain(name, value) {
- this.attribList.push({ name, value });
- }
- /** @private */
- sAttribName() {
- const c = this.captureNameChars();
- if (c === EQUAL) {
- this.state = S_ATTRIB_VALUE;
- }
- else if (isS(c)) {
- this.state = S_ATTRIB_NAME_SAW_WHITE;
- }
- else if (c === GREATER) {
- this.fail("attribute without value.");
- this.pushAttrib(this.name, this.name);
- this.name = this.text = "";
- this.openTag();
- }
- else if (c) {
- this.fail("disallowed character in attribute name.");
- }
- }
- /** @private */
- sAttribNameSawWhite() {
- const c = this.skipSpaces();
- if (!c) {
- return;
- }
- if (c === EQUAL) {
- this.state = S_ATTRIB_VALUE;
- }
- else {
- this.fail("attribute without value.");
- this.tag.attributes[this.name] = "";
- this.text = "";
- this.name = "";
- if (c === GREATER) {
- this.openTag();
- }
- else if (isNameStartChar(c)) {
- this.name = String.fromCodePoint(c);
- this.state = S_ATTRIB_NAME;
- }
- else {
- this.fail("disallowed character in attribute name.");
- this.state = S_ATTRIB;
- }
- }
- }
- /** @private */
- sAttribValue() {
- const c = this.getCode();
- if (isQuote(c)) {
- this.q = c;
- this.state = S_ATTRIB_VALUE_QUOTED;
- }
- else if (!isS(c)) {
- this.fail("unquoted attribute value.");
- this.state = S_ATTRIB_VALUE_UNQUOTED;
- this.text = String.fromCodePoint(c);
- }
- }
- /** @private */
- sAttribValueQuoted() {
- // We deliberately do not use captureTo here. The specialized code we use
- // here is faster than using captureTo.
- const { q } = this;
- const { chunk, limit, i: start } = this;
- // eslint-disable-next-line no-constant-condition
- while (true) {
- if (this.i >= limit) {
- // This is faster than adding codepoints one by one.
- this.text += chunk.substring(start);
- return;
- }
- const code = this.getCode();
- if (code === q || code === AMP || code === LESS) {
- // This is faster than adding codepoints one by one.
- const slice = chunk.substring(start,
- this.i - (code <= 0xFFFF ? 1 : 2));
- switch (code) {
- case q:
- this.pushAttrib(this.name, this.text + slice);
- this.name = this.text = "";
- this.q = null;
- this.state = S_ATTRIB_VALUE_CLOSED;
- return;
- case AMP:
- this.text += slice;
- this.state = S_ENTITY;
- this.entityReturnState = S_ATTRIB_VALUE_QUOTED;
- return;
- default:
- this.text += slice;
- this.fail("disallowed character.");
- return;
- }
- }
- }
- }
- /** @private */
- sAttribValueClosed() {
- const c = this.getCode();
- if (isS(c)) {
- this.state = S_ATTRIB;
- }
- else if (c === GREATER) {
- this.openTag();
- }
- else if (c === FORWARD_SLASH) {
- this.state = S_OPEN_TAG_SLASH;
- }
- else if (isNameStartChar(c)) {
- this.fail("no whitespace between attributes.");
- this.name = String.fromCodePoint(c);
- this.state = S_ATTRIB_NAME;
- }
- else {
- this.fail("disallowed character in attribute name.");
- }
- }
- /** @private */
- sAttribValueUnquoted() {
- const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR, "text");
- if (c === AMP) {
- this.state = S_ENTITY;
- this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED;
- }
- else if (c === LESS) {
- this.fail("disallowed character.");
- }
- else if (c) {
- if (this.text.includes("]]>")) {
- this.fail("the string \"]]>\" is disallowed in char data.");
- }
- this.pushAttrib(this.name, this.text);
- this.name = this.text = "";
- if (c === GREATER) {
- this.openTag();
- }
- else {
- this.state = S_ATTRIB;
- }
- }
- }
- /** @private */
- sCloseTag() {
- const c = this.captureNameChars();
- if (c === GREATER) {
- this.closeTag();
- }
- else if (isS(c)) {
- this.state = S_CLOSE_TAG_SAW_WHITE;
- }
- else if (c) {
- this.fail("disallowed character in closing tag.");
- }
- }
- /** @private */
- sCloseTagSawWhite() {
- const c = this.skipSpaces();
- if (c === GREATER) {
- this.closeTag();
- }
- else if (c) {
- this.fail("disallowed character in closing tag.");
- }
- }
- /** @private */
- sEntity() {
- if (this.captureToChar(SEMICOLON, "entity")) {
- this.state = this.entityReturnState;
- if (this.entity === "") {
- this.fail("empty entity name.");
- this.text += "&;";
- return;
- }
- this.text += this.parseEntity(this.entity);
- this.entity = "";
- }
- }
- // END OF STATE HANDLERS
- /**
- * End parsing. This performs final well-formedness checks and resets the
- * parser to a clean state.
- *
- * @private
- *
- * @returns this
- */
- end() {
- if (!this.sawRoot) {
- this.fail("document must contain a root element.");
- }
- const { tags } = this;
- while (tags.length > 0) {
- const tag = tags.pop();
- this.fail(`unclosed tag: ${tag.name}`);
- }
- if ((this.state !== S_INITIAL) &&
- (this.state !== S_TEXT)) {
- this.fail("unexpected end.");
- }
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.closed = true;
- this.onend();
- this._init(this.opt);
- return this;
- }
- /**
- * If there's text to emit ``ontext``, emit it.
- *
- * @private
- */
- closeText() {
- this.ontext(this.text);
- this.text = "";
- }
- /**
- * Resolve a namespace prefix.
- *
- * @param {string} prefix The prefix to resolve.
- *
- * @returns {string|undefined} The namespace URI or ``undefined`` if the
- * prefix is not defined.
- */
- resolve(prefix) {
- let uri = this.tag.ns[prefix];
- if (uri !== undefined) {
- return uri;
- }
- const { tags } = this;
- for (let index = tags.length - 1; index >= 0; index--) {
- uri = tags[index].ns[prefix];
- if (uri !== undefined) {
- return uri;
- }
- }
- uri = this.ns[prefix];
- if (uri) {
- return uri;
- }
- const { resolvePrefix } = this.opt;
- return resolvePrefix ? resolvePrefix(prefix) : undefined;
- }
- /**
- * Parse a qname into its prefix and local name parts.
- *
- * @private
- *
- * @param {string} name The name to parse
- *
- * @returns {{prefix: string, local: string}}
- */
- qname(name) {
- const colon = name.indexOf(":");
- if (colon === -1) {
- return { prefix: "", local: name };
- }
- const local = name.substring(colon + 1);
- const prefix = name.substring(0, colon);
- if (prefix === "" || local === "" || local.includes(":")) {
- this.fail(`malformed name: ${name}.`);
- }
- return { prefix, local };
- }
- /** @private */
- processAttribsNS() {
- const { tag, attribList } = this;
- const { name: tagName, attributes } = tag;
- {
- // add namespace info to tag
- const { prefix, local } = this.qname(tagName);
- tag.prefix = prefix;
- tag.local = local;
- const uri = tag.uri = this.resolve(prefix) || "";
- if (prefix) {
- if (prefix === "xmlns") {
- this.fail("tags may not have \"xmlns\" as prefix.");
- }
- if (!uri) {
- this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
- tag.uri = prefix;
- }
- }
- }
- if (attribList.length === 0) {
- return;
- }
- const seen = new Set();
- // Note: do not apply default ns to attributes:
- // http://www.w3.org/TR/REC-xml-names/#defaulting
- for (const attr of attribList) {
- const { name, prefix, local } = attr;
- let uri;
- let eqname;
- if (prefix === "") {
- uri = (name === "xmlns") ? XMLNS_NAMESPACE : "";
- eqname = name;
- }
- else {
- uri = this.resolve(prefix);
- // if there's any attributes with an undefined namespace,
- // then fail on them now.
- if (!uri) {
- this.fail(`unbound namespace prefix: ${JSON.stringify(prefix)}.`);
- uri = prefix;
- }
- eqname = `{${uri}}${local}`;
- }
- if (seen.has(eqname)) {
- this.fail(`duplicate attribute: ${eqname}.`);
- }
- seen.add(eqname);
- attr.uri = uri;
- attributes[name] = attr;
- }
- this.attribList = [];
- }
- /** @private */
- processAttribsPlain() {
- const { attribList, tag: { attributes } } = this;
- for (const { name, value } of attribList) {
- if (attributes[name]) {
- this.fail(`duplicate attribute: ${name}.`);
- }
- attributes[name] = value;
- }
- this.attribList = [];
- }
- /**
- * Handle a complete open tag. This parser code calls this once it has seen
- * the whole tag. This method checks for well-formeness and then emits
- * ``onopentag``.
- *
- * @private
- */
- openTag() {
- this.processAttribs();
- const { tag, tags } = this;
- tag.isSelfClosing = false;
- // There cannot be any pending text here due to the onopentagstart that was
- // necessarily emitted before we get here. So we do not check text.
- this.onopentag(tag);
- tags.push(tag);
- this.state = S_TEXT;
- this.name = "";
- }
- /**
- * Handle a complete self-closing tag. This parser code calls this once it has
- * seen the whole tag. This method checks for well-formeness and then emits
- * ``onopentag`` and ``onclosetag``.
- *
- * @private
- */
- openSelfClosingTag() {
- this.processAttribs();
- const { tag, tags } = this;
- tag.isSelfClosing = true;
- // There cannot be any pending text here due to the onopentagstart that was
- // necessarily emitted before we get here. So we do not check text.
- this.onopentag(tag);
- this.onclosetag(tag);
- const top = this.tag = tags[tags.length - 1];
- if (!top) {
- this.closedRoot = true;
- }
- this.state = S_TEXT;
- this.name = "";
- }
- /**
- * Handle a complete close tag. This parser code calls this once it has seen
- * the whole tag. This method checks for well-formeness and then emits
- * ``onclosetag``.
- *
- * @private
- */
- closeTag() {
- const { tags, name } = this;
- // Our state after this will be S_TEXT, no matter what, and we can clear
- // tagName now.
- this.state = S_TEXT;
- this.name = "";
- if (!name) {
- this.fail("weird empty close tag.");
- this.text += "</>";
- return;
- }
- let l = tags.length;
- while (l-- > 0) {
- const tag = this.tag = tags.pop();
- if (this.text.length !== 0) {
- this.closeText();
- }
- this.onclosetag(tag);
- if (tag.name === name) {
- break;
- }
- this.fail("unexpected close tag.");
- }
- if (l === 0) {
- this.closedRoot = true;
- }
- else if (l < 0) {
- this.fail(`unmatched closing tag: ${name}.`);
- this.text += `</${name}>`;
- }
- }
- /**
- * Resolves an entity. Makes any necessary well-formedness checks.
- *
- * @private
- *
- * @param {string} entity The entity to resolve.
- *
- * @returns {string} The parsed entity.
- */
- parseEntity(entity) {
- if (entity[0] !== "#") {
- const defined = this.ENTITIES[entity];
- if (defined) {
- return defined;
- }
- this.fail(this.isName(entity) ? "undefined entity." :
- "disallowed character in entity name.");
- return `&${entity};`;
- }
- let num = NaN;
- if (entity[1] === "x" && /^#x[0-9a-f]+$/i.test(entity)) {
- num = parseInt(entity.slice(2), 16);
- }
- else if (/^#[0-9]+$/.test(entity)) {
- num = parseInt(entity.slice(1), 10);
- }
- // The character reference is required to match the CHAR production.
- if (!isChar(num)) {
- this.fail("malformed character entity.");
- return `&${entity};`;
- }
- return String.fromCodePoint(num);
- }
- }
- exports.SaxesParser = SaxesParser;
|