url-state-machine.js 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299
  1. "use strict";
  2. const punycode = require("punycode");
  3. const tr46 = require("tr46");
  4. const infra = require("./infra");
  5. const { percentEncode, percentDecode } = require("./urlencoded");
  /**
   * Table of the schemes treated as "special", keyed by scheme name.
   * The value is the scheme's default port; `null` means the scheme is special
   * but has no default port.
   */
  const specialSchemes = {
    ftp: 21,
    file: null,
    gopher: 70,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443
  };

  // Unique sentinel value the parsing helpers return to report failure.
  const failure = Symbol("failure");
  /**
   * Returns how many entries `punycode.ucs2.decode` produces for `str`.
   * @param {string} str
   * @returns {number}
   */
  function countSymbols(str) {
    const decoded = punycode.ucs2.decode(str);
    return decoded.length;
  }
  /**
   * Returns the one-code-point string for `input[idx]`, or `undefined` when
   * that entry is not a number (for example when `idx` is out of range).
   * @param {number[]} input - array of code points
   * @param {number} idx
   * @returns {string|undefined}
   */
  function at(input, idx) {
    const codePoint = input[idx];
    if (isNaN(codePoint)) {
      return undefined;
    }
    return String.fromCodePoint(codePoint);
  }
  /**
   * True when `buffer` is "." or its percent-encoded form "%2e"
   * (compared case-insensitively).
   * @param {string} buffer
   * @returns {boolean}
   */
  function isSingleDot(buffer) {
    if (buffer === ".") {
      return true;
    }
    return buffer.toLowerCase() === "%2e";
  }
  /**
   * True when `buffer` is ".." in any mix of literal and percent-encoded
   * ("%2e", case-insensitive) dots.
   * @param {string} buffer
   * @returns {boolean}
   */
  function isDoubleDot(buffer) {
    switch (buffer.toLowerCase()) {
      case "..":
      case "%2e.":
      case ".%2e":
      case "%2e%2e":
        return true;
      default:
        return false;
    }
  }
  /**
   * Checks a pair of code points for the Windows drive letter shape:
   * an ASCII letter followed by ":" (58) or "|" (124).
   * @param {number} cp1
   * @param {number} cp2
   */
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    const COLON = 58;
    const PIPE = 124;
    return infra.isASCIIAlpha(cp1) && (cp2 === COLON || cp2 === PIPE);
  }
  /**
   * True for a two-character string made of an ASCII letter followed by
   * ":" or "|".
   * @param {string} string
   */
  function isWindowsDriveLetterString(string) {
    const separator = string[1];
    return string.length === 2 &&
      infra.isASCIIAlpha(string.codePointAt(0)) &&
      (separator === ":" || separator === "|");
  }
  /**
   * True for a two-character string made of an ASCII letter followed by ":"
   * (the "|" form is not accepted here).
   * @param {string} string
   */
  function isNormalizedWindowsDriveLetterString(string) {
    const hasDriveLength = string.length === 2;
    return hasDriveLength && infra.isASCIIAlpha(string.codePointAt(0)) && string[1] === ":";
  }
  /**
   * True when `string` contains any of: NUL, tab, LF, CR, space,
   * "#", "%", "/", ":", "?", "@", "[", "\" or "]".
   * @param {string} string
   * @returns {boolean}
   */
  function containsForbiddenHostCodePoint(string) {
    const firstForbidden = string.search(/\u0000|\u0009|\u000A|\u000D|\u0020|#|%|\/|:|\?|@|\[|\\|\]/);
    return firstForbidden >= 0;
  }
  /**
   * Same check as the forbidden-host-code-point test, except that "%" is
   * allowed: NUL, tab, LF, CR, space, "#", "/", ":", "?", "@", "[", "\", "]".
   * @param {string} string
   * @returns {boolean}
   */
  function containsForbiddenHostCodePointExcludingPercent(string) {
    const firstForbidden = string.search(/\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|\?|@|\[|\\|\]/);
    return firstForbidden >= 0;
  }
  /**
   * Reports whether `scheme` is one of the keys of `specialSchemes`.
   *
   * Only own properties are considered. A plain lookup would also find names
   * inherited from Object.prototype, and "constructor" is a syntactically
   * valid (all-letters, lowercase) scheme that must not be treated as special.
   * @param {string} scheme
   * @returns {boolean}
   */
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme) &&
      specialSchemes[scheme] !== undefined;
  }
  /**
   * Whether the URL record's scheme is a special scheme.
   * @param {{scheme: string}} url
   */
  function isSpecial(url) {
    const { scheme } = url;
    return isSpecialScheme(scheme);
  }
  /**
   * Negation of the special-scheme test for a URL record.
   * @param {{scheme: string}} url
   * @returns {boolean}
   */
  function isNotSpecial(url) {
    const special = isSpecialScheme(url.scheme);
    return !special;
  }
  /**
   * Looks `scheme` up in `specialSchemes` and returns the stored value:
   * a port number, `null` for "file", or `undefined` when there is no entry.
   * @param {string} scheme
   */
  function defaultPort(scheme) {
    const port = specialSchemes[scheme];
    return port;
  }
  /**
   * Converts `c` to bytes with `Buffer.from` and concatenates the
   * `percentEncode` result of every byte.
   * @param {string} c
   * @returns {string}
   */
  function utf8PercentEncode(c) {
    let encoded = "";
    for (const byte of Buffer.from(c)) {
      encoded += percentEncode(byte);
    }
    return encoded;
  }
  /**
   * True for code points at or below U+001F and for code points above U+007E.
   * @param {number} c
   * @returns {boolean}
   */
  function isC0ControlPercentEncode(c) {
    const isControl = c <= 0x1F;
    const isBeyondTilde = c > 0x7E;
    return isControl || isBeyondTilde;
  }
  // Code points encoded in userinfo on top of the path set:
  // "/" ":" ";" "=" "@" "[" "\" "]" "^" "|".
  const extraUserinfoPercentEncodeSet =
    new Set(Array.from("/:;=@[\\]^|", (ch) => ch.codePointAt(0)));

  /**
   * A code point needs encoding in userinfo when the path predicate accepts it
   * or when it is listed in `extraUserinfoPercentEncodeSet`.
   * @param {number} c
   */
  function isUserinfoPercentEncode(c) {
    if (isPathPercentEncode(c)) {
      return true;
    }
    return extraUserinfoPercentEncodeSet.has(c);
  }
  // Code points encoded in fragments on top of the C0-control set:
  // space, '"', "<", ">", "`".
  const extraFragmentPercentEncodeSet =
    new Set(Array.from(" \"<>`", (ch) => ch.codePointAt(0)));

  /**
   * A code point needs encoding in a fragment when the C0-control predicate
   * accepts it or when it is listed in `extraFragmentPercentEncodeSet`.
   * @param {number} c
   */
  function isFragmentPercentEncode(c) {
    if (isC0ControlPercentEncode(c)) {
      return true;
    }
    return extraFragmentPercentEncodeSet.has(c);
  }
  // Code points encoded in paths on top of the fragment set: "#", "?", "{", "}".
  const extraPathPercentEncodeSet =
    new Set(Array.from("#?{}", (ch) => ch.codePointAt(0)));

  /**
   * A code point needs encoding in a path when the fragment predicate accepts
   * it or when it is listed in `extraPathPercentEncodeSet`.
   * @param {number} c
   */
  function isPathPercentEncode(c) {
    if (isFragmentPercentEncode(c)) {
      return true;
    }
    return extraPathPercentEncodeSet.has(c);
  }
  /**
   * Returns the code point `c` as a string, percent-encoded through
   * `utf8PercentEncode` when `encodeSetPredicate(c)` is truthy.
   * @param {number} c - code point
   * @param {function(number): boolean} encodeSetPredicate
   * @returns {string}
   */
  function percentEncodeChar(c, encodeSetPredicate) {
    const literal = String.fromCodePoint(c);
    return encodeSetPredicate(c) ? utf8PercentEncode(literal) : literal;
  }
  /**
   * Parses one dot-separated part of an IPv4 address.
   *
   * A "0x"/"0X" prefix selects hexadecimal and any other leading "0" (on an
   * input of two or more characters) selects octal; otherwise the part is
   * decimal. An empty digit string after prefix removal yields 0.
   * @param {string} input
   * @returns {number|symbol} the parsed value, or `failure` when a character
   *   is not a digit of the selected radix
   */
  function parseIPv4Number(input) {
    let radix = 10;
    let digits = input;
    if (input.length >= 2 && input[0] === "0") {
      if (input[1].toLowerCase() === "x") {
        digits = input.substring(2);
        radix = 16;
      } else {
        digits = input.substring(1);
        radix = 8;
      }
    }

    if (digits === "") {
      return 0;
    }

    let invalidDigit;
    switch (radix) {
      case 16:
        invalidDigit = /[^0-9A-Fa-f]/;
        break;
      case 10:
        invalidDigit = /[^0-9]/;
        break;
      default:
        invalidDigit = /[^0-7]/;
    }

    return invalidDigit.test(digits) ? failure : parseInt(digits, radix);
  }
  /**
   * Tries to interpret `input` as an IPv4 address.
   * @param {string} input
   * @returns {number|string|symbol} the address as a number; `input` itself
   *   when it does not have the shape of an IPv4 address (too many parts, an
   *   empty part, or a part that is not a number); `failure` when the parts
   *   are numeric but out of range
   */
  function parseIPv4(input) {
    const parts = input.split(".");
    // A single trailing dot is tolerated.
    if (parts.length > 1 && parts[parts.length - 1] === "") {
      parts.pop();
    }
    if (parts.length > 4) {
      return input;
    }

    const numbers = [];
    for (const part of parts) {
      const parsed = part === "" ? failure : parseIPv4Number(part);
      if (parsed === failure) {
        return input;
      }
      numbers.push(parsed);
    }

    // Every part but the last is one byte; the last part fills the rest.
    const last = numbers.pop();
    if (numbers.some((n) => n > 255)) {
      return failure;
    }
    if (last >= Math.pow(256, 4 - numbers.length)) {
      return failure;
    }

    return numbers.reduce((sum, n, position) => sum + n * Math.pow(256, 3 - position), last);
  }
  /**
   * Formats a numeric IPv4 address as four dot-separated decimal bytes.
   * @param {number} address
   * @returns {string}
   */
  function serializeIPv4(address) {
    const octets = [];
    let remaining = address;
    for (let i = 0; i < 4; ++i) {
      // Peel the lowest byte off and put it in front of the ones seen so far.
      octets.unshift(String(remaining % 256));
      remaining = Math.floor(remaining / 256);
    }
    return octets.join(".");
  }
  /**
   * Parses the text between the brackets of an IPv6 host.
   * @param {string} input
   * @returns {number[]|symbol} eight 16-bit pieces, or `failure`
   */
  function parseIPv6(input) {
    const COLON = 58;
    const DOT = 46;
    const address = [0, 0, 0, 0, 0, 0, 0, 0];
    const codePoints = punycode.ucs2.decode(input);
    let pieceIndex = 0;
    let compress = null; // piece index where "::" was seen, if any
    let pointer = 0;

    // A leading ":" is only valid as the start of "::".
    if (codePoints[pointer] === COLON) {
      if (codePoints[pointer + 1] !== COLON) {
        return failure;
      }
      pointer += 2;
      ++pieceIndex;
      compress = pieceIndex;
    }

    while (pointer < codePoints.length) {
      if (pieceIndex === 8) {
        return failure;
      }

      if (codePoints[pointer] === COLON) {
        // A second "::" is not allowed.
        if (compress !== null) {
          return failure;
        }
        ++pointer;
        ++pieceIndex;
        compress = pieceIndex;
        continue;
      }

      // Read up to four hex digits into one piece.
      let value = 0;
      let length = 0;
      while (length < 4 && infra.isASCIIHex(codePoints[pointer])) {
        value = value * 0x10 + parseInt(at(codePoints, pointer), 16);
        ++pointer;
        ++length;
      }

      if (codePoints[pointer] === DOT) {
        // Embedded IPv4 tail: rewind and re-read the digits as decimal bytes.
        if (length === 0) {
          return failure;
        }
        pointer -= length;
        if (pieceIndex > 6) {
          return failure;
        }

        let numbersSeen = 0;
        while (codePoints[pointer] !== undefined) {
          let ipv4Piece = null;

          if (numbersSeen > 0) {
            if (codePoints[pointer] !== DOT || numbersSeen >= 4) {
              return failure;
            }
            ++pointer;
          }

          if (!infra.isASCIIDigit(codePoints[pointer])) {
            return failure;
          }

          while (infra.isASCIIDigit(codePoints[pointer])) {
            const digit = parseInt(at(codePoints, pointer), 10);
            if (ipv4Piece === null) {
              ipv4Piece = digit;
            } else if (ipv4Piece === 0) {
              // No leading zeros in a multi-digit byte.
              return failure;
            } else {
              ipv4Piece = ipv4Piece * 10 + digit;
            }
            if (ipv4Piece > 255) {
              return failure;
            }
            ++pointer;
          }

          // Two bytes are packed into each 16-bit piece.
          address[pieceIndex] = address[pieceIndex] * 0x100 + ipv4Piece;
          ++numbersSeen;
          if (numbersSeen === 2 || numbersSeen === 4) {
            ++pieceIndex;
          }
        }

        if (numbersSeen !== 4) {
          return failure;
        }
        break;
      }

      if (codePoints[pointer] === COLON) {
        ++pointer;
        // A single trailing ":" is invalid.
        if (codePoints[pointer] === undefined) {
          return failure;
        }
      } else if (codePoints[pointer] !== undefined) {
        return failure;
      }

      address[pieceIndex] = value;
      ++pieceIndex;
    }

    if (compress !== null) {
      // Move the pieces read after "::" to the end of the address.
      let swaps = pieceIndex - compress;
      pieceIndex = 7;
      while (pieceIndex !== 0 && swaps > 0) {
        const source = compress + swaps - 1;
        const moved = address[source];
        address[source] = address[pieceIndex];
        address[pieceIndex] = moved;
        --pieceIndex;
        --swaps;
      }
    } else if (pieceIndex !== 8) {
      return failure;
    }

    return address;
  }
  /**
   * Formats an eight-piece IPv6 address as lowercase hex, collapsing the run
   * of zeros reported by `findLongestZeroSequence` into "::".
   * @param {number[]} address
   * @returns {string} the address without surrounding brackets
   */
  function serializeIPv6(address) {
    const { idx: compress } = findLongestZeroSequence(address);
    let output = "";
    let skippingZeros = false;

    for (let pieceIndex = 0; pieceIndex <= 7; ++pieceIndex) {
      const piece = address[pieceIndex];

      if (skippingZeros) {
        if (piece === 0) {
          continue;
        }
        skippingZeros = false;
      }

      if (compress === pieceIndex) {
        // Previous pieces already ended with ":", so only index 0 needs both.
        output += pieceIndex === 0 ? "::" : ":";
        skippingZeros = true;
        continue;
      }

      output += piece.toString(16);
      if (pieceIndex !== 7) {
        output += ":";
      }
    }

    return output;
  }
  /**
   * Parses a host string.
   * @param {string} input
   * @param {boolean} [isNotSpecialArg=false] - when true, a non-bracketed
   *   host is handed to `parseOpaqueHost`
   * @returns {number[]|number|string|symbol} the `parseIPv6` result for a
   *   bracketed input, the `parseOpaqueHost` result for a non-special URL,
   *   otherwise an IPv4 number or the ASCII domain; `failure` on error
   */
  function parseHost(input, isNotSpecialArg = false) {
    if (input[0] === "[") {
      const isClosed = input[input.length - 1] === "]";
      return isClosed ? parseIPv6(input.substring(1, input.length - 1)) : failure;
    }

    if (isNotSpecialArg) {
      return parseOpaqueHost(input);
    }

    const domain = percentDecode(Buffer.from(input)).toString();
    const asciiDomain = domainToASCII(domain);
    if (asciiDomain === failure || containsForbiddenHostCodePoint(asciiDomain)) {
      return failure;
    }

    // parseIPv4 hands back its input string when it is not IPv4-shaped.
    const ipv4Host = parseIPv4(asciiDomain);
    const isIPv4Outcome = typeof ipv4Host === "number" || ipv4Host === failure;
    return isIPv4Outcome ? ipv4Host : asciiDomain;
  }
  /**
   * Parses the host of a non-special URL: rejects forbidden code points
   * ("%" is allowed) and percent-encodes each code point with the C0-control
   * predicate.
   * @param {string} input
   * @returns {string|symbol} the encoded host, or `failure`
   */
  function parseOpaqueHost(input) {
    if (containsForbiddenHostCodePointExcludingPercent(input)) {
      return failure;
    }
    return punycode.ucs2.decode(input).reduce(
      (output, codePoint) => output + percentEncodeChar(codePoint, isC0ControlPercentEncode),
      ""
    );
  }
  /**
   * Finds the first longest run of two or more consecutive zeros in `arr`.
   * @param {number[]} arr
   * @returns {{idx: (number|null), len: number}} start index and length of the
   *   run; `{ idx: null, len: 1 }` when no run is longer than one element
   */
  function findLongestZeroSequence(arr) {
    let best = { idx: null, len: 1 }; // len 1 so only runs of 2+ qualify
    let runStart = null;
    let runLen = 0;

    // Record the current run if it strictly beats the best one (first wins ties).
    const commitRun = () => {
      if (runLen > best.len) {
        best = { idx: runStart, len: runLen };
      }
    };

    for (const [i, value] of arr.entries()) {
      if (value === 0) {
        if (runStart === null) {
          runStart = i;
        }
        ++runLen;
      } else {
        commitRun();
        runStart = null;
        runLen = 0;
      }
    }
    // A run that reaches the end of the array is still pending here.
    commitRun();

    return best;
  }
  /**
   * Serializes a parsed host: numbers go through `serializeIPv4`, arrays go
   * through `serializeIPv6` wrapped in brackets, anything else is returned
   * unchanged.
   * @param {number|number[]|string} host
   */
  function serializeHost(host) {
    if (typeof host === "number") {
      return serializeIPv4(host);
    }
    const isIPv6 = host instanceof Array;
    return isIPv6 ? "[" + serializeIPv6(host) + "]" : host;
  }
  /**
   * Runs `tr46.toASCII` on `domain`.
   * @param {string} domain
   * @param {boolean} [beStrict=false] - forwarded as both
   *   `useSTD3ASCIIRules` and `verifyDNSLength`
   * @returns {string|symbol} the converted domain, or `failure` when
   *   `tr46.toASCII` returns `null`
   */
  function domainToASCII(domain, beStrict = false) {
    const options = {
      checkBidi: true,
      checkHyphens: false,
      checkJoiners: true,
      useSTD3ASCIIRules: beStrict,
      verifyDNSLength: beStrict
    };
    const result = tr46.toASCII(domain, options);
    return result === null ? failure : result;
  }
  /**
   * Removes leading and trailing runs of U+0000..U+0020 (C0 controls and
   * space) from `url`.
   * @param {string} url
   * @returns {string}
   */
  function trimControlChars(url) {
    const edgeControlsOrSpaces = /^[\u0000-\u001F\u0020]+|[\u0000-\u001F\u0020]+$/g;
    return url.replace(edgeControlsOrSpaces, "");
  }
  /**
   * Removes every tab, line feed and carriage return from `url`, wherever it
   * occurs.
   * @param {string} url
   * @returns {string}
   */
  function trimTabAndNewline(url) {
    const tabOrNewline = /\u0009|\u000A|\u000D/g;
    return url.replace(tabOrNewline, "");
  }
  /**
   * Removes the last segment of `url.path` in place. Nothing is removed when
   * the path is empty, or when a "file" URL's path consists of a single
   * normalized Windows drive letter.
   * @param {{scheme: string, path: string[]}} url
   */
  function shortenPath(url) {
    const segments = url.path;
    if (segments.length === 0) {
      return;
    }

    const isLoneDriveLetter = url.scheme === "file" &&
      segments.length === 1 &&
      isNormalizedWindowsDriveLetter(segments[0]);
    if (!isLoneDriveLetter) {
      segments.pop();
    }
  }
  /**
   * True when the URL record has a non-empty username or password.
   * @param {{username: string, password: string}} url
   * @returns {boolean}
   */
  function includesCredentials(url) {
    const hasNoCredentials = url.username === "" && url.password === "";
    return !hasNoCredentials;
  }
  /**
   * Truthy when the URL record has a null or empty host, is flagged
   * cannot-be-a-base-URL, or uses the "file" scheme.
   * @param {{host: *, cannotBeABaseURL: boolean, scheme: string}} url
   */
  function cannotHaveAUsernamePasswordPort(url) {
    const { host } = url;
    const hasNoHost = host === null || host === "";
    return hasNoHost || url.cannotBeABaseURL || url.scheme === "file";
  }
  /**
   * True when `string` is exactly one ASCII letter followed by ":".
   * @param {string} string
   * @returns {boolean}
   */
  function isNormalizedWindowsDriveLetter(string) {
    const driveLetterPattern = /^[A-Za-z]:$/;
    return driveLetterPattern.test(string);
  }
  /**
   * Runs the URL parsing state machine over `input`. Results are left on the
   * instance: `url` (the record), `failure` and `parseError`.
   *
   * @param {string} input - text to parse
   * @param {?Object} base - base URL record; stored as `null` when falsy
   * @param {string} [encodingOverride] - stored as "utf-8" when falsy
   * @param {Object} [url] - existing record to modify; when falsy a fresh
   *   record is created and leading/trailing controls and spaces are trimmed
   *   from the input
   * @param {string} [stateOverride] - state to start in instead of
   *   "scheme start"
   */
  function URLStateMachine(input, base, encodingOverride, url, stateOverride) {
    this.pointer = 0;
    this.input = input;
    this.base = base || null;
    this.encodingOverride = encodingOverride || "utf-8";
    this.stateOverride = stateOverride;
    this.url = url;
    this.failure = false;
    this.parseError = false;

    if (!this.url) {
      this.url = {
        scheme: "",
        username: "",
        password: "",
        host: null,
        port: null,
        path: [],
        query: null,
        fragment: null,
        cannotBeABaseURL: false
      };

      const trimmed = trimControlChars(this.input);
      if (trimmed !== this.input) {
        this.parseError = true;
      }
      this.input = trimmed;
    }

    const stripped = trimTabAndNewline(this.input);
    if (stripped !== this.input) {
      this.parseError = true;
    }
    this.input = stripped;

    this.state = stateOverride || "scheme start";
    this.buffer = "";
    this.atFlag = false;
    this.arrFlag = false;
    this.passwordTokenSeenFlag = false;

    // From here on the input is an array of code points.
    this.input = punycode.ucs2.decode(this.input);

    // The loop runs one step past the end so handlers see the end of input
    // as c === undefined. Handlers may move this.pointer themselves.
    while (this.pointer <= this.input.length) {
      const c = this.input[this.pointer];
      const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);

      const ret = this["parse " + this.state](c, cStr);
      if (!ret) {
        break; // handler asked to stop
      }
      if (ret === failure) {
        this.failure = true;
        break;
      }
      ++this.pointer;
    }
  }
  /**
   * "scheme start" state: an ASCII letter begins a scheme; anything else
   * falls back to "no scheme" (re-reading the same code point), or fails when
   * a state override is in effect.
   */
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (infra.isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
      return true;
    }

    if (this.stateOverride) {
      this.parseError = true;
      return failure;
    }

    this.state = "no scheme";
    --this.pointer;
    return true;
  };
  /**
   * "scheme" state: accumulates scheme characters until ":" and then picks
   * the next state from the scheme that was read.
   *
   * With a state override the handler returns `false` (stop) once the scheme
   * has been handled, and the scheme is left unchanged when the change would
   * switch between special and non-special, give a "file" URL credentials or
   * a port, or apply to a "file" URL with an empty or null host.
   */
  URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
    const isSchemeCodePoint = infra.isASCIIAlphanumeric(c) || c === 43 || c === 45 || c === 46;
    if (isSchemeCodePoint) {
      this.buffer += cStr.toLowerCase();
      return true;
    }

    if (c !== 58) {
      // Not a scheme after all: restart from the beginning without one.
      if (this.stateOverride) {
        this.parseError = true;
        return failure;
      }
      this.buffer = "";
      this.state = "no scheme";
      this.pointer = -1;
      return true;
    }

    const { url } = this;

    if (this.stateOverride) {
      const urlIsSpecial = isSpecial(url);
      const bufferIsSpecial = isSpecialScheme(this.buffer);
      if ((urlIsSpecial && !bufferIsSpecial) || (!urlIsSpecial && bufferIsSpecial)) {
        return false;
      }
      if ((includesCredentials(url) || url.port !== null) && this.buffer === "file") {
        return false;
      }
      if (url.scheme === "file" && (url.host === "" || url.host === null)) {
        return false;
      }
    }

    url.scheme = this.buffer;

    if (this.stateOverride) {
      if (url.port === defaultPort(url.scheme)) {
        url.port = null;
      }
      return false;
    }

    this.buffer = "";
    const nextCodePoint = this.input[this.pointer + 1];

    if (url.scheme === "file") {
      if (nextCodePoint !== 47 || this.input[this.pointer + 2] !== 47) {
        this.parseError = true;
      }
      this.state = "file";
    } else if (isSpecial(url)) {
      const sameSchemeAsBase = this.base !== null && this.base.scheme === url.scheme;
      this.state = sameSchemeAsBase ? "special relative or authority" : "special authority slashes";
    } else if (nextCodePoint === 47) {
      this.state = "path or authority";
      ++this.pointer;
    } else {
      url.cannotBeABaseURL = true;
      url.path.push("");
      this.state = "cannot-be-a-base-URL path";
    }

    return true;
  };
  /**
   * "no scheme" state: resolves the input against the base URL. Fails without
   * a base, or when the base cannot be a base URL and the input is not a bare
   * fragment ("#...").
   */
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    const { base } = this;
    if (base === null) {
      return failure;
    }

    if (base.cannotBeABaseURL) {
      if (c !== 35) {
        return failure;
      }
      // Only the fragment changes; everything else comes from the base.
      this.url.scheme = base.scheme;
      this.url.path = base.path.slice();
      this.url.query = base.query;
      this.url.fragment = "";
      this.url.cannotBeABaseURL = true;
      this.state = "fragment";
      return true;
    }

    this.state = base.scheme === "file" ? "file" : "relative";
    --this.pointer;
    return true;
  };
  /**
   * "special relative or authority" state: "//" starts an authority;
   * anything else is a parse error and is re-read in the "relative" state.
   */
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    const startsWithDoubleSlash = c === 47 && this.input[this.pointer + 1] === 47;
    if (startsWithDoubleSlash) {
      this.state = "special authority ignore slashes";
      ++this.pointer; // consume the second slash as well
      return true;
    }

    this.parseError = true;
    this.state = "relative";
    --this.pointer;
    return true;
  };
  /**
   * "path or authority" state: a "/" leads to "authority"; any other code
   * point is re-read in the "path" state.
   */
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c === 47) {
      this.state = "authority";
      return true;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "relative" state: resolves a scheme-less input against the base URL.
   *
   * "/" (and "\" for special schemes) continues in "relative slash". In every
   * other case the authority is copied from the base; end of input, "?" and
   * "#" also copy the whole base path, while any other code point drops the
   * base's last path segment and is re-read in the "path" state.
   */
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    const { base, url } = this;
    url.scheme = base.scheme;

    if (c === 47) {
      this.state = "relative slash";
      return true;
    }
    if (c === 92 && isSpecial(url)) {
      this.parseError = true;
      this.state = "relative slash";
      return true;
    }

    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;

    if (isNaN(c)) {
      url.path = base.path.slice();
      url.query = base.query;
    } else if (c === 63) {
      url.path = base.path.slice();
      url.query = "";
      this.state = "query";
    } else if (c === 35) {
      url.path = base.path.slice();
      url.query = base.query;
      url.fragment = "";
      this.state = "fragment";
    } else {
      url.path = base.path.slice(0, base.path.length - 1);
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  /**
   * "relative slash" state: a second slash starts an authority ("\" counts as
   * a slash for special schemes, with a parse error); otherwise the base's
   * authority is inherited and the code point is re-read in the "path" state.
   */
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    const isSlash = c === 47;
    const isBackslash = c === 92;

    if ((isSlash || isBackslash) && isSpecial(this.url)) {
      if (isBackslash) {
        this.parseError = true;
      }
      this.state = "special authority ignore slashes";
      return true;
    }

    if (isSlash) {
      this.state = "authority";
      return true;
    }

    this.url.username = this.base.username;
    this.url.password = this.base.password;
    this.url.host = this.base.host;
    this.url.port = this.base.port;
    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "special authority slashes" state: expects "//". Both outcomes continue
   * in "special authority ignore slashes"; a missing "//" is a parse error
   * and the current code point is re-read.
   */
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    const hasDoubleSlash = c === 47 && this.input[this.pointer + 1] === 47;

    this.state = "special authority ignore slashes";
    if (hasDoubleSlash) {
      ++this.pointer;
    } else {
      this.parseError = true;
      --this.pointer;
    }
    return true;
  };
  /**
   * "special authority ignore slashes" state: skips any further "/" or "\"
   * (each one is a parse error); the first other code point is re-read in
   * the "authority" state.
   */
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    if (c === 47 || c === 92) {
      this.parseError = true;
      return true;
    }

    this.state = "authority";
    --this.pointer;
    return true;
  };
  /**
   * "authority" state: buffers code points until "@" or the end of the
   * authority.
   *
   * On "@" the buffer is split at the first ":" into username and password,
   * each code point is percent-encoded with the userinfo set, and the result
   * is appended to the URL record. A later "@" means the earlier one was part
   * of the userinfo, so it is put back as "%40".
   *
   * At the end of the authority the pointer is rewound to the start of the
   * buffered text so that the "host" state re-reads it.
   *
   * @param {number|undefined} c - current code point, undefined at end of input
   * @param {string|undefined} cStr - `c` as a string
   * @returns {boolean|symbol} `true` to continue, `failure` when credentials
   *   are followed by an empty host
   */
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === 64) {
      this.parseError = true;
      if (this.atFlag) {
        this.buffer = "%40" + this.buffer;
      }
      this.atFlag = true;

      // Walk the buffer by code point. Indexing with codePointAt(i) for i up
      // to the code point count would step by UTF-16 unit instead, splitting
      // surrogate pairs and dropping the tail of the userinfo.
      for (const symbol of this.buffer) {
        const codePoint = symbol.codePointAt(0);

        if (codePoint === 58 && !this.passwordTokenSeenFlag) {
          this.passwordTokenSeenFlag = true;
          continue;
        }
        const encodedCodePoints = percentEncodeChar(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeenFlag) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
               (isSpecial(this.url) && c === 92)) {
      if (this.atFlag && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // this.input holds code points, so rewind by the buffer's code point
      // count (+1 for the terminator just read).
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  /**
   * "host" / "hostname" state (one handler registered under both names).
   *
   * Buffers code points until ":" outside brackets (then parses the host and
   * moves to "port") or until the end of the host (then parses the host and
   * moves to "path start"). Bracket tracking keeps the colons of an IPv6
   * literal inside the buffer.
   *
   * @returns {boolean|symbol} `true` to continue, `false` to stop (state
   *   override), `failure` on an empty or unparsable host
   */
  URLStateMachine.prototype["parse hostname"] =
  URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
    const { url } = this;

    if (this.stateOverride && url.scheme === "file") {
      --this.pointer;
      this.state = "file host";
      return true;
    }

    if (c === 58 && !this.arrFlag) {
      if (this.buffer === "") {
        this.parseError = true;
        return failure;
      }

      const parsedHost = parseHost(this.buffer, isNotSpecial(url));
      if (parsedHost === failure) {
        return failure;
      }

      url.host = parsedHost;
      this.buffer = "";
      this.state = "port";
      return this.stateOverride === "hostname" ? false : true;
    }

    const endsHost = isNaN(c) || c === 47 || c === 63 || c === 35 ||
      (isSpecial(url) && c === 92);
    if (!endsHost) {
      if (c === 91) {
        this.arrFlag = true;
      } else if (c === 93) {
        this.arrFlag = false;
      }
      this.buffer += cStr;
      return true;
    }

    --this.pointer;
    if (this.buffer === "") {
      if (isSpecial(url)) {
        this.parseError = true;
        return failure;
      }
      if (this.stateOverride && (includesCredentials(url) || url.port !== null)) {
        this.parseError = true;
        return false;
      }
    }

    const parsedHost = parseHost(this.buffer, isNotSpecial(url));
    if (parsedHost === failure) {
      return failure;
    }

    url.host = parsedHost;
    this.buffer = "";
    this.state = "path start";
    return this.stateOverride ? false : true;
  };
  /**
   * "port" state: buffers ASCII digits; at the end of the port (or at any
   * non-digit when a state override is set) stores the number, using `null`
   * when it equals the scheme's default port.
   *
   * @returns {boolean|symbol} `true` to continue, `false` to stop (state
   *   override), `failure` for a port above 65535 or an unexpected code point
   */
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (infra.isASCIIDigit(c)) {
      this.buffer += cStr;
      return true;
    }

    const endsPort = isNaN(c) || c === 47 || c === 63 || c === 35 ||
      (isSpecial(this.url) && c === 92) ||
      this.stateOverride;
    if (!endsPort) {
      this.parseError = true;
      return failure;
    }

    if (this.buffer !== "") {
      const largestPort = Math.pow(2, 16) - 1;
      const port = parseInt(this.buffer, 10);
      if (port > largestPort) {
        this.parseError = true;
        return failure;
      }
      this.url.port = port === defaultPort(this.url.scheme) ? null : port;
      this.buffer = "";
    }

    if (this.stateOverride) {
      return false;
    }
    this.state = "path start";
    --this.pointer;
    return true;
  };
  // Code points allowed right after a drive letter: "/", "\", "?", "#".
  const fileOtherwiseCodePoints = new Set(Array.from("/\\?#", (ch) => ch.codePointAt(0)));

  /**
   * Whether the code points of `input` starting at `pointer` begin with a
   * Windows drive letter that is either the end of the input or is followed
   * by "/", "\", "?" or "#".
   * @param {number[]} input - array of code points
   * @param {number} pointer - index to start looking at
   */
  function startsWithWindowsDriveLetter(input, pointer) {
    const remaining = input.length - pointer;
    if (!(remaining >= 2)) {
      return false;
    }
    return isWindowsDriveLetterCodePoints(input[pointer], input[pointer + 1]) &&
      (remaining === 2 || fileOtherwiseCodePoints.has(input[pointer + 2]));
  }
  /**
   * "file" state: handles what follows "file:" (or a scheme-less input
   * against a "file" base).
   *
   * A slash or backslash continues in "file slash". With a "file" base, end
   * of input, "?" and "#" inherit the base's host and path; anything else
   * inherits them minus the last path segment, unless the rest of the input
   * starts with a Windows drive letter (a parse error, nothing inherited).
   */
  URLStateMachine.prototype["parse file"] = function parseFile(c) {
    const { url, base } = this;
    url.scheme = "file";

    if (c === 47 || c === 92) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "file slash";
      return true;
    }

    if (base === null || base.scheme !== "file") {
      this.state = "path";
      --this.pointer;
      return true;
    }

    if (isNaN(c) || c === 63 || c === 35) {
      url.host = base.host;
      url.path = base.path.slice();
      if (c === 63) {
        url.query = "";
        this.state = "query";
      } else {
        url.query = base.query;
        if (c === 35) {
          url.fragment = "";
          this.state = "fragment";
        }
      }
      return true;
    }

    if (startsWithWindowsDriveLetter(this.input, this.pointer)) {
      this.parseError = true;
    } else {
      url.host = base.host;
      url.path = base.path.slice();
      shortenPath(url);
    }
    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "file slash" state: a second slash or backslash leads to "file host".
   * Otherwise the code point is re-read in the "path" state; with a "file"
   * base (and no drive letter at the current position) the base's drive
   * letter is carried over as first path segment, or else its host.
   */
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    if (c === 47 || c === 92) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "file host";
      return true;
    }

    const { base } = this;
    const inheritsFromFileBase = base !== null && base.scheme === "file" &&
      !startsWithWindowsDriveLetter(this.input, this.pointer);
    if (inheritsFromFileBase) {
      const firstSegment = base.path[0];
      if (isNormalizedWindowsDriveLetterString(firstSegment)) {
        this.url.path.push(firstSegment);
      } else {
        this.url.host = base.host;
      }
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  /**
   * "file host" state: buffers code points until the end of the host part of
   * a "file" URL, then decides what the buffer was.
   *
   * - A Windows drive letter (without a state override) is really the start
   *   of the path: parse error, continue in "path" with the buffer kept.
   * - An empty buffer gives an empty host.
   * - Anything else is parsed as a host; "localhost" becomes the empty host.
   *
   * @returns {boolean|symbol} `true` to continue, `false` to stop (state
   *   override), `failure` when the host cannot be parsed
   */
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    const endsHost = isNaN(c) || c === 47 || c === 92 || c === 63 || c === 35;
    if (!endsHost) {
      this.buffer += cStr;
      return true;
    }

    --this.pointer;

    if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
      this.parseError = true;
      this.state = "path";
      return true;
    }

    if (this.buffer === "") {
      this.url.host = "";
      if (this.stateOverride) {
        return false;
      }
      this.state = "path start";
      return true;
    }

    const parsedHost = parseHost(this.buffer, isNotSpecial(this.url));
    if (parsedHost === failure) {
      return failure;
    }
    this.url.host = parsedHost === "localhost" ? "" : parsedHost;

    if (this.stateOverride) {
      return false;
    }
    this.buffer = "";
    this.state = "path start";
    return true;
  };
  /**
   * "path start" state: picks the state that follows the authority.
   *
   * Special URLs always continue in "path", consuming one leading "/" or "\".
   * For other URLs "?" and "#" start the query or fragment (unless a state
   * override is set), end of input changes nothing, and anything else
   * continues in "path", consuming a leading "/".
   */
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    if (isSpecial(this.url)) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "path";
      if (c !== 47 && c !== 92) {
        --this.pointer;
      }
      return true;
    }

    const startsComponent = !this.stateOverride && (c === 63 || c === 35);
    if (startsComponent) {
      if (c === 63) {
        this.url.query = "";
        this.state = "query";
      } else {
        this.url.fragment = "";
        this.state = "fragment";
      }
      return true;
    }

    if (c !== undefined) {
      this.state = "path";
      if (c !== 47) {
        --this.pointer;
      }
    }
    return true;
  };
  /**
   * "path" state: builds one path segment in the buffer and commits it when
   * the segment ends ("/", "\" for special URLs, "?", "#" or end of input;
   * "?" and "#" do not end a segment under a state override).
   *
   * ".." removes the previous segment and "." is dropped; when either one is
   * the last segment an empty segment is appended instead. For "file" URLs a
   * drive letter as first segment is normalized to "X:" and clears any host,
   * and leading empty segments are stripped once the path is complete.
   */
  URLStateMachine.prototype["parse path"] = function parsePath(c) {
    const { url } = this;
    const isSpecialBackslash = isSpecial(url) && c === 92;
    const endsSegment = isNaN(c) || c === 47 || isSpecialBackslash ||
      (!this.stateOverride && (c === 63 || c === 35));

    if (!endsSegment) {
      // TODO: If c is not a URL code point and not "%", parse error.
      const isBrokenEscape = c === 37 &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
          !infra.isASCIIHex(this.input[this.pointer + 2]));
      if (isBrokenEscape) {
        this.parseError = true;
      }
      this.buffer += percentEncodeChar(c, isPathPercentEncode);
      return true;
    }

    if (isSpecialBackslash) {
      this.parseError = true;
    }

    // True when another segment follows the one being committed.
    const moreSegmentsFollow = c === 47 || isSpecialBackslash;

    if (isDoubleDot(this.buffer)) {
      shortenPath(url);
      if (!moreSegmentsFollow) {
        url.path.push("");
      }
    } else if (isSingleDot(this.buffer)) {
      if (!moreSegmentsFollow) {
        url.path.push("");
      }
    } else {
      const isLeadingDriveLetter = url.scheme === "file" && url.path.length === 0 &&
        isWindowsDriveLetterString(this.buffer);
      if (isLeadingDriveLetter) {
        if (url.host !== "" && url.host !== null) {
          this.parseError = true;
          url.host = "";
        }
        this.buffer = this.buffer[0] + ":";
      }
      url.path.push(this.buffer);
    }
    this.buffer = "";

    if (url.scheme === "file" && (c === undefined || c === 63 || c === 35)) {
      while (url.path.length > 1 && url.path[0] === "") {
        this.parseError = true;
        url.path.shift();
      }
    }

    if (c === 63) {
      url.query = "";
      this.state = "query";
    } else if (c === 35) {
      url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  /**
   * "cannot-be-a-base-URL path" state handler: appends the current code point
   * to the single path entry `this.url.path[0]`, or switches to the query /
   * fragment state on "?" (63) / "#" (35).
   *
   * @param {number|undefined} c - Current code point; a NaN-like value (end of
   *   input) leaves everything untouched.
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse cannot-be-a-base-URL path"] = function parseCannotBeABaseURLPath(c) {
    if (c === 63) {
      this.url.query = "";
      this.state = "query";
      return true;
    }
    if (c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }
    if (isNaN(c)) {
      return true;
    }

    // TODO: Add: not a URL code point
    // Until that check exists, every code point other than "%" is flagged,
    // and "%" is flagged when not followed by two ASCII hex digits.
    const flagged = c !== 37 ||
      !infra.isASCIIHex(this.input[this.pointer + 1]) ||
      !infra.isASCIIHex(this.input[this.pointer + 2]);
    if (flagged) {
      this.parseError = true;
    }

    this.url.path[0] += percentEncodeChar(c, isC0ControlPercentEncode);
    return true;
  };
  /**
   * "query" state handler: collects query code points in `this.buffer` and,
   * at end of input or at "#" (35, only without a state override), encodes
   * the buffered text byte by byte into `this.url.query`.
   *
   * @param {number|undefined} c - Current code point.
   * @param {string} cStr - String form of `c`; appended to the buffer.
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
    const queryEnds = isNaN(c) || (!this.stateOverride && c === 35);

    if (!queryEnds) {
      // TODO: If c is not a URL code point and not "%", parse error.
      const malformedEscape = c === 37 &&
        !(infra.isASCIIHex(this.input[this.pointer + 1]) &&
          infra.isASCIIHex(this.input[this.pointer + 2]));
      if (malformedEscape) {
        this.parseError = true;
      }
      this.buffer += cStr;
      return true;
    }

    const special = isSpecial(this.url);
    if (!special || this.url.scheme === "ws" || this.url.scheme === "wss") {
      this.encodingOverride = "utf-8";
    }

    const bytes = Buffer.from(this.buffer); // TODO: Use encoding override instead
    for (const byte of bytes) {
      // Escaped: everything outside 0x21-0x7E, plus '"', "#", "<", ">", and
      // "'" for special URLs only.
      const mustEscape = byte < 0x21 ||
        byte > 0x7E ||
        byte === 0x22 || byte === 0x23 || byte === 0x3C || byte === 0x3E ||
        (byte === 0x27 && special);
      this.url.query += mustEscape ? percentEncode(byte) : String.fromCodePoint(byte);
    }
    this.buffer = "";

    if (c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  /**
   * "fragment" state handler: appends the current code point, encoded via
   * `percentEncodeChar`, to `this.url.fragment`.
   *
   * @param {number|undefined} c - Current code point; a NaN-like value (end of
   *   input) is ignored, and U+0000 is flagged and dropped.
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
    if (isNaN(c)) {
      // End of input: nothing to do.
      return true;
    }

    if (c === 0x0) {
      this.parseError = true;
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.
    const malformedEscape = c === 37 &&
      !(infra.isASCIIHex(this.input[this.pointer + 1]) &&
        infra.isASCIIHex(this.input[this.pointer + 2]));
    if (malformedEscape) {
      this.parseError = true;
    }

    this.url.fragment += percentEncodeChar(c, isFragmentPercentEncode);
    return true;
  };
  /**
   * Serializes a URL record to a string.
   *
   * @param {object} url - Record with `scheme`, `username`, `password`, `host`,
   *   `port`, `path`, `query`, `fragment` and `cannotBeABaseURL` fields.
   * @param {boolean} [excludeFragment] - When truthy, the "#fragment" part is
   *   left out.
   * @returns {string} The serialized URL.
   */
  function serializeURL(url, excludeFragment) {
    let serialized = `${url.scheme}:`;

    if (url.host !== null) {
      serialized += "//";
      if (url.username !== "" || url.password !== "") {
        // Credentials: "user", "user:pass" or ":pass", always followed by "@".
        const passwordPart = url.password !== "" ? `:${url.password}` : "";
        serialized += `${url.username}${passwordPart}@`;
      }
      serialized += serializeHost(url.host);
      if (url.port !== null) {
        serialized += `:${url.port}`;
      }
    } else if (url.scheme === "file") {
      // A host-less file URL still gets the "//" marker.
      serialized += "//";
    }

    if (url.cannotBeABaseURL) {
      serialized += url.path[0];
    } else {
      for (const segment of url.path) {
        serialized += `/${segment}`;
      }
    }

    if (url.query !== null) {
      serialized += `?${url.query}`;
    }
    if (!excludeFragment && url.fragment !== null) {
      serialized += `#${url.fragment}`;
    }
    return serialized;
  }
  /**
   * Serializes a tuple origin as "scheme://host" with an optional ":port".
   *
   * @param {{scheme: string, host: *, port: (number|null)}} tuple - Origin
   *   tuple; `host` is passed through `serializeHost`.
   * @returns {string} The serialized origin.
   */
  function serializeOrigin(tuple) {
    const withoutPort = `${tuple.scheme}://${serializeHost(tuple.host)}`;
    return tuple.port === null ? withoutPort : `${withoutPort}:${tuple.port}`;
  }
  module.exports.serializeURL = serializeURL;

  /**
   * Serializes the origin of a URL record.
   * https://url.spec.whatwg.org/#concept-url-origin
   *
   * @param {object} url - URL record; `scheme`, `host`, `port` and `path` are
   *   read.
   * @returns {string} "scheme://host[:port]" for tuple-origin schemes,
   *   "file://" for file URLs, the origin of the inner URL for blob URLs, and
   *   "null" (an opaque origin) for everything else.
   */
  module.exports.serializeURLOrigin = function (url) {
    const { scheme } = url;

    if (scheme === "blob") {
      try {
        const innerURL = module.exports.parseURL(url.path[0]);
        return module.exports.serializeURLOrigin(innerURL);
      } catch (e) {
        // serializing an opaque origin returns "null"
        // (this also covers an inner URL that failed to parse: parseURL then
        // yields null and reading its scheme throws)
        return "null";
      }
    }

    if (scheme === "file") {
      // spec says "exercise to the reader", chrome says "file://"
      return "file://";
    }

    const tupleOriginSchemes = ["ftp", "gopher", "http", "https", "ws", "wss"];
    if (tupleOriginSchemes.includes(scheme)) {
      return serializeOrigin({
        scheme,
        host: url.host,
        port: url.port
      });
    }

    // serializing an opaque origin returns "null"
    return "null";
  };
  /**
   * Runs the URL state machine over `input`.
   *
   * @param {string} input - Text to parse.
   * @param {object} [options] - Optional `baseURL`, `encodingOverride`, `url`
   *   and `stateOverride`, forwarded to `URLStateMachine` in that order.
   * @returns {object|null} The machine's `url`, or `null` when the machine
   *   reports `failure`.
   */
  module.exports.basicURLParse = function (input, options) {
    const settings = options === undefined ? {} : options;
    const machine = new URLStateMachine(
      input,
      settings.baseURL,
      settings.encodingOverride,
      settings.url,
      settings.stateOverride
    );
    return machine.failure ? null : machine.url;
  };
  /**
   * Replaces `url.username` with the encoded form of `username`: each code
   * point from `punycode.ucs2.decode` is run through `percentEncodeChar` with
   * the userinfo predicate.
   *
   * @param {object} url - URL record to modify in place.
   * @param {string} username - New, unencoded username.
   */
  module.exports.setTheUsername = function (url, username) {
    url.username = "";
    for (const codePoint of punycode.ucs2.decode(username)) {
      url.username += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  /**
   * Replaces `url.password` with the encoded form of `password`: each code
   * point from `punycode.ucs2.decode` is run through `percentEncodeChar` with
   * the userinfo predicate.
   *
   * @param {object} url - URL record to modify in place.
   * @param {string} password - New, unencoded password.
   */
  module.exports.setThePassword = function (url, password) {
    url.password = "";
    for (const codePoint of punycode.ucs2.decode(password)) {
      url.password += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  // Re-export helpers that are defined outside this section.
  module.exports.serializeHost = serializeHost;
  module.exports.cannotHaveAUsernamePasswordPort = cannotHaveAUsernamePasswordPort;

  /**
   * Serializes an integer via `String()`.
   *
   * @param {number} integer - Value to serialize.
   * @returns {string} The string form of `integer`.
   */
  module.exports.serializeInteger = function (integer) {
    const serialized = String(integer);
    return serialized;
  };
  /**
   * Parses `input` as a URL, forwarding only `baseURL` and `encodingOverride`
   * from `options` to `basicURLParse`.
   *
   * @param {string} input - Text to parse.
   * @param {object} [options] - Optional `baseURL` and `encodingOverride`.
   * @returns {object|null} Whatever `basicURLParse` returns.
   */
  module.exports.parseURL = function (input, options) {
    const { baseURL, encodingOverride } = options === undefined ? {} : options;
    // We don't handle blobs, so this just delegates:
    return module.exports.basicURLParse(input, { baseURL, encodingOverride });
  };