xml.js 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  /**
   * @param {string} value
   * @returns {RegExp}
   * */
  /**
   * Returns the pattern text of a regular expression or pattern string.
   *
   * @param {RegExp | string} re - a RegExp (its `.source` is returned) or a
   *   string (returned unchanged)
   * @returns {string | null} the pattern text, or `null` when `re` is falsy
   *   (`null`, `undefined` or the empty string)
   */
  function source(re) {
    if (!re) return null;
    if (typeof re === "string") return re;
    return re.source;
  }
  /**
   * Wraps a pattern in a positive lookahead group, `(?=...)`.
   *
   * @param {RegExp | string} re - pattern to wrap
   * @returns {string} pattern text of the lookahead group
   */
  function lookahead(re) {
    return concat('(?=', re, ')');
  }
  /**
   * Wraps a pattern in a group that may match zero or one time, `(...)?`.
   *
   * @param {RegExp | string} re - pattern to make optional
   * @returns {string} pattern text of the optional group
   */
  function optional(re) {
    return concat('(', re, ')?');
  }
  /**
   * Joins the pattern text of every argument, in order, into one pattern.
   *
   * Each argument is converted with `source()`; a falsy argument yields `null`
   * there, which `Array.prototype.join` renders as an empty string.
   *
   * @param {...(RegExp | string) } args - patterns to concatenate
   * @returns {string} the concatenated pattern text ("" when called with no arguments)
   */
  function concat(...args) {
    return args.map((x) => source(x)).join("");
  }
  /**
   * Any of the passed expressions may match.
   *
   * Creates one group of alternatives: `(this|this|that|that)`.
   *
   * @param {...(RegExp | string) } args - alternative patterns
   * @returns {string} pattern text of the alternation group ("()" when called
   *   with no arguments)
   */
  function either(...args) {
    const alternatives = args.map((x) => source(x)).join("|");
    return `(${alternatives})`;
  }
  /*
  Language: HTML, XML
  Website: https://www.w3.org/XML/
  Category: common
  Audit: 2020
  */
  /**
   * Builds the highlight.js language definition for HTML and XML.
   *
   * The returned grammar lists its top-level modes in this order: `<!...>`
   * declarations (with an optional bracketed section holding nested
   * declarations), `<!-- -->` comments, CDATA sections, character/entity
   * references, the `<?xml ... ?>` declaration, `<style>` and `<script>`
   * elements (whose content is handed to sub-languages), JSX fragment tags,
   * opening tags and closing tags.
   *
   * Uses `hljs.inherit`, `hljs.COMMENT`, `hljs.APOS_STRING_MODE` and
   * `hljs.QUOTE_STRING_MODE` from the passed highlight.js instance.
   *
   * @type LanguageFn
   */
  function xml(hljs) {
    // Element names can contain letters, digits, hyphens, underscores, and periods,
    // optionally preceded by a `prefix:` part. The character classes list
    // upper-case letters only; the returned grammar sets `case_insensitive: true`.
    const TAG_NAME_RE = concat(/[A-Z_]/, optional(/[A-Z0-9_.-]*:/), /[A-Z0-9_.-]*/);
    // Attribute names.
    const XML_IDENT_RE = /[A-Za-z0-9._:-]+/;
    // Named (&name;), decimal (&#N;) and hexadecimal (&#xN;) references.
    const XML_ENTITIES = {
      className: 'symbol',
      begin: /&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;/
    };
    // Keywords inside a `<!...>` declaration, introduced by whitespace.
    const XML_META_KEYWORDS = {
      begin: /\s/,
      contains: [
        {
          className: 'meta-keyword',
          begin: /#?[a-z_][a-z1-9_-]+/,
          illegal: /\n/
        }
      ]
    };
    // Same keywords, but inside a parenthesised group.
    const XML_META_PAR_KEYWORDS = hljs.inherit(XML_META_KEYWORDS, {
      begin: /\(/,
      end: /\)/
    });
    const APOS_META_STRING_MODE = hljs.inherit(hljs.APOS_STRING_MODE, {
      className: 'meta-string'
    });
    const QUOTE_META_STRING_MODE = hljs.inherit(hljs.QUOTE_STRING_MODE, {
      className: 'meta-string'
    });
    // Everything between a tag name and the end of the tag: attribute names
    // and `=value` parts (double-quoted, single-quoted or unquoted).
    const TAG_INTERNALS = {
      endsWithParent: true,
      illegal: /</,
      relevance: 0,
      contains: [
        {
          className: 'attr',
          begin: XML_IDENT_RE,
          relevance: 0
        },
        {
          begin: /=\s*/,
          relevance: 0,
          contains: [
            {
              className: 'string',
              endsParent: true,
              variants: [
                {
                  begin: /"/,
                  end: /"/,
                  contains: [ XML_ENTITIES ]
                },
                {
                  begin: /'/,
                  end: /'/,
                  contains: [ XML_ENTITIES ]
                },
                {
                  begin: /[^\s"'=<>`]+/
                }
              ]
            }
          ]
        }
      ]
    };
    return {
      name: 'HTML, XML',
      aliases: [
        'html',
        'xhtml',
        'rss',
        'atom',
        'xjb',
        'xsd',
        'xsl',
        'plist',
        'wsf',
        'svg'
      ],
      case_insensitive: true,
      contains: [
        // <!DOCTYPE ...>, <!ENTITY ...> and similar declarations
        {
          className: 'meta',
          begin: /<![a-z]/,
          end: />/,
          relevance: 10,
          contains: [
            XML_META_KEYWORDS,
            QUOTE_META_STRING_MODE,
            APOS_META_STRING_MODE,
            XML_META_PAR_KEYWORDS,
            // bracketed section containing nested declarations
            {
              begin: /\[/,
              end: /\]/,
              contains: [
                {
                  className: 'meta',
                  begin: /<![a-z]/,
                  end: />/,
                  contains: [
                    XML_META_KEYWORDS,
                    XML_META_PAR_KEYWORDS,
                    QUOTE_META_STRING_MODE,
                    APOS_META_STRING_MODE
                  ]
                }
              ]
            }
          ]
        },
        hljs.COMMENT(
          /<!--/,
          /-->/,
          {
            relevance: 10
          }
        ),
        // CDATA section
        {
          begin: /<!\[CDATA\[/,
          end: /\]\]>/,
          relevance: 10
        },
        XML_ENTITIES,
        // <?xml ... ?> declaration
        {
          className: 'meta',
          begin: /<\?xml/,
          end: /\?>/,
          relevance: 10
        },
        {
          className: 'tag',
          /*
          The lookahead pattern (?=...) ensures that 'begin' only matches
          '<style' as a single word, followed by a whitespace or an
          ending bracket.
          */
          begin: /<style(?=\s|>)/,
          end: />/,
          keywords: {
            name: 'style'
          },
          contains: [ TAG_INTERNALS ],
          starts: {
            end: /<\/style>/,
            returnEnd: true,
            subLanguage: [
              'css',
              'xml'
            ]
          }
        },
        {
          className: 'tag',
          // Same lookahead as for '<style': only match '<script' as a whole
          // word, followed by whitespace or an ending bracket.
          begin: /<script(?=\s|>)/,
          end: />/,
          keywords: {
            name: 'script'
          },
          contains: [ TAG_INTERNALS ],
          starts: {
            end: /<\/script>/,
            returnEnd: true,
            subLanguage: [
              'javascript',
              'handlebars',
              'xml'
            ]
          }
        },
        // we need this for now for JSX
        {
          className: 'tag',
          begin: /<>|<\/>/
        },
        // open tag
        {
          className: 'tag',
          begin: concat(
            /</,
            lookahead(concat(
              TAG_NAME_RE,
              // <tag/>
              // <tag>
              // <tag ...
              either(/\/>/, />/, /\s/)
            ))
          ),
          end: /\/?>/,
          contains: [
            {
              className: 'name',
              begin: TAG_NAME_RE,
              relevance: 0,
              starts: TAG_INTERNALS
            }
          ]
        },
        // close tag
        {
          className: 'tag',
          begin: concat(
            /<\//,
            lookahead(concat(
              TAG_NAME_RE, />/
            ))
          ),
          contains: [
            {
              className: 'name',
              begin: TAG_NAME_RE,
              relevance: 0
            },
            {
              begin: />/,
              relevance: 0,
              endsParent: true
            }
          ]
        }
      ]
    };
  }
  // CommonJS export: the `xml` grammar function is this module's only export.
  module.exports = xml;