r.js 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. /**
  2. * @param {string} value
  3. * @returns {RegExp}
  4. * */
  /**
   * Normalizes a pattern to its regular-expression source text.
   *
   * @param {RegExp | string | null | undefined} re - pattern to normalize
   * @returns {string | null} the string itself, the `source` of a RegExp, or
   *   `null` when no pattern was supplied
   */
  function source(re) {
    // Test for strings first: an empty string is still a (valid, empty)
    // pattern and must come back as "" rather than as "no pattern".
    if (typeof re === "string") return re;
    if (!re) return null;
    return re.source;
  }
  /**
   * Wraps a pattern in a positive lookahead group, i.e. `(?=` + pattern + `)`.
   *
   * @param {RegExp | string } re - pattern to place inside the lookahead
   * @returns {string} the lookahead expression as regex source text
   */
  function lookahead(re) {
    const pieces = ['(?=', re, ')'];
    return concat(...pieces);
  }
  /**
   * Joins any number of patterns into a single regex source string.
   * Every argument is first converted with `source`, then the pieces are
   * glued together without a separator.
   *
   * @param {...(RegExp | string) } args - patterns to concatenate, in order
   * @returns {string} the combined regex source text
   */
  function concat(...args) {
    const pieces = [];
    for (const pattern of args) {
      pieces.push(source(pattern));
    }
    return pieces.join("");
  }
  29. /*
  30. Language: R
  31. Description: R is a free software environment for statistical computing and graphics.
  32. Author: Joe Cheng <joe@rstudio.org>
  33. Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
  34. Website: https://www.r-project.org
  35. Category: common,scientific
  36. */
  /**
   * Language definition for R.
   *
   * Reads `COMMENT`, `HASH_COMMENT_MODE`, `BACKSLASH_ESCAPE` and
   * `END_SAME_AS_BEGIN` from the `hljs` argument and returns the mode object
   * describing R's keywords, Roxygen/hash comments, strings, numbers, infix
   * operators and backtick-quoted identifiers.
   *
   * @type LanguageFn
   */
  function r(hljs) {
    // Identifiers in R cannot start with `_`, but they can start with `.` if it
    // is not immediately followed by a digit.
    // R also supports quoted identifiers, which are near-arbitrary sequences
    // delimited by backticks (`…`), which may contain escape sequences. These are
    // handled in a separate mode. See `test/markup/r/names.txt` for examples.
    // FIXME: Support Unicode identifiers.
    const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;
    const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/;

    return {
      name: 'R',

      // NOTE: no `illegal` pattern is declared on purpose. `->` (and `->>`) is
      // R's rightward assignment operator (`value -> name`), so flagging it as
      // illegal would reject valid R code.

      keywords: {
        $pattern: IDENT_RE,
        keyword:
          'function if in break next repeat else for while',
        literal:
          'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 ' +
          'NA_character_|10 NA_complex_|10',
        built_in:
          // Builtin constants
          'LETTERS letters month.abb month.name pi T F ' +
          // Primitive functions
          // These are all the functions in `base` that are implemented as a
          // `.Primitive`, minus those functions that are also keywords.
          'abs acos acosh all any anyNA Arg as.call as.character ' +
          'as.complex as.double as.environment as.integer as.logical ' +
          'as.null.default as.numeric as.raw asin asinh atan atanh attr ' +
          'attributes baseenv browser c call ceiling class Conj cos cosh ' +
          'cospi cummax cummin cumprod cumsum digamma dim dimnames ' +
          'emptyenv exp expression floor forceAndCall gamma gc.time ' +
          'globalenv Im interactive invisible is.array is.atomic is.call ' +
          'is.character is.complex is.double is.environment is.expression ' +
          'is.finite is.function is.infinite is.integer is.language ' +
          'is.list is.logical is.matrix is.na is.name is.nan is.null ' +
          'is.numeric is.object is.pairlist is.raw is.recursive is.single ' +
          'is.symbol lazyLoadDBfetch length lgamma list log max min ' +
          'missing Mod names nargs nzchar oldClass on.exit pos.to.env ' +
          'proc.time prod quote range Re rep retracemem return round ' +
          'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt ' +
          'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
          'trigamma trunc unclass untracemem UseMethod xtfrm',
      },

      compilerExtensions: [
        // allow beforeMatch to act as a "qualifier" for the match
        // the full match begin must be [beforeMatch][begin]
        (mode, parent) => {
          if (!mode.beforeMatch) return;
          // starts conflicts with endsParent which we need to make sure the child
          // rule is not matched multiple times
          if (mode.starts) throw new Error("beforeMatch cannot be used with starts");

          // Keep a copy of the user-written mode, then empty the mode object in
          // place so it can be rebuilt as a wrapper around that copy.
          const originalMode = Object.assign({}, mode);
          Object.keys(mode).forEach((key) => { delete mode[key]; });

          // The wrapper consumes only `beforeMatch`, and only when the original
          // `begin` follows it; the original mode then matches as its sole child.
          mode.begin = concat(originalMode.beforeMatch, lookahead(originalMode.begin));
          mode.starts = {
            relevance: 0,
            contains: [
              Object.assign(originalMode, { endsParent: true })
            ]
          };
          mode.relevance = 0;

          delete originalMode.beforeMatch;
        }
      ],

      contains: [
        // Roxygen comments
        hljs.COMMENT(
          /#'/,
          /$/,
          {
            contains: [
              {
                // Handle `@examples` separately to cause all subsequent code
                // until the next `@`-tag on its own line to be kept as-is,
                // preventing highlighting. This code is example R code, so nested
                // doctags shouldn’t be treated as such. See
                // `test/markup/r/roxygen.txt` for an example.
                className: 'doctag',
                begin: '@examples',
                starts: {
                  contains: [
                    { begin: /\n/ },
                    {
                      begin: /#'\s*(?=@[a-zA-Z]+)/,
                      endsParent: true,
                    },
                    {
                      begin: /#'/,
                      end: /$/,
                      excludeBegin: true,
                    }
                  ]
                }
              },
              {
                // Handle `@param` to highlight the parameter name following
                // after.
                className: 'doctag',
                begin: '@param',
                end: /$/,
                contains: [
                  {
                    className: 'variable',
                    variants: [
                      { begin: IDENT_RE },
                      { begin: /`(?:\\.|[^`\\])+`/ }
                    ],
                    endsParent: true
                  }
                ]
              },
              {
                className: 'doctag',
                begin: /@[a-zA-Z]+/
              },
              {
                className: 'meta-keyword',
                begin: /\\[a-zA-Z]+/,
              }
            ]
          }
        ),

        hljs.HASH_COMMENT_MODE,

        {
          className: 'string',
          contains: [hljs.BACKSLASH_ESCAPE],
          variants: [
            // Raw strings: r"(...)", r"{...}", r"[...]" and the single-quoted
            // forms, each with an optional run of dashes that must also appear
            // in the closing delimiter.
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\{/, end: /\}(-*)"/ }),
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\[/, end: /\](-*)"/ }),
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\(/, end: /\)(-*)'/ }),
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\{/, end: /\}(-*)'/ }),
            hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\[/, end: /\](-*)'/ }),
            {begin: '"', end: '"', relevance: 0},
            {begin: "'", end: "'", relevance: 0}
          ],
        },

        {
          className: 'number',
          relevance: 0,
          beforeMatch: /([^a-zA-Z0-9._])/, // not part of an identifier
          variants: [
            // TODO: replace with negative look-behind when available
            // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
            // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
            // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
            {
              // Special case: only hexadecimal binary powers can contain fractions.
              match: /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
            },
            {
              match: /0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/
            },
            {
              match: /(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/,
            }
          ],
        },

        {
          // infix operator
          begin: '%',
          end: '%'
        },

        // relevance boost for assignment
        {
          begin: concat(SIMPLE_IDENT, "\\s+<-\\s+")
        },

        {
          // escaped identifier
          begin: '`',
          end: '`',
          contains: [
            { begin: /\\./ }
          ]
        }
      ]
    };
  }

  module.exports = r;