index.js 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. "use strict"
  2. var defaults = require('defaults')
  3. var combining = require('./combining')
  // Widths used when the caller supplies no configuration:
  // `nul` is reported for U+0000, `control` for the other C0/C1 controls and DEL.
  var DEFAULTS = { nul: 0, control: 0 }
  8. module.exports = function wcwidth(str) {
  9. return wcswidth(str, DEFAULTS)
  10. }
  /**
   * Build a measuring function bound to custom options.
   *
   * @param {Object} [opts] - Overrides for the `nul` / `control` widths.
   * @returns {function(*): number} A `wcwidth(str)` function that measures
   *   with the resolved options.
   */
  module.exports.config = function(opts) {
    // `defaults` comes from the external "defaults" package; presumably it
    // fills keys missing from the first argument using DEFAULTS (verify
    // against that package's documentation).
    var settings = defaults(opts || {}, DEFAULTS)

    return function wcwidth(str) {
      return wcswidth(str, settings)
    }
  }
  17. /*
  18. * The following functions define the column width of an ISO 10646
  19. * character as follows:
  20. * - The null character (U+0000) has a column width of opts.nul (0 by default).
  21. * - Other C0/C1 control characters and DEL have a column width of
  22. * opts.control (0 by default); a negative width makes wcswidth return -1.
  23. * - Non-spacing and enclosing combining characters (general category
  24. * code Mn or Me in the
  25. * Unicode database) have a column width of 0.
  26. * - SOFT HYPHEN (U+00AD) has a column width of 1.
  27. * - Other format characters (general category code Cf in the Unicode
  28. * database) and ZERO WIDTH
  29. * SPACE (U+200B) have a column width of 0.
  30. * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
  31. * have a column width of 0.
  32. * - Spacing characters in the East Asian Wide (W) or East Asian
  33. * Full-width (F) category as
  34. * defined in Unicode Technical Report #11 have a column width of 2.
  35. * - All remaining characters (including all printable ISO 8859-1 and
  36. * WGL4 characters, Unicode control characters, etc.) have a column
  37. * width of 1.
  38. * This implementation assumes that characters are encoded in ISO 10646.
  39. */
  /**
   * Total column width of a string.
   *
   * Non-string input is handed straight to wcwidth as a single value.
   * Strings are walked one UTF-16 code unit at a time (charCodeAt), so a
   * character outside the BMP is measured as its two surrogate halves.
   *
   * @param {*} str - String to measure, or a single value for wcwidth.
   * @param {Object} opts - Options forwarded to wcwidth for every unit.
   * @returns {number} Sum of the per-unit widths, or -1 as soon as any
   *   unit reports a negative width.
   */
  function wcswidth(str, opts) {
    if (typeof str !== 'string') {
      return wcwidth(str, opts)
    }

    var total = 0
    var index = 0
    var length = str.length

    while (index < length) {
      var width = wcwidth(str.charCodeAt(index), opts)
      // A negative width marks the whole string as unmeasurable.
      if (width < 0) {
        return -1
      }
      total += width
      index += 1
    }

    return total
  }
  /**
   * Column width of a single character code.
   *
   * @param {number} ucs - Character code to measure.
   * @param {{nul: number, control: number}} opts - Widths reported for
   *   U+0000 (`nul`) and for the other C0/C1 controls and DEL (`control`).
   * @returns {number} `opts.nul` or `opts.control` for control characters,
   *   0 when bisearch finds the code in the combining table, 2 for the
   *   wide ranges listed below, and 1 for everything else.
   */
  function wcwidth(ucs, opts) {
    // test for 8-bit control characters
    if (ucs === 0) return opts.nul
    if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return opts.control

    // binary search in table of non-spacing characters
    if (bisearch(ucs)) return 0

    // if we arrive here, ucs is not a combining or C0/C1 control character;
    // nothing below U+1100 is treated as wide
    var wide = ucs >= 0x1100 && (
      ucs <= 0x115f ||                                   // Hangul Jamo init. consonants
      ucs === 0x2329 || ucs === 0x232a ||
      (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) || // CJK ... Yi
      (ucs >= 0xac00 && ucs <= 0xd7a3) ||                // Hangul Syllables
      (ucs >= 0xf900 && ucs <= 0xfaff) ||                // CJK Compatibility Ideographs
      (ucs >= 0xfe10 && ucs <= 0xfe19) ||                // Vertical forms
      (ucs >= 0xfe30 && ucs <= 0xfe6f) ||                // CJK Compatibility Forms
      (ucs >= 0xff00 && ucs <= 0xff60) ||                // Fullwidth Forms
      (ucs >= 0xffe0 && ucs <= 0xffe6) ||
      (ucs >= 0x20000 && ucs <= 0x2fffd) ||
      (ucs >= 0x30000 && ucs <= 0x3fffd)
    )

    // Explicit widths instead of relying on `1 + <boolean>` coercion.
    return wide ? 2 : 1
  }
  /**
   * Binary search of the `combining` table for a character code.
   *
   * Each table entry is read as a two-element [first, last] pair, and the
   * search assumes the entries are sorted in ascending order.
   *
   * @param {number} ucs - Character code to look up.
   * @returns {boolean} true when `ucs` lies inside one of the ranges.
   */
  function bisearch(ucs) {
    var lo = 0
    var hi = combining.length - 1

    // Quick rejection: outside the span covered by the whole table.
    if (ucs < combining[0][0] || ucs > combining[hi][1]) {
      return false
    }

    while (lo <= hi) {
      var middle = lo + Math.floor((hi - lo) / 2)
      var range = combining[middle]

      if (ucs < range[0]) {
        hi = middle - 1
      } else if (ucs > range[1]) {
        lo = middle + 1
      } else {
        return true
      }
    }

    return false
  }
  84. }