string.go 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package seq
  5. import "unicode/utf16"
  6. // Based heavily on package unicode/utf16 from the Go standard library.
  7. const (
  8. replacementChar = '\uFFFD' // Unicode replacement character
  9. maxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  10. )
  11. const (
  12. // 0xd800-0xdc00 encodes the high 10 bits of a pair.
  13. // 0xdc00-0xe000 encodes the low 10 bits of a pair.
  14. // the value is those 20 bits plus 0x10000.
  15. surr1 = 0xd800
  16. surr2 = 0xdc00
  17. surr3 = 0xe000
  18. surrSelf = 0x10000
  19. )
  20. // UTF16Encode utf16 encodes s into chars. It returns the resulting
  21. // length in units of uint16. It is assumed that the chars slice
  22. // has enough room for the encoded string.
  23. func UTF16Encode(s string, chars []uint16) int {
  24. n := 0
  25. for _, v := range s {
  26. switch {
  27. case v < 0, surr1 <= v && v < surr3, v > maxRune:
  28. v = replacementChar
  29. fallthrough
  30. case v < surrSelf:
  31. chars[n] = uint16(v)
  32. n += 1
  33. default:
  34. // surrogate pair, two uint16 values
  35. r1, r2 := utf16.EncodeRune(v)
  36. chars[n] = uint16(r1)
  37. chars[n+1] = uint16(r2)
  38. n += 2
  39. }
  40. }
  41. return n
  42. }