Utf8Encoder.h 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. /**
  2. * @file Utf8Encoder.h
  3. * @author Ambroz Bizjak <ambrop7@gmail.com>
  4. *
  5. * @section LICENSE
  6. *
  7. * This file is part of BadVPN.
  8. *
  9. * BadVPN is free software: you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License version 2
  11. * as published by the Free Software Foundation.
  12. *
  13. * BadVPN is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21. */
  22. #ifndef BADVPN_UTF8ENCODER_H
  23. #define BADVPN_UTF8ENCODER_H
  24. #include <stdint.h>
  /**
   * Encodes a Unicode character into a sequence of bytes according to UTF-8.
   *
   * Code points in the surrogate range (U+D800..U+DFFF) and code points above
   * U+10FFFF cannot be encoded; for these the function returns 0 and does not
   * write to the output buffer.
   *
   * @param ch Unicode character (code point) to encode
   * @param out will receive the encoded bytes. Must have space for 4 bytes.
   * @return number of bytes written, 0-4, with 0 meaning the character cannot
   *         be encoded
   */
  static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out);

  static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out)
  {
      // U+0000..U+007F: one byte, 0xxxxxxx
      if (ch <= UINT32_C(0x007F)) {
          out[0] = (uint8_t)ch;
          return 1;
      }

      // U+0080..U+07FF: two bytes, 110xxxxx 10xxxxxx
      if (ch <= UINT32_C(0x07FF)) {
          out[0] = (uint8_t)(0xC0 | (ch >> 6));
          out[1] = (uint8_t)(0x80 | (ch & 0x3F));
          return 2;
      }

      // U+0800..U+FFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx
      if (ch <= UINT32_C(0xFFFF)) {
          // surrogates are reserved for UTF-16 and have no UTF-8 encoding
          if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
              return 0;
          }

          out[0] = (uint8_t)(0xE0 | (ch >> 12));
          out[1] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
          out[2] = (uint8_t)(0x80 | (ch & 0x3F));
          return 3;
      }

      // U+10000..U+10FFFF: four bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      // The upper bound is inclusive: U+10FFFF is the last valid code point.
      if (ch <= UINT32_C(0x10FFFF)) {
          out[0] = (uint8_t)(0xF0 | (ch >> 18));
          out[1] = (uint8_t)(0x80 | ((ch >> 12) & 0x3F));
          out[2] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
          out[3] = (uint8_t)(0x80 | (ch & 0x3F));
          return 4;
      }

      // beyond the Unicode code space
      return 0;
  }
  64. #endif