14 лет назад · 5cffb05fd7
--- a/misc/Utf16Decoder.h
+++ b/misc/Utf16Decoder.h
@@ -0,0 +1,106 @@
 
															+/**
														
 
															+ * @file Utf16Decoder.h
														
 
															+ * @author Ambroz Bizjak <ambrop7@gmail.com>
														
 
															+ * 
														
 
															+ * @section LICENSE
														
 
															+ * 
														
 
															+ * This file is part of BadVPN.
														
 
															+ * 
														
 
															+ * BadVPN is free software: you can redistribute it and/or modify
														
 
															+ * it under the terms of the GNU General Public License version 2
														
 
															+ * as published by the Free Software Foundation.
														
 
															+ * 
														
 
															+ * BadVPN is distributed in the hope that it will be useful,
														
 
															+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
														
 
															+ * GNU General Public License for more details.
														
 
															+ * 
														
 
															+ * You should have received a copy of the GNU General Public License along
														
 
															+ * with this program; if not, write to the Free Software Foundation, Inc.,
														
 
															+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
														
 
															+ */
														
 
															+
														
 
															+#ifndef BADVPN_UTF16DECODER_H
														
 
															+#define BADVPN_UTF16DECODER_H
														
 
															+
														
 
															+#include <stdint.h>
														
 
															+
														
 
															+#include <misc/debug.h>
														
 
															+
														
 
															+/**
														
 
															+ * Decodes UTF-16 data into Unicode characters.
														
 
															+ */
														
 
															+typedef struct {
														
 
															+    int cont;
														
 
															+    uint32_t ch;
														
 
															+} Utf16Decoder;
														
 
															+
														
 
															+/**
														
 
															+ * Initializes the UTF-16 decoder.
														
 
															+ * 
														
 
															+ * @param o the object
														
 
															+ */
														
 
															+static void Utf16Decoder_Init (Utf16Decoder *o);
														
 
															+
														
 
															+/**
														
 
															+ * Inputs a 16-bit value to the decoder.
														
 
															+ * 
														
 
															+ * @param o the object
														
 
															+ * @param b 16-bit value to input
														
 
															+ * @param out_ch will receive a Unicode character if this function returns 1.
														
 
															+ *               If written, the character will be in the range 0 - 0x10FFFF,
														
 
															+ *               excluding the surrogate range 0xD800 - 0xDFFF.
														
 
															+ * @return 1 if a Unicode character has been written to *out_ch, 0 if not
														
 
															+ */
														
 
															+static int Utf16Decoder_Input (Utf16Decoder *o, uint16_t b, uint32_t *out_ch);
														
 
															+
														
 
															+void Utf16Decoder_Init (Utf16Decoder *o)
														
 
															+{
														
 
															+    o->cont = 0;
														
 
															+}
														
 
															+
														
 
															+int Utf16Decoder_Input (Utf16Decoder *o, uint16_t b, uint32_t *out_ch)
														
 
															+{
														
 
															+    // high surrogate
														
 
															+    if (b >= UINT16_C(0xD800) && b <= UINT16_C(0xDBFF)) {
														
 
															+        // set continuation state
														
 
															+        o->cont = 1;
														
 
															+        
														
 
															+        // add high bits
														
 
															+        o->ch = (uint32_t)(b - UINT16_C(0xD800)) << 10;
														
 
															+        
														
 
															+        return 0;
														
 
															+    }
														
 
															+    
														
 
															+    // low surrogate
														
 
															+    if (b >= UINT16_C(0xDC00) && b <= UINT16_C(0xDFFF)) {
														
 
															+        // check continuation
														
 
															+        if (!o->cont) {
														
 
															+            return 0;
														
 
															+        }
														
 
															+        
														
 
															+        // add low bits
														
 
															+        o->ch |= (b - UINT16_C(0xDC00));
														
 
															+        
														
 
															+        // reset state
														
 
															+        o->cont = 0;
														
 
															+        
														
 
															+        // don't report surrogates
														
 
															+        if (o->ch >= UINT32_C(0xD800) && o->ch <= UINT32_C(0xDFFF)) {
														
 
															+            return 0;
														
 
															+        }
														
 
															+        
														
 
															+        // return character
														
 
															+        *out_ch = o->ch;
														
 
															+        return 1;
														
 
															+    }
														
 
															+    
														
 
															+    // reset state
														
 
															+    o->cont = 0;
														
 
															+    
														
 
															+    // return character
														
 
															+    *out_ch = b;
														
 
															+    return 1;
														
 
															+}
														
 
															+
														
 
															+#endif
														
--- a/misc/Utf16Encoder.h
+++ b/misc/Utf16Encoder.h
@@ -0,0 +1,60 @@
 
															+/**
														
 
															+ * @file Utf16Encoder.h
														
 
															+ * @author Ambroz Bizjak <ambrop7@gmail.com>
														
 
															+ * 
														
 
															+ * @section LICENSE
														
 
															+ * 
														
 
															+ * This file is part of BadVPN.
														
 
															+ * 
														
 
															+ * BadVPN is free software: you can redistribute it and/or modify
														
 
															+ * it under the terms of the GNU General Public License version 2
														
 
															+ * as published by the Free Software Foundation.
														
 
															+ * 
														
 
															+ * BadVPN is distributed in the hope that it will be useful,
														
 
															+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
														
 
															+ * GNU General Public License for more details.
														
 
															+ * 
														
 
															+ * You should have received a copy of the GNU General Public License along
														
 
															+ * with this program; if not, write to the Free Software Foundation, Inc.,
														
 
															+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
														
 
															+ */
														
 
															+
														
 
															+#ifndef BADVPN_UTF16ENCODER_H
														
 
															+#define BADVPN_UTF16ENCODER_H
														
 
															+
														
 
															+#include <stdint.h>
														
 
															+
														
 
															+/**
														
 
															+ * Encodes a Unicode character into a sequence of 16-bit values according to UTF-16.
														
 
															+ * 
														
 
															+ * @param ch Unicode character to encode
														
 
															+ * @param out will receive the encoded 16-bit values. Must have space for 2 values.
														
 
															+ * @return number of 16-bit values written, 0-2, with 0 meaning the character cannot
														
 
															+ *         be encoded
														
 
															+ */
														
 
															+static int Utf16Encoder_EncodeCharacter (uint32_t ch, uint16_t *out);
														
 
															+
														
 
															+int Utf16Encoder_EncodeCharacter (uint32_t ch, uint16_t *out)
														
 
															+{
														
 
															+    if (ch <= UINT32_C(0xFFFF)) {
														
 
															+        // surrogates
														
 
															+        if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
														
 
															+            return 0;
														
 
															+        }
														
 
															+        
														
 
															+        out[0] = ch;
														
 
															+        return 1;
														
 
															+    }
														
 
															+    
														
 
															+    if (ch <= UINT32_C(0x10FFFF)) {
														
 
															+        uint32_t x = ch - UINT32_C(0x10000);
														
 
															+        out[0] = UINT32_C(0xD800) + (x >> 10);
														
 
															+        out[1] = UINT32_C(0xDC00) + (x & UINT32_C(0x3FF));
														
 
															+        return 2;
														
 
															+    }
														
 
															+    
														
 
															+    return 0;
														
 
															+}
														
 
															+
														
 
															+#endif
														
--- a/misc/Utf8Decoder.h
+++ b/misc/Utf8Decoder.h
@@ -0,0 +1,136 @@
 
															+/**
														
 
															+ * @file Utf8Decoder.h
														
 
															+ * @author Ambroz Bizjak <ambrop7@gmail.com>
														
 
															+ * 
														
 
															+ * @section LICENSE
														
 
															+ * 
														
 
															+ * This file is part of BadVPN.
														
 
															+ * 
														
 
															+ * BadVPN is free software: you can redistribute it and/or modify
														
 
															+ * it under the terms of the GNU General Public License version 2
														
 
															+ * as published by the Free Software Foundation.
														
 
															+ * 
														
 
															+ * BadVPN is distributed in the hope that it will be useful,
														
 
															+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
														
 
															+ * GNU General Public License for more details.
														
 
															+ * 
														
 
															+ * You should have received a copy of the GNU General Public License along
														
 
															+ * with this program; if not, write to the Free Software Foundation, Inc.,
														
 
															+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
														
 
															+ */
														
 
															+
														
 
															+#ifndef BADVPN_UTF8DECODER_H
														
 
															+#define BADVPN_UTF8DECODER_H
														
 
															+
														
 
															+#include <stdint.h>
														
 
															+
														
 
															+#include <misc/debug.h>
														
 
															+
														
 
															+/**
														
 
															+ * Decodes UTF-8 data into Unicode characters.
														
 
															+ */
														
 
															+typedef struct {
														
 
															+    int bytes;
														
 
															+    int pos;
														
 
															+    uint32_t ch;
														
 
															+} Utf8Decoder;
														
 
															+
														
 
															+/**
														
 
															+ * Initializes the UTF-8 decoder.
														
 
															+ * 
														
 
															+ * @param o the object
														
 
															+ */
														
 
															+static void Utf8Decoder_Init (Utf8Decoder *o);
														
 
															+
														
 
															+/**
														
 
															+ * Inputs a byte to the decoder.
														
 
															+ * 
														
 
															+ * @param o the object
														
 
															+ * @param b byte to input
														
 
															+ * @param out_ch will receive a Unicode character if this function returns 1.
														
 
															+ *               If written, the character will be in the range 0 - 0x10FFFF,
														
 
															+ *               excluding the surrogate range 0xD800 - 0xDFFF.
														
 
															+ * @return 1 if a Unicode character has been written to *out_ch, 0 if not
														
 
															+ */
														
 
															+static int Utf8Decoder_Input (Utf8Decoder *o, uint8_t b, uint32_t *out_ch);
														
 
															+
														
 
															+void Utf8Decoder_Init (Utf8Decoder *o)
														
 
															+{
														
 
															+    o->bytes = 0;
														
 
															+}
														
 
															+
														
 
															+int Utf8Decoder_Input (Utf8Decoder *o, uint8_t b, uint32_t *out_ch)
														
 
															+{
														
 
															+    // one-byte character
														
 
															+    if ((b & 128) == 0) {
														
 
															+        o->bytes = 0;
														
 
															+        *out_ch = b;
														
 
															+        return 1;
														
 
															+    }
														
 
															+    
														
 
															+    // start of two-byte character
														
 
															+    if ((b & 224) == 192) {
														
 
															+        o->bytes = 2;
														
 
															+        o->pos = 1;
														
 
															+        o->ch = (uint32_t)(b & 31) << 6;
														
 
															+        return 0;
														
 
															+    }
														
 
															+    
														
 
															+    // start of three-byte character
														
 
															+    if ((b & 240) == 224) {
														
 
															+        o->bytes = 3;
														
 
															+        o->pos = 1;
														
 
															+        o->ch = (uint32_t)(b & 15) << 12;
														
 
															+        return 0;
														
 
															+    }
														
 
															+    
														
 
															+    // start of four-byte character
														
 
															+    if ((b & 248) == 240) {
														
 
															+        o->bytes = 4;
														
 
															+        o->pos = 1;
														
 
															+        o->ch = (uint32_t)(b & 7) << 18;
														
 
															+        return 0;
														
 
															+    }
														
 
															+    
														
 
															+    // continuation of multi-byte character
														
 
															+    if ((b & 192) == 128 && o->bytes > 0) {
														
 
															+        ASSERT(o->bytes <= 4)
														
 
															+        ASSERT(o->pos > 0)
														
 
															+        ASSERT(o->pos < o->bytes)
														
 
															+        
														
 
															+        // add bits from this byte
														
 
															+        o->ch |= (uint32_t)(b & 63) << (6 * (o->bytes - o->pos - 1));
														
 
															+        
														
 
															+        // end of multi-byte character?
														
 
															+        if (o->pos == o->bytes - 1) {
														
 
															+            // reset state
														
 
															+            o->bytes = 0;
														
 
															+            
														
 
															+            // don't report out-of-range characters
														
 
															+            if (o->ch > UINT32_C(0x10FFFF)) {
														
 
															+                return 0;
														
 
															+            }
														
 
															+            
														
 
															+            // don't report surrogates
														
 
															+            if (o->ch >= UINT32_C(0xD800) && o->ch <= UINT32_C(0xDFFF)) {
														
 
															+                return 0;
														
 
															+            }
														
 
															+            
														
 
															+            *out_ch = o->ch;
														
 
															+            return 1;
														
 
															+        }
														
 
															+        
														
 
															+        // increment byte index
														
 
															+        o->pos++;
														
 
															+        
														
 
															+        return 0;
														
 
															+    }
														
 
															+    
														
 
															+    // error, reset state
														
 
															+    o->bytes = 0;
														
 
															+    
														
 
															+    return 0;
														
 
															+}
														
 
															+
														
 
															+#endif
														
--- a/misc/Utf8Encoder.h
+++ b/misc/Utf8Encoder.h
@@ -0,0 +1,74 @@
 
															+/**
														
 
															+ * @file Utf8Encoder.h
														
 
															+ * @author Ambroz Bizjak <ambrop7@gmail.com>
														
 
															+ * 
														
 
															+ * @section LICENSE
														
 
															+ * 
														
 
															+ * This file is part of BadVPN.
														
 
															+ * 
														
 
															+ * BadVPN is free software: you can redistribute it and/or modify
														
 
															+ * it under the terms of the GNU General Public License version 2
														
 
															+ * as published by the Free Software Foundation.
														
 
															+ * 
														
 
															+ * BadVPN is distributed in the hope that it will be useful,
														
 
															+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
														
 
															+ * GNU General Public License for more details.
														
 
															+ * 
														
 
															+ * You should have received a copy of the GNU General Public License along
														
 
															+ * with this program; if not, write to the Free Software Foundation, Inc.,
														
 
															+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
														
 
															+ */
														
 
															+
														
 
															+#ifndef BADVPN_UTF8ENCODER_H
														
 
															+#define BADVPN_UTF8ENCODER_H
														
 
															+
														
 
															+#include <stdint.h>
														
 
															+
														
 
															+/**
														
 
															+ * Encodes a Unicode character into a sequence of bytes according to UTF-8.
														
 
															+ * 
														
 
															+ * @param ch Unicode character to encode
														
 
															+ * @param out will receive the encoded bytes. Must have space for 4 bytes.
														
 
															+ * @return number of bytes written, 0-4, with 0 meaning the character cannot
														
 
															+ *         be encoded
														
 
															+ */
														
 
															+static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out);
														
 
															+
														
 
															+int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out)
														
 
															+{
														
 
															+    if (ch <= UINT32_C(0x007F)) {
														
 
															+        out[0] = ch;
														
 
															+        return 1;
														
 
															+    }
														
 
															+    
														
 
															+    if (ch <= UINT32_C(0x07FF)) {
														
 
															+        out[0] = (0xC0 | (ch >> 6));
														
 
															+        out[1] = (0x80 | ((ch >> 0) & 0x3F));
														
 
															+        return 2;
														
 
															+    }
														
 
															+    
														
 
															+    if (ch <= UINT32_C(0xFFFF)) {
														
 
															+        // surrogates
														
 
															+        if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
														
 
															+            return 0;
														
 
															+        }
														
 
															+        
														
 
															+        out[0] = (0xE0 | (ch >> 12));
														
 
															+        out[1] = (0x80 | ((ch >> 6) & 0x3F));
														
 
															+        out[2] = (0x80 | ((ch >> 0) & 0x3F));
														
 
															+        return 3;
														
 
															+    }
														
 
															+    
														
 
															+    if (ch < UINT32_C(0x10FFFF)) {
														
 
															+        out[0] = (0xF0 | (ch >> 18));
														
 
															+        out[1] = (0x80 | ((ch >> 12) & 0x3F));
														
 
															+        out[2] = (0x80 | ((ch >> 6) & 0x3F));
														
 
															+        out[3] = (0x80 | ((ch >> 0) & 0x3F));
														
 
															+        return 4;
														
 
															+    }
														
 
															+    
														
 
															+    return 0;
														
 
															+}
														
 
															+
														
 
															+#endif
														
--- a/misc/unicode_funcs.h
+++ b/misc/unicode_funcs.h
@@ -0,0 +1,225 @@
 
															+/**
														
 
															+ * @file unicode_funcs.h
														
 
															+ * @author Ambroz Bizjak <ambrop7@gmail.com>
														
 
															+ * 
														
 
															+ * @section LICENSE
														
 
															+ * 
														
 
															+ * This file is part of BadVPN.
														
 
															+ * 
														
 
															+ * BadVPN is free software: you can redistribute it and/or modify
														
 
															+ * it under the terms of the GNU General Public License version 2
														
 
															+ * as published by the Free Software Foundation.
														
 
															+ * 
														
 
															+ * BadVPN is distributed in the hope that it will be useful,
														
 
															+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
														
 
															+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
														
 
															+ * GNU General Public License for more details.
														
 
															+ * 
														
 
															+ * You should have received a copy of the GNU General Public License along
														
 
															+ * with this program; if not, write to the Free Software Foundation, Inc.,
														
 
															+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
														
 
															+ */
														
 
															+
														
 
															+#ifndef BADVPN_UNICODE_FUNCS_H
														
 
															+#define BADVPN_UNICODE_FUNCS_H
														
 
															+
														
 
															+#include <misc/expstring.h>
														
 
															+#include <misc/bsize.h>
														
 
															+#include <misc/Utf8Encoder.h>
														
 
															+#include <misc/Utf8Decoder.h>
														
 
															+#include <misc/Utf16Encoder.h>
														
 
															+#include <misc/Utf16Decoder.h>
														
 
															+
														
 
															+/**
														
 
															+ * Decodes UTF-16 data as bytes into an allocated null-terminated UTF-8 string.
														
 
															+ * 
														
 
															+ * @param data UTF-16 data, in big endian
														
 
															+ * @param data_len size of data in bytes
														
 
															+ * @param out_is_error if not NULL and the function returns a string,
														
 
															+ *                     *out_is_error will be set to 0 or 1, indicating
														
 
															+ *                     whether there have been errors decoding the input.
														
 
															+ *                     A null decoded character is treated as an error.
														
 
															+ * @return An UTF-8 null-terminated string which can be freed with free(),
														
 
															+ *         or NULL if out of memory.
														
 
															+ */
														
 
															+static char * unicode_decode_utf16_to_utf8 (const uint8_t *data, size_t data_len, int *out_is_error);
														
 
															+
														
 
															+/**
														
 
															+ * Decodes UTF-8 data into UTF-16 data as bytes.
														
 
															+ * 
														
 
															+ * @param data UTF-8 data
														
 
															+ * @param data_len size of data in bytes
														
 
															+ * @param out output buffer
														
 
															+ * @param out_avail number of bytes available in output buffer
														
 
															+ * @param out_len if not NULL, *out_len will contain the number of bytes
														
 
															+ *                required to store the resulting data (or overflow)
														
 
															+ * @param out_is_error if not NULL, *out_is_error will contain 0 or 1,
														
 
															+ *                     indicating whether there have been errors decoding
														
 
															+ *                     the input
														
 
															+ */
														
 
															+static void unicode_decode_utf8_to_utf16 (const uint8_t *data, size_t data_len, uint8_t *out, size_t out_avail, bsize_t *out_len, int *out_is_error);
														
 
															+
														
 
															+static char * unicode_decode_utf16_to_utf8 (const uint8_t *data, size_t data_len, int *out_is_error)
														
 
															+{
														
 
															+    // will build the resulting UTF-8 string by appending to ExpString
														
 
															+    ExpString str;
														
 
															+    if (!ExpString_Init(&str)) {
														
 
															+        goto fail0;
														
 
															+    }
														
 
															+    
														
 
															+    // init UTF-16 decoder
														
 
															+    Utf16Decoder decoder;
														
 
															+    Utf16Decoder_Init(&decoder);
														
 
															+    
														
 
															+    // set initial input and input matching positions
														
 
															+    size_t i_in = 0;
														
 
															+    size_t i_ch = 0;
														
 
															+    
														
 
															+    int error = 0;
														
 
															+    
														
 
															+    while (i_in < data_len) {
														
 
															+        // read two input bytes from the input position
														
 
															+        uint8_t x = data[i_in++];
														
 
															+        if (i_in == data_len) {
														
 
															+            break;
														
 
															+        }
														
 
															+        uint8_t y = data[i_in++];
														
 
															+        
														
 
															+        // combine them into a 16-bit value
														
 
															+        uint16_t xy = (((uint16_t)x << 8) | (uint16_t)y);
														
 
															+        
														
 
															+        // give the 16-bit value to the UTF-16 decoder and maybe
														
 
															+        // receive a Unicode character back
														
 
															+        uint32_t ch;
														
 
															+        if (!Utf16Decoder_Input(&decoder, xy, &ch)) {
														
 
															+            continue;
														
 
															+        }
														
 
															+        
														
 
															+        if (!error) {
														
 
															+            // encode the Unicode character back into UTF-16
														
 
															+            uint16_t chenc[2];
														
 
															+            int chenc_n = Utf16Encoder_EncodeCharacter(ch, chenc);
														
 
															+            ASSERT(chenc_n > 0)
														
 
															+            
														
 
															+            // match the result with input
														
 
															+            for (int chenc_i = 0; chenc_i < chenc_n; chenc_i++) {
														
 
															+                uint8_t cx = (chenc[chenc_i] >> 8);
														
 
															+                uint8_t cy = (chenc[chenc_i] & 0xFF);
														
 
															+                
														
 
															+                if (i_ch >= data_len || data[i_ch] != cx) {
														
 
															+                    error = 1;
														
 
															+                    break;
														
 
															+                }
														
 
															+                i_ch++;
														
 
															+                
														
 
															+                if (i_ch >= data_len || data[i_ch] != cy) {
														
 
															+                    error = 1;
														
 
															+                    break;
														
 
															+                }
														
 
															+                i_ch++;
														
 
															+            }
														
 
															+        }
														
 
															+        
														
 
															+        // we don't like null Unicode characters because we're building a
														
 
															+        // null-terminated UTF-8 string
														
 
															+        if (ch == 0) {
														
 
															+            error = 1;
														
 
															+            continue;
														
 
															+        }
														
 
															+        
														
 
															+        // encode the Unicode character into UTF-8
														
 
															+        uint8_t enc[5];
														
 
															+        int enc_n = Utf8Encoder_EncodeCharacter(ch, enc);
														
 
															+        ASSERT(enc_n > 0)
														
 
															+        
														
 
															+        // append the resulting UTF-8 bytes to the result string
														
 
															+        enc[enc_n] = 0;
														
 
															+        if (!ExpString_Append(&str, enc)) {
														
 
															+            goto fail1;
														
 
															+        }
														
 
															+    }
														
 
															+    
														
 
															+    // check if we matched the whole input string when encoding back
														
 
															+    if (i_ch < data_len) {
														
 
															+        error = 1;
														
 
															+    }
														
 
															+    
														
 
															+    if (out_is_error) {
														
 
															+        *out_is_error = error;
														
 
															+    }
														
 
															+    return ExpString_Get(&str);
														
 
															+    
														
 
															+fail1:
														
 
															+    ExpString_Free(&str);
														
 
															+fail0:
														
 
															+    return NULL;
														
 
															+}
														
 
															+
														
 
															+static void unicode_decode_utf8_to_utf16 (const uint8_t *data, size_t data_len, uint8_t *out, size_t out_avail, bsize_t *out_len, int *out_is_error)
														
 
															+{
														
 
															+    Utf8Decoder decoder;
														
 
															+    Utf8Decoder_Init(&decoder);
														
 
															+    
														
 
															+    size_t i_in = 0;
														
 
															+    size_t i_ch = 0;
														
 
															+    
														
 
															+    bsize_t len = bsize_fromsize(0);
														
 
															+    
														
 
															+    int error = 0;
														
 
															+    
														
 
															+    while (i_in < data_len) {
														
 
															+        uint8_t x = data[i_in++];
														
 
															+        
														
 
															+        uint32_t ch;
														
 
															+        if (!Utf8Decoder_Input(&decoder, x, &ch)) {
														
 
															+            continue;
														
 
															+        }
														
 
															+        
														
 
															+        if (!error) {
														
 
															+            uint8_t chenc[4];
														
 
															+            int chenc_n = Utf8Encoder_EncodeCharacter(ch, chenc);
														
 
															+            ASSERT(chenc_n > 0)
														
 
															+            
														
 
															+            for (int chenc_i = 0; chenc_i < chenc_n; chenc_i++) {
														
 
															+                if (i_ch >= data_len || data[i_ch] != chenc[chenc_i]) {
														
 
															+                    error = 1;
														
 
															+                    break;
														
 
															+                }
														
 
															+                i_ch++;
														
 
															+            }
														
 
															+        }
														
 
															+        
														
 
															+        uint16_t enc[2];
														
 
															+        int enc_n = Utf16Encoder_EncodeCharacter(ch, enc);
														
 
															+        ASSERT(enc_n > 0)
														
 
															+        
														
 
															+        len = bsize_add(len, bsize_fromsize(2 * enc_n));
														
 
															+        
														
 
															+        for (int enc_i = 0; enc_i < enc_n; enc_i++) {
														
 
															+            if (out_avail == 0) {
														
 
															+                break;
														
 
															+            }
														
 
															+            *(out++) = (enc[enc_i] >> 8);
														
 
															+            out_avail--;
														
 
															+            
														
 
															+            if (out_avail == 0) {
														
 
															+                break;
														
 
															+            }
														
 
															+            *(out++) = (enc[enc_i] & 0xFF);
														
 
															+            out_avail--;
														
 
															+        }
														
 
															+    }
														
 
															+    
														
 
															+    if (i_ch < data_len) {
														
 
															+        error = 1;
														
 
															+    }
														
 
															+    
														
 
															+    if (out_len) {
														
 
															+        *out_len = len;
														
 
															+    }
														
 
															+    if (out_is_error) {
														
 
															+        *out_is_error = error;
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+#endif