14 anni fa · 5cffb05fd7
--- a/misc/Utf16Decoder.h
+++ b/misc/Utf16Decoder.h
@@ -0,0 +1,106 @@
 
				+/**
			
 
				+ * @file Utf16Decoder.h
			
 
				+ * @author Ambroz Bizjak <ambrop7@gmail.com>
			
 
				+ * 
			
 
				+ * @section LICENSE
			
 
				+ * 
			
 
				+ * This file is part of BadVPN.
			
 
				+ * 
			
 
				+ * BadVPN is free software: you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ * 
			
 
				+ * BadVPN is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ * 
			
 
				+ * You should have received a copy of the GNU General Public License along
			
 
				+ * with this program; if not, write to the Free Software Foundation, Inc.,
			
 
				+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef BADVPN_UTF16DECODER_H
			
 
				+#define BADVPN_UTF16DECODER_H
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+#include <misc/debug.h>
			
 
				+
			
 
				/**
				 * Incremental UTF-16 decoder state.
				 *
				 * 16-bit code units are fed one at a time through Utf16Decoder_Input();
				 * the structure remembers a pending high surrogate between calls.
				 */
				typedef struct {
				    // nonzero after a high surrogate has been accepted and a low
				    // surrogate is expected next
				    int cont;
				    // partially assembled code point; only meaningful while cont is nonzero
				    uint32_t ch;
				} Utf16Decoder;

				/**
				 * Initializes the UTF-16 decoder.
				 * 
				 * @param o the object
				 */
				static void Utf16Decoder_Init (Utf16Decoder *o)
				{
				    // no surrogate pair in progress
				    o->cont = 0;
				}

				/**
				 * Inputs a 16-bit value to the decoder.
				 * 
				 * A high surrogate (0xD800 - 0xDBFF) starts, or restarts, a surrogate pair
				 * and produces no output. A low surrogate (0xDC00 - 0xDFFF) completes a
				 * pending pair; without a pending high surrogate it is silently dropped.
				 * Any other value is reported as-is and discards a pending high surrogate.
				 * 
				 * @param o the object
				 * @param b 16-bit value to input
				 * @param out_ch will receive a Unicode character if this function returns 1.
				 *               If written, the character will be in the range 0 - 0x10FFFF,
				 *               excluding the surrogate range 0xD800 - 0xDFFF.
				 * @return 1 if a Unicode character has been written to *out_ch, 0 if not
				 */
				static int Utf16Decoder_Input (Utf16Decoder *o, uint16_t b, uint32_t *out_ch)
				{
				    // high surrogate
				    if (b >= UINT16_C(0xD800) && b <= UINT16_C(0xDBFF)) {
				        // set continuation state
				        o->cont = 1;

				        // Surrogate pairs encode code points starting at 0x10000; the high
				        // surrogate carries the upper 10 bits of the offset from that base.
				        // The low 10 bits of this value are zero, so the low surrogate
				        // can be merged in with a bitwise OR.
				        o->ch = UINT32_C(0x10000) + ((uint32_t)(b - UINT16_C(0xD800)) << 10);

				        return 0;
				    }

				    // low surrogate
				    if (b >= UINT16_C(0xDC00) && b <= UINT16_C(0xDFFF)) {
				        // an unpaired low surrogate is an error; drop it
				        if (!o->cont) {
				            return 0;
				        }

				        // add low bits
				        o->ch |= (uint32_t)(b - UINT16_C(0xDC00));

				        // reset state
				        o->cont = 0;

				        // the result is always within 0x10000 - 0x10FFFF, so it can never
				        // fall into the surrogate range
				        *out_ch = o->ch;
				        return 1;
				    }

				    // ordinary BMP value; this also abandons any pending high surrogate
				    o->cont = 0;

				    // return character
				    *out_ch = b;
				    return 1;
				}
			
 
				+
			
 
				+#endif
			
--- a/misc/Utf16Encoder.h
+++ b/misc/Utf16Encoder.h
@@ -0,0 +1,60 @@
 
				+/**
			
 
				+ * @file Utf16Encoder.h
			
 
				+ * @author Ambroz Bizjak <ambrop7@gmail.com>
			
 
				+ * 
			
 
				+ * @section LICENSE
			
 
				+ * 
			
 
				+ * This file is part of BadVPN.
			
 
				+ * 
			
 
				+ * BadVPN is free software: you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ * 
			
 
				+ * BadVPN is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ * 
			
 
				+ * You should have received a copy of the GNU General Public License along
			
 
				+ * with this program; if not, write to the Free Software Foundation, Inc.,
			
 
				+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef BADVPN_UTF16ENCODER_H
			
 
				+#define BADVPN_UTF16ENCODER_H
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+
			
 
				/**
				 * Encodes a Unicode character into a sequence of 16-bit values according to UTF-16.
				 * 
				 * Characters up to 0xFFFF (other than surrogates) become a single value;
				 * characters in 0x10000 - 0x10FFFF become a high/low surrogate pair.
				 * 
				 * @param ch Unicode character to encode
				 * @param out will receive the encoded 16-bit values. Must have space for 2 values.
				 * @return number of 16-bit values written, 0-2, with 0 meaning the character cannot
				 *         be encoded (surrogate code point or value above 0x10FFFF)
				 */
				static int Utf16Encoder_EncodeCharacter (uint32_t ch, uint16_t *out)
				{
				    const int is_surrogate = (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF));

				    // reject everything that has no UTF-16 representation
				    if (is_surrogate || ch > UINT32_C(0x10FFFF)) {
				        return 0;
				    }

				    // BMP character: stored directly
				    if (ch < UINT32_C(0x10000)) {
				        out[0] = (uint16_t)ch;
				        return 1;
				    }

				    // supplementary character: split the 20-bit offset into two 10-bit halves
				    const uint32_t offset = ch - UINT32_C(0x10000);
				    out[0] = (uint16_t)(UINT32_C(0xD800) | (offset >> 10));
				    out[1] = (uint16_t)(UINT32_C(0xDC00) | (offset & UINT32_C(0x3FF)));
				    return 2;
				}
			
 
				+
			
 
				+#endif
			
--- a/misc/Utf8Decoder.h
+++ b/misc/Utf8Decoder.h
@@ -0,0 +1,136 @@
 
				+/**
			
 
				+ * @file Utf8Decoder.h
			
 
				+ * @author Ambroz Bizjak <ambrop7@gmail.com>
			
 
				+ * 
			
 
				+ * @section LICENSE
			
 
				+ * 
			
 
				+ * This file is part of BadVPN.
			
 
				+ * 
			
 
				+ * BadVPN is free software: you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ * 
			
 
				+ * BadVPN is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ * 
			
 
				+ * You should have received a copy of the GNU General Public License along
			
 
				+ * with this program; if not, write to the Free Software Foundation, Inc.,
			
 
				+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef BADVPN_UTF8DECODER_H
			
 
				+#define BADVPN_UTF8DECODER_H
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+#include <misc/debug.h>
			
 
				+
			
 
				+/**
			
 
				+ * Decodes UTF-8 data into Unicode characters.
			
 
				+ */
			
 
				+typedef struct {
			
 
				+    int bytes;
			
 
				+    int pos;
			
 
				+    uint32_t ch;
			
 
				+} Utf8Decoder;
			
 
				+
			
 
				+/**
			
 
				+ * Initializes the UTF-8 decoder.
			
 
				+ * 
			
 
				+ * @param o the object
			
 
				+ */
			
 
				+static void Utf8Decoder_Init (Utf8Decoder *o);
			
 
				+
			
 
				+/**
			
 
				+ * Inputs a byte to the decoder.
			
 
				+ * 
			
 
				+ * @param o the object
			
 
				+ * @param b byte to input
			
 
				+ * @param out_ch will receive a Unicode character if this function returns 1.
			
 
				+ *               If written, the character will be in the range 0 - 0x10FFFF,
			
 
				+ *               excluding the surrogate range 0xD800 - 0xDFFF.
			
 
				+ * @return 1 if a Unicode character has been written to *out_ch, 0 if not
			
 
				+ */
			
 
				+static int Utf8Decoder_Input (Utf8Decoder *o, uint8_t b, uint32_t *out_ch);
			
 
				+
			
 
				+void Utf8Decoder_Init (Utf8Decoder *o)
			
 
				+{
			
 
				+    o->bytes = 0;
			
 
				+}
			
 
				+
			
 
				+int Utf8Decoder_Input (Utf8Decoder *o, uint8_t b, uint32_t *out_ch)
			
 
				+{
			
 
				+    // one-byte character
			
 
				+    if ((b & 128) == 0) {
			
 
				+        o->bytes = 0;
			
 
				+        *out_ch = b;
			
 
				+        return 1;
			
 
				+    }
			
 
				+    
			
 
				+    // start of two-byte character
			
 
				+    if ((b & 224) == 192) {
			
 
				+        o->bytes = 2;
			
 
				+        o->pos = 1;
			
 
				+        o->ch = (uint32_t)(b & 31) << 6;
			
 
				+        return 0;
			
 
				+    }
			
 
				+    
			
 
				+    // start of three-byte character
			
 
				+    if ((b & 240) == 224) {
			
 
				+        o->bytes = 3;
			
 
				+        o->pos = 1;
			
 
				+        o->ch = (uint32_t)(b & 15) << 12;
			
 
				+        return 0;
			
 
				+    }
			
 
				+    
			
 
				+    // start of four-byte character
			
 
				+    if ((b & 248) == 240) {
			
 
				+        o->bytes = 4;
			
 
				+        o->pos = 1;
			
 
				+        o->ch = (uint32_t)(b & 7) << 18;
			
 
				+        return 0;
			
 
				+    }
			
 
				+    
			
 
				+    // continuation of multi-byte character
			
 
				+    if ((b & 192) == 128 && o->bytes > 0) {
			
 
				+        ASSERT(o->bytes <= 4)
			
 
				+        ASSERT(o->pos > 0)
			
 
				+        ASSERT(o->pos < o->bytes)
			
 
				+        
			
 
				+        // add bits from this byte
			
 
				+        o->ch |= (uint32_t)(b & 63) << (6 * (o->bytes - o->pos - 1));
			
 
				+        
			
 
				+        // end of multi-byte character?
			
 
				+        if (o->pos == o->bytes - 1) {
			
 
				+            // reset state
			
 
				+            o->bytes = 0;
			
 
				+            
			
 
				+            // don't report out-of-range characters
			
 
				+            if (o->ch > UINT32_C(0x10FFFF)) {
			
 
				+                return 0;
			
 
				+            }
			
 
				+            
			
 
				+            // don't report surrogates
			
 
				+            if (o->ch >= UINT32_C(0xD800) && o->ch <= UINT32_C(0xDFFF)) {
			
 
				+                return 0;
			
 
				+            }
			
 
				+            
			
 
				+            *out_ch = o->ch;
			
 
				+            return 1;
			
 
				+        }
			
 
				+        
			
 
				+        // increment byte index
			
 
				+        o->pos++;
			
 
				+        
			
 
				+        return 0;
			
 
				+    }
			
 
				+    
			
 
				+    // error, reset state
			
 
				+    o->bytes = 0;
			
 
				+    
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+#endif
			
--- a/misc/Utf8Encoder.h
+++ b/misc/Utf8Encoder.h
@@ -0,0 +1,74 @@
 
				+/**
			
 
				+ * @file Utf8Encoder.h
			
 
				+ * @author Ambroz Bizjak <ambrop7@gmail.com>
			
 
				+ * 
			
 
				+ * @section LICENSE
			
 
				+ * 
			
 
				+ * This file is part of BadVPN.
			
 
				+ * 
			
 
				+ * BadVPN is free software: you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ * 
			
 
				+ * BadVPN is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ * 
			
 
				+ * You should have received a copy of the GNU General Public License along
			
 
				+ * with this program; if not, write to the Free Software Foundation, Inc.,
			
 
				+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef BADVPN_UTF8ENCODER_H
			
 
				+#define BADVPN_UTF8ENCODER_H
			
 
				+
			
 
				+#include <stdint.h>
			
 
				+
			
 
				/**
				 * Encodes a Unicode character into a sequence of bytes according to UTF-8.
				 * 
				 * @param ch Unicode character to encode
				 * @param out will receive the encoded bytes. Must have space for 4 bytes.
				 * @return number of bytes written, 0-4, with 0 meaning the character cannot
				 *         be encoded (surrogate code point or value above 0x10FFFF)
				 */
				static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out)
				{
				    // one byte: 0xxxxxxx
				    if (ch <= UINT32_C(0x007F)) {
				        out[0] = (uint8_t)ch;
				        return 1;
				    }

				    // two bytes: 110xxxxx 10xxxxxx
				    if (ch <= UINT32_C(0x07FF)) {
				        out[0] = (uint8_t)(0xC0 | (ch >> 6));
				        out[1] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
				        return 2;
				    }

				    // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
				    if (ch <= UINT32_C(0xFFFF)) {
				        // surrogates are not characters and must not be encoded
				        if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
				            return 0;
				        }

				        out[0] = (uint8_t)(0xE0 | (ch >> 12));
				        out[1] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
				        out[2] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
				        return 3;
				    }

				    // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
				    // The bound is inclusive: 0x10FFFF is the last valid code point.
				    if (ch <= UINT32_C(0x10FFFF)) {
				        out[0] = (uint8_t)(0xF0 | (ch >> 18));
				        out[1] = (uint8_t)(0x80 | ((ch >> 12) & 0x3F));
				        out[2] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
				        out[3] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
				        return 4;
				    }

				    // beyond the Unicode range
				    return 0;
				}
			
 
				+
			
 
				+#endif
			
--- a/misc/unicode_funcs.h
+++ b/misc/unicode_funcs.h
@@ -0,0 +1,225 @@
 
				+/**
			
 
				+ * @file unicode_funcs.h
			
 
				+ * @author Ambroz Bizjak <ambrop7@gmail.com>
			
 
				+ * 
			
 
				+ * @section LICENSE
			
 
				+ * 
			
 
				+ * This file is part of BadVPN.
			
 
				+ * 
			
 
				+ * BadVPN is free software: you can redistribute it and/or modify
			
 
				+ * it under the terms of the GNU General Public License version 2
			
 
				+ * as published by the Free Software Foundation.
			
 
				+ * 
			
 
				+ * BadVPN is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ * 
			
 
				+ * You should have received a copy of the GNU General Public License along
			
 
				+ * with this program; if not, write to the Free Software Foundation, Inc.,
			
 
				+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
			
 
				+ */
			
 
				+
			
 
				+#ifndef BADVPN_UNICODE_FUNCS_H
			
 
				+#define BADVPN_UNICODE_FUNCS_H
			
 
				+
			
 
				+#include <misc/expstring.h>
			
 
				+#include <misc/bsize.h>
			
 
				+#include <misc/Utf8Encoder.h>
			
 
				+#include <misc/Utf8Decoder.h>
			
 
				+#include <misc/Utf16Encoder.h>
			
 
				+#include <misc/Utf16Decoder.h>
			
 
				+
			
 
				+/**
			
 
				+ * Decodes UTF-16 data as bytes into an allocated null-terminated UTF-8 string.
			
 
				+ * 
			
 
				+ * @param data UTF-16 data, in big endian
			
 
				+ * @param data_len size of data in bytes
			
 
				+ * @param out_is_error if not NULL and the function returns a string,
			
 
				+ *                     *out_is_error will be set to 0 or 1, indicating
			
 
				+ *                     whether there have been errors decoding the input.
			
 
				+ *                     A null decoded character is treated as an error.
			
 
				+ * @return A UTF-8 null-terminated string which can be freed with free(),
			
 
				+ *         or NULL if out of memory.
			
 
				+ */
			
 
				+static char * unicode_decode_utf16_to_utf8 (const uint8_t *data, size_t data_len, int *out_is_error);
			
 
				+
			
 
				+/**
			
 
				+ * Decodes UTF-8 data into UTF-16 data as bytes.
			
 
				+ * 
			
 
				+ * @param data UTF-8 data
			
 
				+ * @param data_len size of data in bytes
			
 
				+ * @param out output buffer
			
 
				+ * @param out_avail number of bytes available in output buffer
			
 
				+ * @param out_len if not NULL, *out_len will contain the number of bytes
			
 
				+ *                required to store the resulting data (or overflow)
			
 
				+ * @param out_is_error if not NULL, *out_is_error will contain 0 or 1,
			
 
				+ *                     indicating whether there have been errors decoding
			
 
				+ *                     the input
			
 
				+ */
			
 
				+static void unicode_decode_utf8_to_utf16 (const uint8_t *data, size_t data_len, uint8_t *out, size_t out_avail, bsize_t *out_len, int *out_is_error);
			
 
				+
			
 
				+static char * unicode_decode_utf16_to_utf8 (const uint8_t *data, size_t data_len, int *out_is_error)
			
 
				+{
			
 
				+    // will build the resulting UTF-8 string by appending to ExpString
			
 
				+    ExpString str;
			
 
				+    if (!ExpString_Init(&str)) {
			
 
				+        goto fail0;
			
 
				+    }
			
 
				+    
			
 
				+    // init UTF-16 decoder
			
 
				+    Utf16Decoder decoder;
			
 
				+    Utf16Decoder_Init(&decoder);
			
 
				+    
			
 
				+    // set initial input and input matching positions
			
 
				+    size_t i_in = 0;
			
 
				+    size_t i_ch = 0;
			
 
				+    
			
 
				+    int error = 0;
			
 
				+    
			
 
				+    while (i_in < data_len) {
			
 
				+        // read two input bytes from the input position
			
 
				+        uint8_t x = data[i_in++];
			
 
				+        if (i_in == data_len) {
			
 
				+            break;
			
 
				+        }
			
 
				+        uint8_t y = data[i_in++];
			
 
				+        
			
 
				+        // combine them into a 16-bit value
			
 
				+        uint16_t xy = (((uint16_t)x << 8) | (uint16_t)y);
			
 
				+        
			
 
				+        // give the 16-bit value to the UTF-16 decoder and maybe
			
 
				+        // receive a Unicode character back
			
 
				+        uint32_t ch;
			
 
				+        if (!Utf16Decoder_Input(&decoder, xy, &ch)) {
			
 
				+            continue;
			
 
				+        }
			
 
				+        
			
 
				+        if (!error) {
			
 
				+            // encode the Unicode character back into UTF-16
			
 
				+            uint16_t chenc[2];
			
 
				+            int chenc_n = Utf16Encoder_EncodeCharacter(ch, chenc);
			
 
				+            ASSERT(chenc_n > 0)
			
 
				+            
			
 
				+            // match the result with input
			
 
				+            for (int chenc_i = 0; chenc_i < chenc_n; chenc_i++) {
			
 
				+                uint8_t cx = (chenc[chenc_i] >> 8);
			
 
				+                uint8_t cy = (chenc[chenc_i] & 0xFF);
			
 
				+                
			
 
				+                if (i_ch >= data_len || data[i_ch] != cx) {
			
 
				+                    error = 1;
			
 
				+                    break;
			
 
				+                }
			
 
				+                i_ch++;
			
 
				+                
			
 
				+                if (i_ch >= data_len || data[i_ch] != cy) {
			
 
				+                    error = 1;
			
 
				+                    break;
			
 
				+                }
			
 
				+                i_ch++;
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        // we don't like null Unicode characters because we're building a
			
 
				+        // null-terminated UTF-8 string
			
 
				+        if (ch == 0) {
			
 
				+            error = 1;
			
 
				+            continue;
			
 
				+        }
			
 
				+        
			
 
				+        // encode the Unicode character into UTF-8
			
 
				+        uint8_t enc[5];
			
 
				+        int enc_n = Utf8Encoder_EncodeCharacter(ch, enc);
			
 
				+        ASSERT(enc_n > 0)
			
 
				+        
			
 
				+        // append the resulting UTF-8 bytes to the result string
			
 
				+        enc[enc_n] = 0;
			
 
				+        if (!ExpString_Append(&str, enc)) {
			
 
				+            goto fail1;
			
 
				+        }
			
 
				+    }
			
 
				+    
			
 
				+    // check if we matched the whole input string when encoding back
			
 
				+    if (i_ch < data_len) {
			
 
				+        error = 1;
			
 
				+    }
			
 
				+    
			
 
				+    if (out_is_error) {
			
 
				+        *out_is_error = error;
			
 
				+    }
			
 
				+    return ExpString_Get(&str);
			
 
				+    
			
 
				+fail1:
			
 
				+    ExpString_Free(&str);
			
 
				+fail0:
			
 
				+    return NULL;
			
 
				+}
			
 
				+
			
 
				+static void unicode_decode_utf8_to_utf16 (const uint8_t *data, size_t data_len, uint8_t *out, size_t out_avail, bsize_t *out_len, int *out_is_error)
			
 
				+{
			
 
				+    Utf8Decoder decoder;
			
 
				+    Utf8Decoder_Init(&decoder);
			
 
				+    
			
 
				+    size_t i_in = 0;
			
 
				+    size_t i_ch = 0;
			
 
				+    
			
 
				+    bsize_t len = bsize_fromsize(0);
			
 
				+    
			
 
				+    int error = 0;
			
 
				+    
			
 
				+    while (i_in < data_len) {
			
 
				+        uint8_t x = data[i_in++];
			
 
				+        
			
 
				+        uint32_t ch;
			
 
				+        if (!Utf8Decoder_Input(&decoder, x, &ch)) {
			
 
				+            continue;
			
 
				+        }
			
 
				+        
			
 
				+        if (!error) {
			
 
				+            uint8_t chenc[4];
			
 
				+            int chenc_n = Utf8Encoder_EncodeCharacter(ch, chenc);
			
 
				+            ASSERT(chenc_n > 0)
			
 
				+            
			
 
				+            for (int chenc_i = 0; chenc_i < chenc_n; chenc_i++) {
			
 
				+                if (i_ch >= data_len || data[i_ch] != chenc[chenc_i]) {
			
 
				+                    error = 1;
			
 
				+                    break;
			
 
				+                }
			
 
				+                i_ch++;
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        uint16_t enc[2];
			
 
				+        int enc_n = Utf16Encoder_EncodeCharacter(ch, enc);
			
 
				+        ASSERT(enc_n > 0)
			
 
				+        
			
 
				+        len = bsize_add(len, bsize_fromsize(2 * enc_n));
			
 
				+        
			
 
				+        for (int enc_i = 0; enc_i < enc_n; enc_i++) {
			
 
				+            if (out_avail == 0) {
			
 
				+                break;
			
 
				+            }
			
 
				+            *(out++) = (enc[enc_i] >> 8);
			
 
				+            out_avail--;
			
 
				+            
			
 
				+            if (out_avail == 0) {
			
 
				+                break;
			
 
				+            }
			
 
				+            *(out++) = (enc[enc_i] & 0xFF);
			
 
				+            out_avail--;
			
 
				+        }
			
 
				+    }
			
 
				+    
			
 
				+    if (i_ch < data_len) {
			
 
				+        error = 1;
			
 
				+    }
			
 
				+    
			
 
				+    if (out_len) {
			
 
				+        *out_len = len;
			
 
				+    }
			
 
				+    if (out_is_error) {
			
 
				+        *out_is_error = error;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+#endif