2018-04-25 19:30:49 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
2018-04-25 15:57:31 +02:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include "utf8.h"
|
|
|
|
|
2018-04-25 16:13:34 +02:00
|
|
|
unsigned long utf8_unpack_char(const char *src)
|
2018-04-25 15:57:31 +02:00
|
|
|
{
|
|
|
|
unsigned long c = 0;
|
|
|
|
|
|
|
|
if ((src[0] & 0x80) == 0x00) {
|
|
|
|
c = ((src[0] & 0x7f) << 0);
|
|
|
|
} else if ((src[0] & 0xe0) == 0xc0) {
|
|
|
|
c = ((src[0] & 0x1f) << 6);
|
|
|
|
c |= ((src[1] & 0x3f) << 0);
|
|
|
|
} else if ((src[0] & 0xf0) == 0xe0) {
|
|
|
|
c = ((src[0] & 0x0f) << 12);
|
|
|
|
c |= ((src[1] & 0x3f) << 6);
|
|
|
|
c |= ((src[2] & 0x3f) << 0);
|
|
|
|
} else if ((src[0] & 0xf8) == 0xf0) {
|
|
|
|
c = ((src[0] & 0x07) << 18);
|
|
|
|
c |= ((src[1] & 0x3f) << 12);
|
|
|
|
c |= ((src[2] & 0x3f) << 6);
|
|
|
|
c |= ((src[3] & 0x3f) << 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
void utf8_pack_char(char *dest, unsigned long c)
|
|
|
|
{
|
|
|
|
if (c <= 0x00007f) {
|
|
|
|
dest[0] = c;
|
|
|
|
} else if (c <= 0x0007ff) {
|
|
|
|
dest[0] = (0xc0 | ((c >> 6) & 0xff));
|
|
|
|
dest[1] = (0x80 | ((c >> 0) & 0x3f));
|
|
|
|
} else if (c <= 0x00ffff) {
|
|
|
|
dest[0] = (0xe0 | ((c >> 12) & 0xff));
|
|
|
|
dest[1] = (0x80 | ((c >> 6) & 0x3f));
|
|
|
|
dest[2] = (0x80 | ((c >> 0) & 0x3f));
|
|
|
|
} else if (c <= 0x10ffff) {
|
|
|
|
dest[0] = (0xf0 | ((c >> 18) & 0xff));
|
|
|
|
dest[1] = (0x80 | ((c >> 12) & 0x3f));
|
|
|
|
dest[2] = (0x80 | ((c >> 6) & 0x3f));
|
|
|
|
dest[3] = (0x80 | ((c >> 0) & 0x3f));
|
|
|
|
} else {
|
|
|
|
dest[0] = '?'; // should not happen
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned int utf8_char_length(unsigned long c)
|
|
|
|
{
|
|
|
|
if (c <= 0x00007f) {
|
|
|
|
return 1;
|
|
|
|
} else if (c <= 0x0007ff) {
|
|
|
|
return 2;
|
|
|
|
} else if (c <= 0x00ffff) {
|
|
|
|
return 3;
|
|
|
|
} else if (c <= 0x10ffff) {
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0; // should not happen
|
|
|
|
}
|
|
|
|
|
|
|
|
char *utf8_code_to_string(unsigned long c)
|
|
|
|
{
|
|
|
|
unsigned int length = utf8_char_length(c) + 1;
|
|
|
|
char *buffer;
|
|
|
|
|
|
|
|
buffer = malloc(length);
|
|
|
|
if (buffer == NULL)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
utf8_pack_char(buffer, c);
|
|
|
|
buffer[length] = 0;
|
|
|
|
|
|
|
|
return buffer;
|
|
|
|
}
|