test the utf8.c unit
This commit is contained in:
parent
1daa2632fb
commit
c5d8d17599
5 changed files with 63 additions and 20 deletions
|
@ -20,8 +20,11 @@ LFLAGS =
|
|||
TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
|
||||
TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
|
||||
|
||||
OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o velthuis.o
|
||||
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o tests/velthuis.o
|
||||
OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o \
|
||||
velthuis.o
|
||||
|
||||
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \
|
||||
tests/velthuis.o tests/utf8.o
|
||||
|
||||
AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \
|
||||
*.status *.cache
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "translit.h"
|
||||
#include "transcript.h"
|
||||
#include "velthuis.h"
|
||||
#include "utf8.h"
|
||||
|
||||
static Suite *create_test_suite()
|
||||
{
|
||||
|
@ -14,6 +15,8 @@ static Suite *create_test_suite()
|
|||
register_translit_tests(test_case);
|
||||
register_transcript_tests(test_case);
|
||||
register_velthuis_encoder_tests(test_case);
|
||||
register_utf8_tests(test_case);
|
||||
|
||||
suite_add_tcase(suite, test_case);
|
||||
|
||||
return suite;
|
||||
|
|
35
tests/utf8.c
Normal file
35
tests/utf8.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
#include "test.h"
|
||||
#include "utf8.h"
|
||||
#include "../utf8.h"
|
||||
|
||||
START_TEST(test_utf8)
|
||||
{
|
||||
char buf[10] = {};
|
||||
unsigned int c;
|
||||
|
||||
c = utf8_unpack_char("\u0024");
|
||||
utf8_pack_char(buf, c);
|
||||
ck_assert_int_eq(1, utf8_char_length(c));
|
||||
ck_assert_str_eq("\u0024", buf);
|
||||
|
||||
c = utf8_unpack_char("\u00a2");
|
||||
utf8_pack_char(buf, c);
|
||||
ck_assert_int_eq(2, utf8_char_length(c));
|
||||
ck_assert_str_eq("\u00a2", buf);
|
||||
|
||||
c = utf8_unpack_char("\u0939");
|
||||
utf8_pack_char(buf, c);
|
||||
ck_assert_int_eq(3, utf8_char_length(c));
|
||||
ck_assert_str_eq("\u0939", buf);
|
||||
|
||||
c = utf8_unpack_char("\U00010348");
|
||||
utf8_pack_char(buf, c);
|
||||
ck_assert_int_eq(4, utf8_char_length(c));
|
||||
ck_assert_str_eq("\U00010348", buf);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Add the UTF-8 test (test_utf8) to the given Check test case. */
void register_utf8_tests(TCase *test_case)
|
||||
{
|
||||
tcase_add_test(test_case, test_utf8);
|
||||
}
|
8
tests/utf8.h
Normal file
8
tests/utf8.h
Normal file
|
@ -0,0 +1,8 @@
|
|||
/*
 * Declarations for the UTF-8 unit tests (tests/utf8.c).
 *
 * NOTE(review): identifiers containing a double underscore are reserved
 * for the implementation in C; consider renaming this guard if the
 * project's other headers allow it.
 */
#ifndef __TEST_UTF8_H
|
||||
#define __TEST_UTF8_H
|
||||
|
||||
#include <check.h>
|
||||
|
||||
/* Add the UTF-8 tests to test_case. */
void register_utf8_tests(TCase *test_case);
|
||||
|
||||
#endif /* __TEST_UTF8_H */
|
30
utf8.c
30
utf8.c
|
@ -1,6 +1,5 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "utf8.h"
|
||||
|
||||
unsigned int utf8_unpack_char(const char *src)
|
||||
|
@ -9,13 +8,16 @@ unsigned int utf8_unpack_char(const char *src)
|
|||
|
||||
if ((src[0] & 0x80) == 0x00) {
|
||||
c = ((src[0] & 0x7f) << 0);
|
||||
|
||||
} else if ((src[0] & 0xe0) == 0xc0) {
|
||||
c = ((src[0] & 0x1f) << 6);
|
||||
c |= ((src[1] & 0x3f) << 0);
|
||||
|
||||
} else if ((src[0] & 0xf0) == 0xe0) {
|
||||
c = ((src[0] & 0x0f) << 12);
|
||||
c |= ((src[1] & 0x3f) << 6);
|
||||
c |= ((src[2] & 0x3f) << 0);
|
||||
|
||||
} else if ((src[0] & 0xf8) == 0xf0) {
|
||||
c = ((src[0] & 0x07) << 18);
|
||||
c |= ((src[1] & 0x3f) << 12);
|
||||
|
@ -30,20 +32,24 @@ void utf8_pack_char(char *dest, unsigned int c)
|
|||
{
|
||||
if (c <= 0x00007f) {
|
||||
dest[0] = c;
|
||||
|
||||
} else if (c <= 0x0007ff) {
|
||||
dest[0] = (0xc0 | ((c >> 6) & 0xff));
|
||||
dest[1] = (0x80 | ((c >> 0) & 0x3f));
|
||||
|
||||
} else if (c <= 0x00ffff) {
|
||||
dest[0] = (0xe0 | ((c >> 12) & 0xff));
|
||||
dest[1] = (0x80 | ((c >> 6) & 0x3f));
|
||||
dest[2] = (0x80 | ((c >> 0) & 0x3f));
|
||||
|
||||
} else if (c <= 0x10ffff) {
|
||||
dest[0] = (0xf0 | ((c >> 18) & 0xff));
|
||||
dest[1] = (0x80 | ((c >> 12) & 0x3f));
|
||||
dest[2] = (0x80 | ((c >> 6) & 0x3f));
|
||||
dest[3] = (0x80 | ((c >> 0) & 0x3f));
|
||||
|
||||
} else {
|
||||
dest[0] = '?'; // should not happen
|
||||
dest[0] = '?'; /* should not happen */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,28 +57,16 @@ unsigned int utf8_char_length(unsigned int c)
|
|||
{
|
||||
if (c <= 0x00007f) {
|
||||
return 1;
|
||||
|
||||
} else if (c <= 0x0007ff) {
|
||||
return 2;
|
||||
|
||||
} else if (c <= 0x00ffff) {
|
||||
return 3;
|
||||
|
||||
} else if (c <= 0x10ffff) {
|
||||
return 4;
|
||||
}
|
||||
|
||||
return 0; // should not happen
|
||||
}
|
||||
|
||||
/*
 * Encode the code point c as a freshly allocated, NUL-terminated UTF-8
 * string.
 *
 * Returns NULL if the allocation fails. The returned buffer comes from
 * malloc(); the caller owns it and must release it with free().
 */
char *utf8_code_to_string(unsigned int c)
{
	unsigned int length = utf8_char_length(c);
	char *buffer;

	/*
	 * utf8_char_length() returns 0 for values above 0x10ffff, yet
	 * utf8_pack_char() still writes a one-byte '?' placeholder for
	 * them, so reserve room for that byte.
	 */
	if (length == 0)
		length = 1;

	/* One extra byte for the terminator. */
	buffer = malloc(length + 1);
	if (buffer == NULL)
		return NULL;

	utf8_pack_char(buffer, c);
	/* The terminator goes right after the payload, inside the allocation. */
	buffer[length] = '\0';

	return buffer;
}
|
||||
|
|
Loading…
Reference in a new issue