test the utf8.c unit

This commit is contained in:
Vlasta Vesely 2021-02-25 10:15:08 +01:00
parent 1daa2632fb
commit c5d8d17599
5 changed files with 63 additions and 20 deletions

View file

@ -20,8 +20,11 @@ LFLAGS =
TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@ TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@
TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@ TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@
OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o velthuis.o OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o \
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o tests/velthuis.o velthuis.o
TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \
tests/velthuis.o tests/utf8.o
AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \ AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \
*.status *.cache *.status *.cache

View file

@ -2,6 +2,7 @@
#include "translit.h" #include "translit.h"
#include "transcript.h" #include "transcript.h"
#include "velthuis.h" #include "velthuis.h"
#include "utf8.h"
static Suite *create_test_suite() static Suite *create_test_suite()
{ {
@ -14,6 +15,8 @@ static Suite *create_test_suite()
register_translit_tests(test_case); register_translit_tests(test_case);
register_transcript_tests(test_case); register_transcript_tests(test_case);
register_velthuis_encoder_tests(test_case); register_velthuis_encoder_tests(test_case);
register_utf8_tests(test_case);
suite_add_tcase(suite, test_case); suite_add_tcase(suite, test_case);
return suite; return suite;

35
tests/utf8.c Normal file
View file

@ -0,0 +1,35 @@
#include "test.h"
#include "utf8.h"
#include "../utf8.h"
/*
 * Round-trip check for the UTF-8 helpers.
 *
 * Each sample string holds a single code point. It is decoded with
 * utf8_unpack_char(), the decoded value is re-encoded with
 * utf8_pack_char(), and both the length reported by utf8_char_length()
 * and the re-encoded bytes are compared with the sample.
 *
 * There is one sample per encoded length (1, 2, 3 and 4 bytes).
 */
START_TEST(test_utf8)
{
	static const struct {
		const char *encoded;	/* UTF-8 bytes of one code point */
		int length;		/* expected number of encoded bytes */
	} samples[] = {
		{ "\u0024", 1 },
		{ "\u00a2", 2 },
		{ "\u0939", 3 },
		{ "\U00010348", 4 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		/*
		 * utf8_pack_char() stores only the encoded bytes and no
		 * terminating NUL, so every sample starts from its own
		 * zero-filled buffer. This keeps the string comparison valid
		 * regardless of the order of the samples.
		 */
		char buf[8] = {0};
		unsigned int c;

		c = utf8_unpack_char(samples[i].encoded);
		utf8_pack_char(buf, c);
		ck_assert_int_eq(samples[i].length, utf8_char_length(c));
		ck_assert_str_eq(samples[i].encoded, buf);
	}
}
END_TEST
/*
 * Add the UTF-8 unit test (test_utf8) to the given Check test case.
 *
 * test_case: test case that the test function is attached to.
 */
void register_utf8_tests(TCase *test_case)
{
tcase_add_test(test_case, test_utf8);
}

8
tests/utf8.h Normal file
View file

@ -0,0 +1,8 @@
/*
 * Registration interface for the utf8.c unit tests.
 *
 * NOTE(review): identifiers containing a double underscore are reserved
 * for the implementation in C; the guard name is kept as is here on the
 * assumption that it follows the project's other headers - confirm.
 */
#ifndef __TEST_UTF8_H
#define __TEST_UTF8_H
#include <check.h>
/* Attach the UTF-8 tests to test_case. */
void register_utf8_tests(TCase *test_case);
#endif /* __TEST_UTF8_H */

30
utf8.c
View file

@ -1,6 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
#include <stdlib.h>
#include "utf8.h" #include "utf8.h"
unsigned int utf8_unpack_char(const char *src) unsigned int utf8_unpack_char(const char *src)
@ -9,13 +8,16 @@ unsigned int utf8_unpack_char(const char *src)
if ((src[0] & 0x80) == 0x00) { if ((src[0] & 0x80) == 0x00) {
c = ((src[0] & 0x7f) << 0); c = ((src[0] & 0x7f) << 0);
} else if ((src[0] & 0xe0) == 0xc0) { } else if ((src[0] & 0xe0) == 0xc0) {
c = ((src[0] & 0x1f) << 6); c = ((src[0] & 0x1f) << 6);
c |= ((src[1] & 0x3f) << 0); c |= ((src[1] & 0x3f) << 0);
} else if ((src[0] & 0xf0) == 0xe0) { } else if ((src[0] & 0xf0) == 0xe0) {
c = ((src[0] & 0x0f) << 12); c = ((src[0] & 0x0f) << 12);
c |= ((src[1] & 0x3f) << 6); c |= ((src[1] & 0x3f) << 6);
c |= ((src[2] & 0x3f) << 0); c |= ((src[2] & 0x3f) << 0);
} else if ((src[0] & 0xf8) == 0xf0) { } else if ((src[0] & 0xf8) == 0xf0) {
c = ((src[0] & 0x07) << 18); c = ((src[0] & 0x07) << 18);
c |= ((src[1] & 0x3f) << 12); c |= ((src[1] & 0x3f) << 12);
@ -30,20 +32,24 @@ void utf8_pack_char(char *dest, unsigned int c)
{ {
if (c <= 0x00007f) { if (c <= 0x00007f) {
dest[0] = c; dest[0] = c;
} else if (c <= 0x0007ff) { } else if (c <= 0x0007ff) {
dest[0] = (0xc0 | ((c >> 6) & 0xff)); dest[0] = (0xc0 | ((c >> 6) & 0xff));
dest[1] = (0x80 | ((c >> 0) & 0x3f)); dest[1] = (0x80 | ((c >> 0) & 0x3f));
} else if (c <= 0x00ffff) { } else if (c <= 0x00ffff) {
dest[0] = (0xe0 | ((c >> 12) & 0xff)); dest[0] = (0xe0 | ((c >> 12) & 0xff));
dest[1] = (0x80 | ((c >> 6) & 0x3f)); dest[1] = (0x80 | ((c >> 6) & 0x3f));
dest[2] = (0x80 | ((c >> 0) & 0x3f)); dest[2] = (0x80 | ((c >> 0) & 0x3f));
} else if (c <= 0x10ffff) { } else if (c <= 0x10ffff) {
dest[0] = (0xf0 | ((c >> 18) & 0xff)); dest[0] = (0xf0 | ((c >> 18) & 0xff));
dest[1] = (0x80 | ((c >> 12) & 0x3f)); dest[1] = (0x80 | ((c >> 12) & 0x3f));
dest[2] = (0x80 | ((c >> 6) & 0x3f)); dest[2] = (0x80 | ((c >> 6) & 0x3f));
dest[3] = (0x80 | ((c >> 0) & 0x3f)); dest[3] = (0x80 | ((c >> 0) & 0x3f));
} else { } else {
dest[0] = '?'; // should not happen dest[0] = '?'; /* should not happen */
} }
} }
@ -51,28 +57,16 @@ unsigned int utf8_char_length(unsigned int c)
{ {
if (c <= 0x00007f) { if (c <= 0x00007f) {
return 1; return 1;
} else if (c <= 0x0007ff) { } else if (c <= 0x0007ff) {
return 2; return 2;
} else if (c <= 0x00ffff) { } else if (c <= 0x00ffff) {
return 3; return 3;
} else if (c <= 0x10ffff) { } else if (c <= 0x10ffff) {
return 4; return 4;
} }
return 0; // should not happen return 0; /* should not happen */
}
char *utf8_code_to_string(unsigned int c)
{
unsigned int length = utf8_char_length(c) + 1;
char *buffer;
buffer = malloc(length);
if (buffer == NULL)
return NULL;
utf8_pack_char(buffer, c);
buffer[length] = 0;
return buffer;
} }