From c5d8d1759972c3ff3c22a2dc9bb827b3eb892520 Mon Sep 17 00:00:00 2001 From: Vlasta Vesely Date: Thu, 25 Feb 2021 10:15:08 +0100 Subject: [PATCH] test the utf8.c unit --- Makefile.in | 7 +++++-- tests/main.c | 3 +++ tests/utf8.c | 35 +++++++++++++++++++++++++++++++++++ tests/utf8.h | 8 ++++++++ utf8.c | 30 ++++++++++++------------------ 5 files changed, 63 insertions(+), 20 deletions(-) create mode 100644 tests/utf8.c create mode 100644 tests/utf8.h diff --git a/Makefile.in b/Makefile.in index 02e165b..206cce2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -20,8 +20,11 @@ LFLAGS = TEST_CFLAGS = @CFLAGS@ @CHECK_CFLAGS@ TEST_LFLAGS = @CHECK_LIBS@ @COVERAGE_LFLAGS@ -OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o velthuis.o -TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o tests/velthuis.o +OBJECTS = iast.o iast-czech.o transliteration.o transcription.o utf8.o \ + velthuis.o + +TEST_OBJECTS = tests/main.o tests/translit.o tests/transcript.o \ + tests/velthuis.o tests/utf8.o AUX_FILES = Makefile configure aclocal.m4 install-sh config.h* *.log \ *.status *.cache diff --git a/tests/main.c b/tests/main.c index d1fe0d8..3684828 100644 --- a/tests/main.c +++ b/tests/main.c @@ -2,6 +2,7 @@ #include "translit.h" #include "transcript.h" #include "velthuis.h" +#include "utf8.h" static Suite *create_test_suite() { @@ -14,6 +15,8 @@ static Suite *create_test_suite() register_translit_tests(test_case); register_transcript_tests(test_case); register_velthuis_encoder_tests(test_case); + register_utf8_tests(test_case); + suite_add_tcase(suite, test_case); return suite; diff --git a/tests/utf8.c b/tests/utf8.c new file mode 100644 index 0000000..40e0398 --- /dev/null +++ b/tests/utf8.c @@ -0,0 +1,35 @@ +#include "test.h" +#include "utf8.h" +#include "../utf8.h" + +START_TEST(test_utf8) +{ + char buf[10] = {}; + unsigned int c; + + c = utf8_unpack_char("\u0024"); + utf8_pack_char(buf, c); + ck_assert_int_eq(1, utf8_char_length(c)); + 
ck_assert_str_eq("\u0024", buf); + + c = utf8_unpack_char("\u00a2"); + utf8_pack_char(buf, c); + ck_assert_int_eq(2, utf8_char_length(c)); + ck_assert_str_eq("\u00a2", buf); + + c = utf8_unpack_char("\u0939"); + utf8_pack_char(buf, c); + ck_assert_int_eq(3, utf8_char_length(c)); + ck_assert_str_eq("\u0939", buf); + + c = utf8_unpack_char("\U00010348"); + utf8_pack_char(buf, c); + ck_assert_int_eq(4, utf8_char_length(c)); + ck_assert_str_eq("\U00010348", buf); +} +END_TEST + +void register_utf8_tests(TCase *test_case) +{ + tcase_add_test(test_case, test_utf8); +} diff --git a/tests/utf8.h b/tests/utf8.h new file mode 100644 index 0000000..4675a5d --- /dev/null +++ b/tests/utf8.h @@ -0,0 +1,8 @@ +#ifndef __TEST_UTF8_H +#define __TEST_UTF8_H + +#include <check.h> + +void register_utf8_tests(TCase *test_case); + +#endif /* __TEST_UTF8_H */ diff --git a/utf8.c b/utf8.c index 394f086..4af262d 100644 --- a/utf8.c +++ b/utf8.c @@ -1,6 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include <stdlib.h> #include "utf8.h" unsigned int utf8_unpack_char(const char *src) @@ -9,13 +8,16 @@ unsigned int utf8_unpack_char(const char *src) if ((src[0] & 0x80) == 0x00) { c = ((src[0] & 0x7f) << 0); + } else if ((src[0] & 0xe0) == 0xc0) { c = ((src[0] & 0x1f) << 6); c |= ((src[1] & 0x3f) << 0); + } else if ((src[0] & 0xf0) == 0xe0) { c = ((src[0] & 0x0f) << 12); c |= ((src[1] & 0x3f) << 6); c |= ((src[2] & 0x3f) << 0); + } else if ((src[0] & 0xf8) == 0xf0) { c = ((src[0] & 0x07) << 18); c |= ((src[1] & 0x3f) << 12); @@ -30,20 +32,24 @@ void utf8_pack_char(char *dest, unsigned int c) { if (c <= 0x00007f) { dest[0] = c; + } else if (c <= 0x0007ff) { dest[0] = (0xc0 | ((c >> 6) & 0xff)); dest[1] = (0x80 | ((c >> 0) & 0x3f)); + } else if (c <= 0x00ffff) { dest[0] = (0xe0 | ((c >> 12) & 0xff)); dest[1] = (0x80 | ((c >> 6) & 0x3f)); dest[2] = (0x80 | ((c >> 0) & 0x3f)); + } else if (c <= 0x10ffff) { dest[0] = (0xf0 | ((c >> 18) & 0xff)); dest[1] = (0x80 | ((c >> 12) & 0x3f)); dest[2] = (0x80 | ((c >> 6) & 
0x3f)); dest[3] = (0x80 | ((c >> 0) & 0x3f)); + } else { - dest[0] = '?'; // should not happen + dest[0] = '?'; /* should not happen */ } } @@ -51,28 +57,16 @@ unsigned int utf8_char_length(unsigned int c) { if (c <= 0x00007f) { return 1; + } else if (c <= 0x0007ff) { return 2; + } else if (c <= 0x00ffff) { return 3; + } else if (c <= 0x10ffff) { return 4; } - return 0; // should not happen -} - -char *utf8_code_to_string(unsigned int c) -{ - unsigned int length = utf8_char_length(c) + 1; - char *buffer; - - buffer = malloc(length); - if (buffer == NULL) - return NULL; - - utf8_pack_char(buffer, c); - buffer[length] = 0; - - return buffer; + return 0; /* should not happen */ }