Fix wchar_from_utf8 to cope with surrogate pairs.
author Robin Watts <[email protected]>
Fri, 14 Jun 2024 15:11:38 +0000 (16:11 +0100)
committer Robin Watts <[email protected]>
Tue, 23 Jul 2024 19:02:01 +0000 (20:02 +0100)
It would be nice to use fz_wchar_from_utf8 instead of this,
but we don't have an fz_context available in the circumstances
where this is called. So live with the code duplication.

source/fitz/time.c

index bb8257c95c95a27d0a3b6bfdd7fc8849842e8da7..d52d266f1168b9593d00f9291550773e2aaa3b4d 100644 (file)
@@ -95,6 +95,8 @@ wchar_from_utf8(const char *s)
 {
        wchar_t *d, *r;
        int c;
+       /* This allocation is larger than we need, but it's guaranteed
+        * to be safe. */
        r = d = malloc((strlen(s) + 1) * sizeof(wchar_t));
        if (!r)
                return NULL;
@@ -103,7 +105,11 @@ wchar_from_utf8(const char *s)
                /* Truncating c to a wchar_t can be problematic if c
                 * is 0x10000. */
                if (c >= 0x10000)
-                       c = FZ_REPLACEMENT_CHARACTER;
+               {
+                       c -= 0x10000;
+                       *d++ = 0xd800 + (c>>10);
+                       c = 0xdc00 + (c&1023);
+               }
                *d++ = c;
        }
        *d = 0;