-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] Implemented wcsrtombs internal and public function #145794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
uzairnawaz
commented
Jun 25, 2025
- added internal wcsrtombs function
- began implementing public function
- build files
- fixed edge case with null dest; added tests
- refactored internal wcrtomb to work with wcsrtombs
- add test for invalid state
- added death test
@llvm/pr-subscribers-libc Author: Uzair Nawaz (uzairnawaz) Changes
Full diff: https://github.com/llvm/llvm-project/pull/145794.diff 14 Files Affected:
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..7985fe559bf7b 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1252,6 +1252,7 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
+ libc.src.wchar.wcsrtombs
)
endif()
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..db2f8540be7a7 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -175,6 +175,15 @@ functions:
- type: char *__restrict
- type: wchar_t
- type: mbstate_t *__restrict
+  - name: wcsrtombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict
- name: wctomb
standards:
- stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 86a47319f278a..22c97e8a0facd 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -37,6 +37,24 @@ add_object_library(
.mbstate
)
+# Internal wcsrtombs implementation; the public entrypoint in libc/src/wchar
+# depends on this target.
+add_object_library(
+  wcsrtombs
+  SRCS
+    wcsrtombs.cpp
+  HDRS
+    wcsrtombs.h
+  DEPENDS
+    .character_converter
+    .mbstate
+    .wcrtomb
+    libc.hdr.types.char32_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.libc_assert
+)
+
add_object_library(
mbrtowc
HDRS
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index a74a6f3ec34a6..38768fc52fc81 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -21,24 +21,28 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
- mbstate *__restrict ps) {
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps,
+ size_t max_written) {
static_assert(sizeof(wchar_t) == 4);
CharacterConverter cr(ps);
-
+
if (!cr.isValidState())
return Error(EINVAL);
-
+
+ char buf[sizeof(wchar_t) / sizeof(char)];
if (s == nullptr)
- return Error(EILSEQ);
-
- int status = cr.push(static_cast<char32_t>(wc));
- if (status != 0)
- return Error(EILSEQ);
-
+ s = buf;
+
+ // if cr isnt empty, it should be represented in mbstate already
+ if (cr.isEmpty()) {
+ int status = cr.push(static_cast<char32_t>(wc));
+ if (status != 0)
+ return Error(EILSEQ);
+ }
+
size_t count = 0;
- while (!cr.isEmpty()) {
+ while (!cr.isEmpty() && count < max_written) {
auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
LIBC_ASSERT(utf8.has_value());
@@ -46,6 +50,10 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
s++;
count++;
}
+
+ if (!cr.isEmpty()) // didn't complete the conversion
+ return -1;
+
return count;
}
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index bcd39a92a3b76..a2658ca581da9 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -18,7 +18,7 @@
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps, size_t max_written);
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsrtombs.cpp b/libc/src/__support/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..c2bd3b632f8de
--- /dev/null
+++ b/libc/src/__support/wchar/wcsrtombs.cpp
@@ -0,0 +1,58 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcsrtombs.h"
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the wide-character string that *src points at into multibyte
+// characters, one wide character at a time via internal::wcrtomb.
+//
+//   dst - output buffer; when null nothing is stored and len is ignored.
+//   src - address of the read cursor. It is advanced past every fully
+//         converted wide character and set to nullptr once the terminating
+//         null wide character has been converted.
+//   len - maximum number of bytes to produce (only honoured when dst is set).
+//   ps  - conversion state, forwarded to internal::wcrtomb.
+//
+// Returns the number of bytes produced, not counting the terminating null.
+// If the byte budget runs out part-way through a character, the budget itself
+// is returned. Errors reported by internal::wcrtomb are forwarded unchanged.
+//
+// NOTE(review): Error(-1) is not an errno value, yet the public wrapper
+// stores result.error() into libc_errno; EINVAL may be intended. Confirm.
+// NOTE(review): ISO C only updates *src when dst is non-null, while this
+// implementation updates it in both cases. Confirm this is intended.
+ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                          size_t len, mbstate *__restrict ps) {
+  if (src == nullptr)
+    return Error(-1);
+
+  // Without a destination there is no buffer capacity to respect, so the
+  // caller-supplied len is replaced by the largest possible budget.
+  const size_t limit = (dst == nullptr) ? SIZE_MAX : len;
+
+  size_t total = 0;
+  while (total < limit) {
+    char *out = (dst == nullptr) ? nullptr : dst + total;
+    const wchar_t wc = **src;
+
+    auto converted = internal::wcrtomb(out, wc, ps, limit - total);
+    if (!converted.has_value())
+      return converted; // forward the error
+
+    // internal::wcrtomb signals "ran out of room mid-character" with -1;
+    // the whole budget has been used at that point.
+    if (converted.value() == static_cast<size_t>(-1))
+      return limit;
+
+    // The terminating null has just been converted: finish without counting
+    // it and clear the caller's cursor.
+    if (wc == L'\0') {
+      *src = nullptr;
+      return total;
+    }
+
+    total += converted.value();
+    ++(*src);
+  }
+
+  return total;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsrtombs.h b/libc/src/__support/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..af49c2bca9e2a
--- /dev/null
+++ b/libc/src/__support/wchar/wcsrtombs.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcsrtombs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+ size_t len, mbstate *__restrict ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..5f6361b62a47f 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -48,6 +48,20 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)
+# Public wcsrtombs entrypoint. DEPENDS mirrors the headers that wcsrtombs.cpp
+# and wcsrtombs.h include, including the null-check macro header.
+add_entrypoint_object(
+  wcsrtombs
+  SRCS
+    wcsrtombs.cpp
+  HDRS
+    wcsrtombs.h
+  DEPENDS
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.libc_errno
+    libc.src.__support.macros.null_check
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcsrtombs
+)
+
add_entrypoint_object(
wctomb
SRCS
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 3e9df0599431e..4a9db213f82b0 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -23,16 +23,14 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
static internal::mbstate internal_mbstate;
// when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
- char buf[sizeof(wchar_t) / sizeof(char)];
- if (s == nullptr) {
- s = buf;
+ if (s == nullptr)
wc = L'\0';
- }
auto result = internal::wcrtomb(
s, wc,
ps == nullptr ? &internal_mbstate
- : reinterpret_cast<internal::mbstate *>(ps));
+ : reinterpret_cast<internal::mbstate *>(ps),
+ sizeof(wchar_t));
if (!result.has_value()) {
libc_errno = result.error();
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..97c8a1f61e8fc
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -0,0 +1,42 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsrtombs.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsrtombs.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public wcsrtombs entry point: a thin wrapper over internal::wcsrtombs.
+// A null src crashes via LIBC_CRASH_ON_NULLPTR. A null ps selects a
+// function-local static conversion state. On failure the internal error code
+// is stored in libc_errno and (size_t)-1 is returned.
+LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
+                   (char *__restrict dst, const wchar_t **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  // Fallback state used by every call that passes ps == nullptr.
+  static internal::mbstate internal_mbstate;
+
+  LIBC_CRASH_ON_NULLPTR(src);
+
+  internal::mbstate *state = &internal_mbstate;
+  if (ps != nullptr)
+    state = reinterpret_cast<internal::mbstate *>(ps);
+
+  auto result = internal::wcsrtombs(dst, src, len, state);
+  if (result.has_value())
+    return result.value();
+
+  libc_errno = result.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..103074cfcba63
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for wcsrtombs ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+ size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp
index 142302e6ae09b..56e26cf98d5c6 100644
--- a/libc/src/wchar/wctomb.cpp
+++ b/libc/src/wchar/wctomb.cpp
@@ -22,7 +22,7 @@ LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) {
if (s == nullptr)
return 0;
- auto result = internal::wcrtomb(s, wc, &internal_mbstate);
+ auto result = internal::wcrtomb(s, wc, &internal_mbstate, sizeof (wchar_t));
if (!result.has_value()) { // invalid wide character
libc_errno = EILSEQ;
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..02d33dfbcdde8 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -78,6 +78,20 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)
+# Unit tests for the public wcsrtombs entrypoint. The test source includes
+# ErrnoCheckingTest.h and the internal mbstate header, so both targets are
+# listed alongside the original dependencies.
+add_libc_test(
+  wcsrtombs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsrtombs_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsrtombs
+    libc.src.string.memset
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
add_libc_test(
wctomb_test
SUITE
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
new file mode 100644
index 0000000000000..8511756b11269
--- /dev/null
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -0,0 +1,213 @@
+//===-- Unittests for wcsrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWCSRToMBSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// One ASCII wide character becomes one byte plus the null terminator, and the
+// source pointer is cleared once the whole string has been consumed.
+TEST_F(LlvmLibcWCSRToMBSTest, SingleCharacterOneByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t *src = L"U";
+  char out[2] = {0, 0};
+
+  size_t written = LIBC_NAMESPACE::wcsrtombs(out, &src, sizeof(out), &state);
+
+  // the returned count excludes the terminator
+  ASSERT_EQ(written, static_cast<size_t>(1));
+  ASSERT_EQ(out[0], 'U');
+  ASSERT_EQ(out[1], '\0');
+  ASSERT_EQ(src, nullptr);
+}
+
+// Converts a two-character string over three calls whose len values coincide
+// with character boundaries, so no call has to stop inside a character.
+TEST_F(LlvmLibcWCSRToMBSTest, MultipleCompleteConversions) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0xff -> utf8: 0xc3 0xbf
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *src = L"\xFF\xAC15";
+  const wchar_t *src_begin = src;
+
+  // every byte starts as the sentinel 1 so untouched bytes can be detected
+  char out[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+
+  // first call: room for exactly the two bytes of the first character
+  size_t first = LIBC_NAMESPACE::wcsrtombs(out, &src, 2, &state);
+  ASSERT_EQ(first, static_cast<size_t>(2));
+  ASSERT_EQ(src, src_begin + 1);
+  for (size_t i = 0; i < 2; ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+  ASSERT_EQ(out[2], '\x01'); // not modified
+
+  // second call: room for exactly the three bytes of the second character
+  size_t second = LIBC_NAMESPACE::wcsrtombs(out + first, &src, 3, &state);
+  ASSERT_EQ(second, static_cast<size_t>(3));
+  ASSERT_EQ(src, src_begin + 2);
+  for (size_t i = 0; i < 5; ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+  ASSERT_EQ(out[5], '\x01'); // null byte not yet written
+
+  // third call: only the null terminator is left in the string
+  size_t third =
+      LIBC_NAMESPACE::wcsrtombs(out + first + second, &src, 50, &state);
+  ASSERT_EQ(third, static_cast<size_t>(0));
+  ASSERT_EQ(src, nullptr);
+  for (size_t i = 0; i < 6; ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+  ASSERT_EQ(out[6], '\x01'); // should not write beyond null terminator
+}
+
+// Drives a conversion through calls whose len values end in the middle of a
+// multibyte sequence. Each following call is expected to begin by emitting
+// the remaining bytes of the interrupted character.
+TEST_F(LlvmLibcWCSRToMBSTest, MultiplePartialConversions) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0xff -> utf8: 0xc3 0xbf
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *wcs = L"\xFF\xAC15";
+  const wchar_t *wcs_start = wcs;
+
+  // init with dummy value of 1 so that we can check when null byte written
+  char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+  size_t written = 0;
+  size_t count = 0;
+
+  // len == 1: only the lead byte of the first character fits, so the source
+  // pointer must not advance yet. The later offsets rely on count being 1.
+  count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 1, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(1));
+  ASSERT_EQ(wcs, wcs_start);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], '\x01');
+
+  // len == 2: finishes the first character and starts the second one
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 2, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, wcs_start + 1);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], '\x01');
+
+  // len == 3: finishes the second character and writes the null terminator,
+  // which is not included in the returned count
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 3, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, nullptr);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], expected[3]);
+  ASSERT_EQ(mbs[4], expected[4]);
+  ASSERT_EQ(mbs[5], expected[5]);
+  ASSERT_EQ(mbs[6], '\x01');
+}
+
+// With a null destination nothing is stored and len is ignored; the call
+// reports the byte length of the whole converted string.
+TEST_F(LlvmLibcWCSRToMBSTest, NullDestination) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *src = L"\x1F921\xAC15";
+
+  // len is 3, yet all 4 + 3 bytes are counted because there is no buffer
+  char *no_dst = nullptr;
+  size_t total = LIBC_NAMESPACE::wcsrtombs(no_dst, &src, 3, &state);
+  ASSERT_EQ(total, static_cast<size_t>(7));
+  // NOTE(review): ISO C leaves *src untouched when dst is null; this asserts
+  // the implementation's current behaviour of clearing it. Confirm.
+  ASSERT_EQ(src, nullptr);
+}
+
+// Same scenario as MultiplePartialConversions, but every call passes nullptr
+// for ps instead of an explicit mbstate_t.
+TEST_F(LlvmLibcWCSRToMBSTest, NullState) {
+  const wchar_t *src = L"\xFF\xAC15";
+  const wchar_t *src_begin = src;
+
+  // every byte starts as the sentinel 1 so untouched bytes can be detected
+  char out[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+  size_t total = 0;
+  size_t n = 0;
+
+  // len == 1: only the lead byte of the first character fits
+  n = LIBC_NAMESPACE::wcsrtombs(out, &src, 1, nullptr);
+  total += n;
+  ASSERT_EQ(n, static_cast<size_t>(1));
+  ASSERT_EQ(src, src_begin);
+  ASSERT_EQ(out[0], expected[0]);
+  ASSERT_EQ(out[1], '\x01');
+
+  // len == 2: finishes the first character and starts the second one
+  n = LIBC_NAMESPACE::wcsrtombs(out + total, &src, 2, nullptr);
+  total += n;
+  ASSERT_EQ(n, static_cast<size_t>(2));
+  ASSERT_EQ(src, src_begin + 1);
+  for (size_t i = 0; i < 3; ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+  ASSERT_EQ(out[3], '\x01');
+
+  // len == 3: finishes the second character and writes the terminator
+  n = LIBC_NAMESPACE::wcsrtombs(out + total, &src, 3, nullptr);
+  total += n;
+  ASSERT_EQ(n, static_cast<size_t>(2));
+  ASSERT_EQ(src, nullptr);
+  for (size_t i = 0; i < 6; ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+  ASSERT_EQ(out[6], '\x01');
+}
+
+// A wide character beyond the Unicode maximum (0x10ffff) makes the call fail
+// with EILSEQ, after the valid characters before it converted successfully.
+TEST_F(LlvmLibcWCSRToMBSTest, InvalidWchar) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // two valid characters (2 + 3 UTF-8 bytes) followed by 0x12ffff
+  const wchar_t *src = L"\xFF\xAC15\x12FFFF";
+  char out[15];
+
+  // the five bytes of the valid prefix fit exactly into len == 5
+  size_t n = LIBC_NAMESPACE::wcsrtombs(out, &src, 5, &state);
+  ASSERT_EQ(n, static_cast<size_t>(5));
+  ASSERT_TRUE(*src == static_cast<wchar_t>(0x12ffff));
+  ASSERT_ERRNO_SUCCESS();
+
+  // resuming at the invalid value must report an error
+  char *resume_at = out + n;
+  n = LIBC_NAMESPACE::wcsrtombs(resume_at, &src, 5, &state);
+  ASSERT_EQ(n, static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+// A conversion state that claims an impossible byte total is rejected with
+// EINVAL.
+TEST_F(LlvmLibcWCSRToMBSTest, InvalidState) {
+  // 9 total bytes is invalid
+  LIBC_NAMESPACE::internal::mbstate bad_state{0, 0, 9};
+  mbstate_t *ps = reinterpret_cast<mbstate_t *>(&bad_state);
+
+  const wchar_t *src = L"\xFF\xAC15";
+  char out[5];
+
+  // the call is expected to fail on the state, whatever the input is
+  size_t n = LIBC_NAMESPACE::wcsrtombs(out, &src, 5, ps);
+  ASSERT_EQ(n, static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EINVAL);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+TEST_F(LlvmLibcWCSRToMBSTest, NullSrc) {
+ // Passing a null src pointer should crash the program.
+ char mbs[] = {0, 0};
+ EXPECT_DEATH(
+ [&mbs] {
+ LIBC_NAMESPACE::wcsrtombs(mbs, nullptr, 2, nullptr);
+ },
+ WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS && !LIBC_HAS_SANITIZER
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This feels like two patches: one refactoring wcrtomb and one adding wcsrtombs. Splitting them would make this easier to review.
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps, | ||
size_t max_written) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you're calling this function `wcrtomb`, it should match that interface. If you want to have a different interface, then that's fine, but it should have a different name to avoid confusion.
ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src, | ||
size_t len, mbstate *__restrict ps) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is `wcsnrtombs`, not `wcsrtombs`.