Skip to content

[libc] Implemented wcsrtombs internal and public function #145794

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from

Conversation

uzairnawaz
Copy link
Contributor

  • added internal wcsrtombs function
  • began implementing public function
  • build files
  • fixed edge case with null dest; added tests
  • refactored internal wcrtomb to work with wcsrtombs
  • add test for invalid state
  • added death test

@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-libc

Author: Uzair Nawaz (uzairnawaz)

Changes
  • added internal wcsrtombs function
  • began implementing public function
  • build files
  • fixed edge case with null dest; added tests
  • refactored internal wcrtomb to work with wcsrtombs
  • add test for invalid state
  • added death test

Full diff: https://github.com/llvm/llvm-project/pull/145794.diff

14 Files Affected:

  • (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
  • (modified) libc/include/wchar.yaml (+9)
  • (modified) libc/src/__support/wchar/CMakeLists.txt (+18)
  • (modified) libc/src/__support/wchar/wcrtomb.cpp (+19-11)
  • (modified) libc/src/__support/wchar/wcrtomb.h (+1-1)
  • (added) libc/src/__support/wchar/wcsrtombs.cpp (+58)
  • (added) libc/src/__support/wchar/wcsrtombs.h (+27)
  • (modified) libc/src/wchar/CMakeLists.txt (+14)
  • (modified) libc/src/wchar/wcrtomb.cpp (+3-5)
  • (added) libc/src/wchar/wcsrtombs.cpp (+42)
  • (added) libc/src/wchar/wcsrtombs.h (+24)
  • (modified) libc/src/wchar/wctomb.cpp (+1-1)
  • (modified) libc/test/src/wchar/CMakeLists.txt (+14)
  • (added) libc/test/src/wchar/wcsrtombs_test.cpp (+213)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..7985fe559bf7b 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1252,6 +1252,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.wchar.mbtowc
     libc.src.wchar.wcrtomb
     libc.src.wchar.wctomb
+    libc.src.wchar.wcsrtombs
   )
 endif()
 
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..db2f8540be7a7 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -175,6 +175,15 @@ functions:
       - type: char *__restrict
       - type: wchar_t
       - type: mbstate_t *__restrict
+  - name: wcsrtombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict
   - name: wctomb
     standards:
       - stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 86a47319f278a..22c97e8a0facd 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -37,6 +37,24 @@ add_object_library(
     .mbstate
 )
 
+add_object_library(
+  wcsrtombs
+  HDRS
+    wcsrtombs.h
+  SRCS 
+    wcsrtombs.cpp
+  DEPENDS
+    libc.hdr.types.char32_t
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.error_or
+    libc.src.__support.common
+    libc.src.__support.libc_assert
+    .character_converter
+    .mbstate
+    .wcrtomb
+)
+
 add_object_library(
   mbrtowc
   HDRS
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index a74a6f3ec34a6..38768fc52fc81 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -21,24 +21,28 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
-                        mbstate *__restrict ps) {
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps,
+                        size_t max_written) {
   static_assert(sizeof(wchar_t) == 4);
 
   CharacterConverter cr(ps);
-
+  
   if (!cr.isValidState())
     return Error(EINVAL);
-
+  
+  char buf[sizeof(wchar_t) / sizeof(char)];
   if (s == nullptr)
-    return Error(EILSEQ);
-
-  int status = cr.push(static_cast<char32_t>(wc));
-  if (status != 0)
-    return Error(EILSEQ);
-
+    s = buf;
+  
+  // if cr isnt empty, it should be represented in mbstate already
+  if (cr.isEmpty()) {
+    int status = cr.push(static_cast<char32_t>(wc));
+    if (status != 0)
+      return Error(EILSEQ);
+  }
+  
   size_t count = 0;
-  while (!cr.isEmpty()) {
+  while (!cr.isEmpty() && count < max_written) {
     auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
     LIBC_ASSERT(utf8.has_value());
 
@@ -46,6 +50,10 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
     s++;
     count++;
   }
+
+  if (!cr.isEmpty()) // didn't complete the conversion
+    return -1;
+
   return count;
 }
 
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index bcd39a92a3b76..a2658ca581da9 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -18,7 +18,7 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps, size_t max_written);
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsrtombs.cpp b/libc/src/__support/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..c2bd3b632f8de
--- /dev/null
+++ b/libc/src/__support/wchar/wcsrtombs.cpp
@@ -0,0 +1,58 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcsrtombs.h"
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the wide string at *src into bytes stored in dst by calling
+// internal::wcrtomb once per wide character, writing at most len bytes.
+//
+// Returns the number of bytes produced, not counting the byte generated for
+// the terminating L'\0'. When the terminator is converted, *src is set to
+// nullptr; otherwise *src is left pointing at the next wide character to
+// convert. An error reported by internal::wcrtomb is returned unchanged.
+//
+// NOTE(review): Error(-1) is not an errno value, and the public entry point
+// stores result.error() into libc_errno. EINVAL looks like the intended code;
+// confirm and include the errno macros if so.
+// NOTE(review): *src is advanced / cleared even when dst is nullptr. ISO C
+// appears to require that *src is only updated when dst is non-null; confirm
+// against the standard before relying on this side effect.
+ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                          size_t len, mbstate *__restrict ps) {
+  if (src == nullptr)
+    return Error(-1);
+
+  // ignore the len parameter when there's no destination string
+  if (dst == nullptr)
+    len = SIZE_MAX;
+
+  size_t bytes_written = 0;
+  while (bytes_written < len) {
+    // the last argument caps how many bytes wcrtomb may emit for this
+    // character so that the total never exceeds len
+    auto result =
+        internal::wcrtomb(dst == nullptr ? nullptr : dst + bytes_written, **src,
+                          ps, len - bytes_written);
+    if (!result.has_value())
+      return result; // forward the error
+
+    // wcrtomb returns (size_t)-1 when it hit the byte cap with part of the
+    // character still pending in ps; every remaining byte of the budget was
+    // used, so the total written equals len
+    if (result.value() == static_cast<size_t>(-1))
+      return len;
+
+    // terminate the loop after converting the null wide character; the
+    // terminator's byte is deliberately left out of the returned count
+    if (**src == L'\0') {
+      *src = nullptr;
+      return bytes_written;
+    }
+
+    bytes_written += result.value();
+    (*src)++;
+  }
+
+  return bytes_written;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsrtombs.h b/libc/src/__support/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..af49c2bca9e2a
--- /dev/null
+++ b/libc/src/__support/wchar/wcsrtombs.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcsrtombs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                          size_t len, mbstate *__restrict ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..5f6361b62a47f 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -48,6 +48,20 @@ add_entrypoint_object(
     libc.src.__support.wchar.mbstate
 )
 
+add_entrypoint_object(
+  wcsrtombs
+  SRCS
+    wcsrtombs.cpp
+  HDRS
+    wcsrtombs.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.wcsrtombs
+    libc.src.__support.wchar.mbstate
+)
+
 add_entrypoint_object(
   wctomb
   SRCS
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 3e9df0599431e..4a9db213f82b0 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -23,16 +23,14 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
   static internal::mbstate internal_mbstate;
 
   // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
-  char buf[sizeof(wchar_t) / sizeof(char)];
-  if (s == nullptr) {
-    s = buf;
+  if (s == nullptr)
     wc = L'\0';
-  }
 
   auto result = internal::wcrtomb(
       s, wc,
       ps == nullptr ? &internal_mbstate
-                    : reinterpret_cast<internal::mbstate *>(ps));
+                    : reinterpret_cast<internal::mbstate *>(ps),
+      sizeof(wchar_t));
 
   if (!result.has_value()) {
     libc_errno = result.error();
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..97c8a1f61e8fc
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -0,0 +1,42 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsrtombs.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsrtombs.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public entry point: forwards to internal::wcsrtombs and translates a failed
+// result into errno plus a (size_t)-1 return value.
+LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
+                   (char *__restrict dst, const wchar_t **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  // conversion state used whenever the caller passes a null ps
+  static internal::mbstate internal_mbstate;
+
+  LIBC_CRASH_ON_NULLPTR(src);
+
+  internal::mbstate *state = &internal_mbstate;
+  if (ps != nullptr)
+    state = reinterpret_cast<internal::mbstate *>(ps);
+
+  auto result = internal::wcsrtombs(dst, src, len, state);
+  if (result.has_value())
+    return result.value();
+
+  // report the failure through errno
+  libc_errno = result.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..103074cfcba63
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for wcsrtombs ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
+                 size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp
index 142302e6ae09b..56e26cf98d5c6 100644
--- a/libc/src/wchar/wctomb.cpp
+++ b/libc/src/wchar/wctomb.cpp
@@ -22,7 +22,7 @@ LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) {
   if (s == nullptr)
     return 0;
 
-  auto result = internal::wcrtomb(s, wc, &internal_mbstate);
+  auto result = internal::wcrtomb(s, wc, &internal_mbstate, sizeof (wchar_t));
 
   if (!result.has_value()) { // invalid wide character
     libc_errno = EILSEQ;
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..02d33dfbcdde8 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -78,6 +78,20 @@ add_libc_test(
     libc.test.UnitTest.ErrnoCheckingTest
 )
 
+add_libc_test(
+  wcsrtombs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsrtombs_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsrtombs
+    libc.src.string.memset
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.libc_errno
+)
+
 add_libc_test(
   wctomb_test
   SUITE
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
new file mode 100644
index 0000000000000..8511756b11269
--- /dev/null
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -0,0 +1,213 @@
+//===-- Unittests for wcsrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWCSRToMBSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// A one-character ASCII string converts to a single byte followed by the null
+// terminator; the returned count excludes the terminator and wcs is reset to
+// nullptr because the whole string was consumed.
+TEST_F(LlvmLibcWCSRToMBSTest, SingleCharacterOneByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  const wchar_t *wcs = L"U";
+  char mbs[] = {0, 0};
+  size_t cnt = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 2, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(1));
+  ASSERT_EQ(mbs[0], 'U');
+  ASSERT_EQ(mbs[1], '\0');
+  ASSERT_EQ(wcs, nullptr);
+}
+
+// Converts a two-character string over three calls whose len values (2, 3 and
+// 50) each cover whole multibyte characters, so no call stops mid-character.
+TEST_F(LlvmLibcWCSRToMBSTest, MultipleCompleteConversions) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0xff -> utf8: 0xc3 0xbf
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *wcs = L"\xFF\xAC15";
+  const wchar_t *wcs_start = wcs;
+
+  // init with dummy value of 1 so that we can check when null byte written
+  char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+
+  // first call: exactly enough room for the 2-byte character
+  size_t cnt1 = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 2, &state);
+  ASSERT_EQ(cnt1, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, wcs_start + 1);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], '\x01'); // not modified
+
+  // second call: exactly enough room for the 3-byte character
+  size_t cnt2 = LIBC_NAMESPACE::wcsrtombs(mbs + cnt1, &wcs, 3, &state);
+  ASSERT_EQ(cnt2, static_cast<size_t>(3));
+  ASSERT_EQ(wcs, wcs_start + 2);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], expected[3]);
+  ASSERT_EQ(mbs[4], expected[4]);
+  ASSERT_EQ(mbs[5], '\x01'); // null byte not yet written
+
+  // all that is left in the string is the null terminator
+  size_t cnt3 = LIBC_NAMESPACE::wcsrtombs(mbs + cnt1 + cnt2, &wcs, 50, &state);
+  ASSERT_EQ(cnt3, static_cast<size_t>(0));
+  ASSERT_EQ(wcs, nullptr);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], expected[3]);
+  ASSERT_EQ(mbs[4], expected[4]);
+  ASSERT_EQ(mbs[5], expected[5]);
+  ASSERT_EQ(mbs[6], '\x01'); // should not write beyond null terminator
+}
+
+// Converts a two-character string over three calls whose len values (1, 2 and
+// 3) cut multibyte characters in half, so the pending bytes of a character
+// have to be carried from one call to the next through the mbstate_t.
+TEST_F(LlvmLibcWCSRToMBSTest, MultiplePartialConversions) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0xff -> utf8: 0xc3 0xbf
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *wcs = L"\xFF\xAC15";
+  const wchar_t *wcs_start = wcs;
+
+  // init with dummy value of 1 so that we can check when null byte written
+  char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+  size_t written = 0;
+  size_t count = 0;
+
+  // room for only the first byte of the 2-byte character: the buffer is
+  // filled (count == len) and wcs stays on the unfinished character
+  count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 1, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(1));
+  ASSERT_EQ(wcs, wcs_start);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], '\x01');
+
+  // finishes the first character and emits the first byte of the second
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 2, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, wcs_start + 1);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], '\x01');
+
+  // finishes the second character (2 bytes) and writes the terminator, which
+  // is not included in the returned count
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 3, &state);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, nullptr);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], expected[3]);
+  ASSERT_EQ(mbs[4], expected[4]);
+  ASSERT_EQ(mbs[5], expected[5]);
+  ASSERT_EQ(mbs[6], '\x01');
+}
+
+TEST_F(LlvmLibcWCSRToMBSTest, NullDestination) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
+  // utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  const wchar_t *wcs = L"\x1F921\xAC15";
+
+  // a null destination means the converted bytes are not stored and the len
+  // parameter is ignored, so the full length (4 + 3 bytes) is returned.
+  // NOTE(review): this test also expects wcs to be reset to nullptr, but ISO C
+  // appears to update *src only when dst is non-null; confirm the expectation.
+  size_t count = LIBC_NAMESPACE::wcsrtombs(nullptr, &wcs, 3, &state);
+  ASSERT_EQ(count, static_cast<size_t>(7));
+  ASSERT_EQ(wcs, nullptr);
+}
+
+TEST_F(LlvmLibcWCSRToMBSTest, NullState) {
+  // Same as the MultiplePartialConversions test, except that ps is nullptr, so
+  // the entry point falls back to its own static mbstate to carry the pending
+  // bytes of a character from one call to the next.
+
+  const wchar_t *wcs = L"\xFF\xAC15";
+  const wchar_t *wcs_start = wcs;
+
+  // init with dummy value of 1 so that we can check when null byte written
+  char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
+  char expected[6] = {'\xC3', '\xBF', '\xEA', '\xB0', '\x95', '\x00'};
+  size_t written = 0;
+  size_t count = 0;
+
+  // only the first byte of the 2-byte character fits
+  count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 1, nullptr);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(1));
+  ASSERT_EQ(wcs, wcs_start);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], '\x01');
+
+  // finishes the first character and starts the second
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 2, nullptr);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, wcs_start + 1);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], '\x01');
+
+  // finishes the second character and writes the (uncounted) terminator
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 3, nullptr);
+  written += count;
+  ASSERT_EQ(count, static_cast<size_t>(2));
+  ASSERT_EQ(wcs, nullptr);
+  ASSERT_EQ(mbs[0], expected[0]);
+  ASSERT_EQ(mbs[1], expected[1]);
+  ASSERT_EQ(mbs[2], expected[2]);
+  ASSERT_EQ(mbs[3], expected[3]);
+  ASSERT_EQ(mbs[4], expected[4]);
+  ASSERT_EQ(mbs[5], expected[5]);
+  ASSERT_EQ(mbs[6], '\x01');
+}
+
+// A string whose third wide character (0x12FFFF) lies above the largest
+// Unicode code point (0x10FFFF): the valid prefix converts normally, then the
+// next call fails with EILSEQ.
+TEST_F(LlvmLibcWCSRToMBSTest, InvalidWchar) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t *wcs = L"\xFF\xAC15\x12FFFF";
+  char mbs[15];
+  // convert the two valid wchars (2 + 3 = 5 bytes); wcs stops on the bad one
+  size_t count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 5, &state);
+  ASSERT_EQ(count, static_cast<size_t>(5));
+  ASSERT_TRUE(*wcs == static_cast<wchar_t>(0x12ffff));
+  ASSERT_ERRNO_SUCCESS();
+
+  count = LIBC_NAMESPACE::wcsrtombs(mbs + count, &wcs, 5, &state); // invalid
+  ASSERT_EQ(count, static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcWCSRToMBSTest, InvalidState) {
+  LIBC_NAMESPACE::internal::mbstate state{0, 0, 9}; // 9 total bytes is invalid
+  const wchar_t *wcs = L"\xFF\xAC15";
+  char mbs[5];
+  // the string itself is valid, but the corrupt state must be rejected
+  size_t count = LIBC_NAMESPACE::wcsrtombs(
+      mbs, &wcs, 5, reinterpret_cast<mbstate_t *>(&state));
+  ASSERT_EQ(count, static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EINVAL);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+TEST_F(LlvmLibcWCSRToMBSTest, NullSrc) {
+  // Passing in a nullptr should crash the program.
+  char mbs[] = {0, 0};
+  EXPECT_DEATH(
+      [&mbs] {
+        LIBC_NAMESPACE::wcsrtombs(mbs, nullptr, 2, nullptr);
+      },
+      WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS && !LIBC_HAS_SANITIZER

Copy link

github-actions bot commented Jun 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like two patches, one refactoring wcrtomb and one adding wcsrtombs. Splitting them would make this easier to review

Comment on lines +24 to +25
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps,
size_t max_written) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're calling this function wcrtomb it should match that interface. If you want to have a different interface, then that's fine but it should have a different name to avoid confusion.

Comment on lines +23 to +24
ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
size_t len, mbstate *__restrict ps) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is wcsnrtombs, not wcsrtombs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants