Skip to content

[libc] mbsrtowcs implementation #145791

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from

Conversation

sribee8
Copy link
Contributor

@sribee8 sribee8 commented Jun 25, 2025

Implemented mbsrtowcs and tests for the function.

@llvmbot llvmbot added the libc label Jun 25, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 25, 2025

@llvm/pr-subscribers-libc

Author: None (sribee8)

Changes

Implemented mbsrtowcs and tests for the function.


Full diff: https://github.com/llvm/llvm-project/pull/145791.diff

10 Files Affected:

  • (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
  • (modified) libc/include/wchar.yaml (+9)
  • (modified) libc/src/__support/wchar/CMakeLists.txt (+16)
  • (added) libc/src/__support/wchar/mbsrtowcs.cpp (+44)
  • (added) libc/src/__support/wchar/mbsrtowcs.h (+29)
  • (modified) libc/src/wchar/CMakeLists.txt (+18)
  • (added) libc/src/wchar/mbsrtowcs.cpp (+41)
  • (added) libc/src/wchar/mbsrtowcs.h (+24)
  • (modified) libc/test/src/wchar/CMakeLists.txt (+14)
  • (added) libc/test/src/wchar/mbsrtowcs_test.cpp (+132)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..e615702f7bbb8 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1249,6 +1249,7 @@ if(LLVM_LIBC_FULL_BUILD)
 
     # wchar.h entrypoints
     libc.src.wchar.mbrtowc
+    libc.src.wchar.mbsrtowcs
     libc.src.wchar.mbtowc
     libc.src.wchar.wcrtomb
     libc.src.wchar.wctomb
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..576cf09b86696 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -38,6 +38,15 @@ functions:
       - type: const char *__restrict
       - type: size_t
       - type: mbstate_t *__restrict
+  - name: mbsrtowcs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const char **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict
   - name: mbtowc
     standards:
       - stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 86a47319f278a..c06b1023180ad 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -53,3 +53,19 @@ add_object_library(
   .character_converter
   .mbstate
 )
+
+# Internal (non-entrypoint) implementation of mbsrtowcs, shared with the
+# public wrapper in libc/src/wchar.
+add_object_library(
+  mbsrtowcs
+  HDRS
+    mbsrtowcs.h
+  SRCS
+    mbsrtowcs.cpp
+  DEPENDS
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.hdr.types.size_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.macros.config
+    .mbstate
+    .mbrtowc
+)
diff --git a/libc/src/__support/wchar/mbsrtowcs.cpp b/libc/src/__support/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000000000..4edda2bed6e44
--- /dev/null
+++ b/libc/src/__support/wchar/mbsrtowcs.cpp
@@ -0,0 +1,44 @@
+//===-- Implementation for mbsrtowcs function -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/mbsrtowcs.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the multibyte string at *src into at most `len` wide characters
+// stored in dst, one mbrtowc call per wide character.
+//
+// On success returns the number of wide characters written, not counting a
+// terminating null. If the terminator is converted, *src is set to nullptr;
+// otherwise *src is left just past the last character converted. Any error
+// reported by mbrtowc is returned unchanged.
+//
+// dst must be non-null: every iteration writes through it and reads it back.
+ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                          size_t len, mbstate *__restrict ps) {
+  // Byte limit handed to mbrtowc for each individual character.
+  constexpr size_t BYTES_PER_CALL = 4;
+
+  size_t converted = 0;
+  while (converted < len) {
+    auto result = mbrtowc(dst, *src, BYTES_PER_CALL, ps);
+    // Encoding error or invalid mbstate: propagate as-is.
+    if (!result.has_value())
+      return Error(result.error());
+
+    // The terminator was just stored; it is not included in the count.
+    if (*dst == L'\0') {
+      *src = nullptr;
+      return converted;
+    }
+
+    // Step past the bytes consumed for this character and move to the next
+    // output slot.
+    *src += result.value();
+    ++converted;
+    ++dst;
+  }
+  return converted;
+}
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/mbsrtowcs.h b/libc/src/__support/wchar/mbsrtowcs.h
new file mode 100644
index 0000000000000..5eda23fa7baad
--- /dev/null
+++ b/libc/src/__support/wchar/mbsrtowcs.h
@@ -0,0 +1,29 @@
+//===-- Implementation header for mbsrtowcs function ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the multibyte string at *src into at most `len` wide characters
+// written to dst, using `ps` as the conversion state.
+//
+// Returns the number of wide characters written, excluding a terminating
+// null, or the error produced by the underlying mbrtowc call. When the
+// terminator is converted *src is set to nullptr; otherwise *src is advanced
+// past the last character converted. dst must point to writable storage.
+ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                          size_t len, mbstate *__restrict ps);
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..7a27ff8544a1e 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -78,6 +78,24 @@ add_entrypoint_object(
     libc.src.__support.wchar.mbstate
 )
 
+# Public mbsrtowcs entrypoint; the conversion itself is implemented in
+# libc.src.__support.wchar.mbsrtowcs.
+add_entrypoint_object(
+  mbsrtowcs
+  SRCS
+    mbsrtowcs.cpp
+  HDRS
+    mbsrtowcs.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.wchar.mbsrtowcs
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.macros.null_check
+)
+
 add_entrypoint_object(
   mbtowc
   SRCS
diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000000000..a7e534f4b57c2
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.cpp
@@ -0,0 +1,41 @@
+//===-- Implementation of mbsrtowcs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsrtowcs.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbsrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public mbsrtowcs entry point.
+//
+// Converts the multibyte string at *src to wide characters. When dst is
+// non-null at most `len` wide characters are stored and *src is updated by
+// the internal implementation. When dst is null, `len` is ignored, nothing is
+// stored, *src is left untouched, and the return value is the number of wide
+// characters the whole string converts to (excluding the terminator).
+//
+// A null `ps` selects a function-local static conversion state. On failure
+// errno is set from the internal error and (size_t)-1 is returned. A null
+// `src` is handed to LIBC_CRASH_ON_NULLPTR.
+LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  LIBC_CRASH_ON_NULLPTR(src);
+  static internal::mbstate internal_mbstate;
+  internal::mbstate *state =
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps);
+
+  if (dst != nullptr) {
+    auto ret = internal::mbsrtowcs(dst, src, len, state);
+    if (!ret.has_value()) {
+      // Encoding failure
+      libc_errno = ret.error();
+      return static_cast<size_t>(-1);
+    }
+    return ret.value();
+  }
+
+  // Counting mode (dst == nullptr). Convert through a small fixed scratch
+  // buffer instead of a `len`-sized variable-length array: `len` must be
+  // ignored here (callers commonly pass 0), and a caller-sized stack array is
+  // neither standard C++ nor bounded. Work on a copy of *src so the caller's
+  // pointer is not modified.
+  constexpr size_t SCRATCH_LEN = 32;
+  wchar_t scratch[SCRATCH_LEN];
+  const char *cursor = *src;
+  size_t total = 0;
+  // The internal routine sets cursor to nullptr once it converts the
+  // terminator; until then each pass consumes SCRATCH_LEN characters.
+  while (cursor != nullptr) {
+    auto ret = internal::mbsrtowcs(scratch, &cursor, SCRATCH_LEN, state);
+    if (!ret.has_value()) {
+      // Encoding failure
+      libc_errno = ret.error();
+      return static_cast<size_t>(-1);
+    }
+    total += ret.value();
+  }
+  return total;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h
new file mode 100644
index 0000000000000..f8d4cc26e63ae
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsrtowcs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Converts the multibyte string referenced by *src into wide characters.
+// Returns the number of wide characters converted, not counting the
+// terminating null, or (size_t)-1 with errno set on failure. A null `ps`
+// makes the implementation use its own internal conversion state.
+size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                 size_t len, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..44f0e7238012b 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -39,6 +39,20 @@ add_libc_test(
     libc.test.UnitTest.ErrnoCheckingTest
 )
 
+# Unit tests for the public mbsrtowcs entrypoint.
+add_libc_test(
+  mbsrtowcs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbsrtowcs_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.string.memset
+    libc.src.wchar.mbsrtowcs
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
 add_libc_test(
   mbtowc_test
   SUITE
diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp
new file mode 100644
index 0000000000000..3fec3e76d3f68
--- /dev/null
+++ b/libc/test/src/wchar/mbsrtowcs_test.cpp
@@ -0,0 +1,132 @@
+//===-- Unittests for mbsrtowcs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbsrtowcs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// A single ASCII byte converts to one wide character plus the terminator,
+// using an explicitly supplied, zero-initialized conversion state.
+TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneCharacter) {
+  // The state must be an actual object: zeroing a pointer variable with
+  // sizeof(mbstate_t) would write past it and pass a null state instead.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  const char *ch = "A";
+  wchar_t dest[2];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_TRUE(dest[0] == L'A');
+  ASSERT_TRUE(dest[1] == L'\0');
+  // Should not count null terminator in number
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // Should set ch to nullptr after reading null terminator
+  ASSERT_EQ(ch, nullptr);
+}
+
+// One 4-byte UTF-8 sequence converts to a single wide character, using the
+// implementation's internal state (ps == nullptr).
+TEST_F(LlvmLibcMBSRToWCSTest, MultiByteOneCharacter) {
+  const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
+  wchar_t dest[2];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 2, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // 128569 == U+1F639
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_TRUE(dest[1] == L'\0');
+  // Should not count null terminator in number
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // Should set src to nullptr after reading null terminator
+  ASSERT_EQ(src, nullptr);
+}
+
+// Two consecutive 4-byte sequences convert to two wide characters followed by
+// the terminator.
+TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
+  // Two laughing cat emojis "😹😹"
+  const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // 128569 == U+1F639
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_TRUE(dest[2] == L'\0');
+  // Should not count null terminator in number
+  ASSERT_EQ(static_cast<int>(n), 2);
+  // Should set src to nullptr after reading null terminator
+  ASSERT_EQ(src, nullptr);
+}
+
+// When len is smaller than the string, conversion stops after len wide
+// characters and src is left at the first unconverted byte.
+TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  // Remember the start so the final position of src can be checked.
+  const char *check = src;
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // Should have read 3 emojis
+  ASSERT_EQ(static_cast<int>(n), 3);
+  ASSERT_EQ(static_cast<int>(dest[0]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[1]), 128569);
+  ASSERT_EQ(static_cast<int>(dest[2]), 128569);
+  // src should now point to the 4th cat emoji aka 13th byte
+  ASSERT_EQ((check + 12), src);
+}
+
+// A string whose very first byte cannot start a multibyte character is
+// rejected with EILSEQ.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
+  // 0x80 is invalid first byte of mb character
+  const char *src =
+      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  // Should return error and set errno
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+// An invalid continuation byte inside the second character is rejected with
+// EILSEQ even though the first character is well formed.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
+  // The 7th byte is invalid for a 4 byte character
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  // len must not exceed the capacity of dest.
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
+  // Should return error and set errno
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+// With a null destination nothing is stored, but the call still reports how
+// many wide characters the string converts to.
+TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  // len (5) is large enough to cover all four characters and the terminator.
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(nullptr, &src, 5, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // Null destination should still return correct number of read chars
+  ASSERT_EQ(static_cast<int>(n), 4);
+}
+
+// A conversion state that fails validation makes the call fail with EINVAL
+// before any input is converted.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidMBState) {
+  mbstate_t *mb;
+  LIBC_NAMESPACE::internal::mbstate inv;
+  // Deliberately bogus byte count to make the state invalid.
+  // NOTE(review): presumably invalid because no supported character is 6
+  // bytes long - confirm against the mbstate definition.
+  inv.total_bytes = 6;
+  // The public API takes mbstate_t, so view the internal state through it.
+  mb = reinterpret_cast<mbstate_t *>(&inv);
+  // Four laughing cat emojis "😹😹😹😹"
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t dest[3];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, mb);
+  // Should fail from invalid mbstate
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EINVAL);
+}
+
+// Death test: only built when null checks are compiled in and no sanitizer is
+// active.
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+TEST_F(LlvmLibcMBSRToWCSTest, NullSource) {
+  // Passing in a nullptr source should crash the program
+  EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
+               WITH_SIGNAL(-1));
+}
+#endif // defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)

Copy link
Contributor

@uzairnawaz uzairnawaz left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mostly good except for one small edge case

Copy link

github-actions bot commented Jun 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants