-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] mbsrtowcs implementation #145791
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[libc] mbsrtowcs implementation #145791
Conversation
Implemented mbsrtowcs and tests for the function.
@llvm/pr-subscribers-libc Author: None (sribee8) Changes: Implemented mbsrtowcs and tests for the function. Full diff: https://github.com/llvm/llvm-project/pull/145791.diff 10 Files Affected:
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..e615702f7bbb8 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1249,6 +1249,7 @@ if(LLVM_LIBC_FULL_BUILD)
# wchar.h entrypoints
libc.src.wchar.mbrtowc
+ libc.src.wchar.mbsrtowcs
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..576cf09b86696 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -38,6 +38,15 @@ functions:
- type: const char *__restrict
- type: size_t
- type: mbstate_t *__restrict
+ - name: mbsrtowcs
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: wchar_t *__restrict # dst
+ - type: const char **__restrict # src
+ - type: size_t # len
+ - type: mbstate_t *__restrict # ps
- name: mbtowc
standards:
- stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 86a47319f278a..c06b1023180ad 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -53,3 +53,19 @@ add_object_library(
.character_converter
.mbstate
)
+
+# Internal mbsrtowcs conversion routine (reports failures through ErrorOr);
+# the public libc.src.wchar.mbsrtowcs entrypoint depends on it.
+add_object_library(
+  mbsrtowcs
+  HDRS
+    mbsrtowcs.h
+  SRCS
+    mbsrtowcs.cpp
+  DEPENDS
+    # mbsrtowcs.cpp includes hdr/types/mbstate_t.h, so list its target too.
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.hdr.types.size_t
+    libc.src.__support.common
+    libc.src.__support.error_or
+    libc.src.__support.macros.config
+    .mbstate
+    .mbrtowc
+)
diff --git a/libc/src/__support/wchar/mbsrtowcs.cpp b/libc/src/__support/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000000000..4edda2bed6e44
--- /dev/null
+++ b/libc/src/__support/wchar/mbsrtowcs.cpp
@@ -0,0 +1,44 @@
+//===-- Implementation for mbsrtowcs function -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/mbsrtowcs.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbrtowc.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the multibyte string at *src into wide characters, handing dst to
+// mbrtowc as the output slot and producing at most len wide characters.
+// Conversion stops early when a null wide character is produced (*src is then
+// set to nullptr) or when mbrtowc reports an error (that error is returned).
+// On success the result is the number of wide characters converted before the
+// terminator.
+ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                          size_t len, mbstate *__restrict ps) {
+  size_t converted = 0;
+  while (converted < len) {
+    // Each step offers mbrtowc up to 4 bytes of input.
+    auto step = mbrtowc(dst, *src, 4, ps);
+    if (!step.has_value()) // encoding error or invalid mbstate
+      return Error(step.error());
+    if (*dst == L'\0') {
+      // Terminator converted: signal completion through *src.
+      *src = nullptr;
+      return converted;
+    }
+    // Move past the bytes consumed by the character just converted.
+    *src += step.value();
+    ++dst;
+    ++converted;
+  }
+  return converted;
+}
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/mbsrtowcs.h b/libc/src/__support/wchar/mbsrtowcs.h
new file mode 100644
index 0000000000000..5eda23fa7baad
--- /dev/null
+++ b/libc/src/__support/wchar/mbsrtowcs.h
@@ -0,0 +1,29 @@
+//===-- Implementation header for mbsrtowcs function ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the multibyte string at *src into at most len wide characters
+// written through dst, using ps as the conversion state. dst must not be
+// null. Returns the number of wide characters converted (terminator
+// excluded) or the conversion error. *src becomes nullptr once the
+// terminator has been converted; otherwise it is advanced past the last
+// converted character.
+ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+                          size_t len, mbstate *__restrict ps);
+
+} // namespace internal
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..7a27ff8544a1e 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -78,6 +78,24 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)
+add_entrypoint_object(
+ mbsrtowcs
+ SRCS
+ mbsrtowcs.cpp
+ HDRS
+ mbsrtowcs.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.mbstate_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ libc.src.__support.wchar.mbsrtowcs # internal conversion routine the entrypoint wraps
+ libc.src.__support.libc_errno # the entrypoint reports failures through libc_errno
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.macros.null_check # presumably provides LIBC_CRASH_ON_NULLPTR
+)
+
add_entrypoint_object(
mbtowc
SRCS
diff --git a/libc/src/wchar/mbsrtowcs.cpp b/libc/src/wchar/mbsrtowcs.cpp
new file mode 100644
index 0000000000000..a7e534f4b57c2
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.cpp
@@ -0,0 +1,41 @@
+//===-- Implementation of mbsrtowcs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/mbsrtowcs.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/mbsrtowcs.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public entry point for mbsrtowcs: converts the null-terminated multibyte
+// string at *src into wide characters.
+//  - dst != nullptr: at most len wide characters are stored in dst and *src
+//    is updated by the internal converter (nullptr once the terminator has
+//    been converted, otherwise just past the last converted character).
+//  - dst == nullptr: len is ignored and *src is left untouched; only the
+//    number of wide characters the whole string needs (terminator excluded)
+//    is computed.
+// If ps is null, a function-local static conversion state is used.
+// Returns the number of wide characters converted, not counting the
+// terminator, or (size_t)-1 with errno set when the conversion fails.
+LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
+                   (wchar_t *__restrict dst, const char **__restrict src,
+                    size_t len, mbstate_t *__restrict ps)) {
+  LIBC_CRASH_ON_NULLPTR(src);
+  static internal::mbstate internal_mbstate;
+  internal::mbstate *state =
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps);
+
+  if (dst == nullptr) {
+    // Length query. Convert one character at a time into a single scratch
+    // slot so that no buffer sized by the caller-controlled len is needed
+    // (the usual idiom passes len == 0), and walk a private cursor so the
+    // caller's *src is not modified.
+    const char *cursor = *src;
+    size_t count = 0;
+    while (cursor != nullptr) {
+      wchar_t scratch = L'\0';
+      auto step = internal::mbsrtowcs(&scratch, &cursor, 1, state);
+      if (!step.has_value()) {
+        // Encoding failure
+        libc_errno = step.error();
+        return static_cast<size_t>(-1);
+      }
+      // With len == 1 the internal routine yields 1 for a converted
+      // character and 0 once the terminator is reached, at which point it
+      // also sets cursor to nullptr and ends this loop.
+      count += step.value();
+    }
+    return count;
+  }
+
+  auto ret = internal::mbsrtowcs(dst, src, len, state);
+  if (!ret.has_value()) {
+    // Encoding failure
+    libc_errno = ret.error();
+    return static_cast<size_t>(-1);
+  }
+  return ret.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/mbsrtowcs.h b/libc/src/wchar/mbsrtowcs.h
new file mode 100644
index 0000000000000..f8d4cc26e63ae
--- /dev/null
+++ b/libc/src/wchar/mbsrtowcs.h
@@ -0,0 +1,24 @@
+//===-- Implementation header for mbsrtowcs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
+ size_t len, mbstate_t *__restrict ps); // count converted, or -1 (as size_t) with errno set on failure
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..44f0e7238012b 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -39,6 +39,20 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)
+# Unit tests for the mbsrtowcs entrypoint. The dependency list covers every
+# project header that mbsrtowcs_test.cpp includes, among them the internal
+# mbstate definition and the ErrnoCheckingTest fixture.
+add_libc_test(
+  mbsrtowcs_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    mbsrtowcs_test.cpp
+  DEPENDS
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.mbstate
+    libc.src.string.memset
+    libc.src.wchar.mbsrtowcs
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.test.UnitTest.ErrnoCheckingTest
+)
+
add_libc_test(
mbtowc_test
SUITE
diff --git a/libc/test/src/wchar/mbsrtowcs_test.cpp b/libc/test/src/wchar/mbsrtowcs_test.cpp
new file mode 100644
index 0000000000000..3fec3e76d3f68
--- /dev/null
+++ b/libc/test/src/wchar/mbsrtowcs_test.cpp
@@ -0,0 +1,132 @@
+//===-- Unittests for mbsrtowcs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/string/memset.h"
+#include "src/wchar/mbsrtowcs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// Converts a one-byte string through a caller-supplied, zero-initialized
+// conversion state and checks the output, the returned count and the final
+// value of the source pointer.
+TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneCharacter) {
+  // The state has to be an object: memset clears the object itself and its
+  // address is what mbsrtowcs receives.
+  mbstate_t mb;
+  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
+  const char *ch = "A";
+  wchar_t dest[2];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 2, &mb);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_TRUE(dest[0] == L'A');
+  ASSERT_TRUE(dest[1] == L'\0');
+  // Should not count null terminator in number
+  ASSERT_EQ(static_cast<int>(n), 1);
+  // Should set ch to nullptr after reading null terminator
+  ASSERT_EQ(ch, nullptr);
+}
+
+// A single four-byte sequence is expected to become one wide character
+// followed by the terminator.
+TEST_F(LlvmLibcMBSRToWCSTest, MultiByteOneCharacter) {
+  const char *input = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
+  wchar_t wide[2];
+  size_t count = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 2, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
+  ASSERT_TRUE(wide[1] == L'\0');
+  // The terminator is not part of the returned count
+  ASSERT_EQ(static_cast<int>(count), 1);
+  // Reading the terminator leaves the source pointer at nullptr
+  ASSERT_EQ(input, nullptr);
+}
+
+// Two consecutive four-byte sequences are expected to become two wide
+// characters followed by the terminator.
+TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
+  // "😹😹": the laughing cat emoji twice
+  const char *input = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t wide[3];
+  size_t count = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
+  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
+  ASSERT_TRUE(wide[2] == L'\0');
+  // The terminator is not part of the returned count
+  ASSERT_EQ(static_cast<int>(count), 2);
+  // Reading the terminator leaves the source pointer at nullptr
+  ASSERT_EQ(input, nullptr);
+}
+
+// With room for only three wide characters, conversion of a four-character
+// string is expected to stop after the third and leave the source pointer at
+// the fourth.
+TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
+  // "😹😹😹😹": the laughing cat emoji four times
+  const char *input =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  const char *const start = input;
+  wchar_t wide[3];
+  size_t count = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // Three emojis fit
+  ASSERT_EQ(static_cast<int>(count), 3);
+  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
+  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
+  ASSERT_EQ(static_cast<int>(wide[2]), 128569);
+  // The source pointer now sits on the 4th emoji, 12 bytes past the start
+  ASSERT_EQ((start + 12), input);
+}
+
+// A string whose very first byte cannot start a multibyte character is
+// expected to be rejected with EILSEQ.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
+  // 0x80 cannot be the first byte of a multibyte character
+  const char *input =
+      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t wide[3];
+  size_t result = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
+  // Expect the error return value together with errno
+  ASSERT_EQ(static_cast<int>(result), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+// A string whose second character contains an invalid continuation byte is
+// expected to be rejected with EILSEQ.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
+  // The 7th byte is invalid for a 4 byte character
+  const char *src =
+      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  // The buffer holds as many elements as the len passed below, so the call
+  // never advertises more room than dest really has.
+  wchar_t dest[5];
+  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 5, nullptr);
+  // Should return error and set errno
+  ASSERT_EQ(static_cast<int>(n), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+// Without a destination buffer the call is still expected to report how many
+// wide characters the string converts to.
+TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
+  // "😹😹😹😹": the laughing cat emoji four times
+  const char *input =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  size_t count = LIBC_NAMESPACE::mbsrtowcs(nullptr, &input, 5, nullptr);
+  ASSERT_ERRNO_SUCCESS();
+  // The character count is reported even though nothing is stored
+  ASSERT_EQ(static_cast<int>(count), 4);
+}
+
+// A conversion state whose total_bytes field is set to 6 is expected to be
+// rejected with EINVAL.
+TEST_F(LlvmLibcMBSRToWCSTest, InvalidMBState) {
+  LIBC_NAMESPACE::internal::mbstate bad_state;
+  bad_state.total_bytes = 6;
+  mbstate_t *state_ptr = reinterpret_cast<mbstate_t *>(&bad_state);
+  // "😹😹😹😹": the laughing cat emoji four times
+  const char *input =
+      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
+  wchar_t wide[3];
+  size_t result = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, state_ptr);
+  // The invalid state must make the call fail
+  ASSERT_EQ(static_cast<int>(result), -1);
+  ASSERT_ERRNO_EQ(EINVAL);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
+TEST_F(LlvmLibcMBSRToWCSTest, NullSource) {
+ // Passing in a nullptr source should crash the program
+ EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
+ WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS && !LIBC_HAS_SANITIZER
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mostly good except for one small edge case
✅ With the latest revision this PR passed the C/C++ code formatter. |
Implemented mbsrtowcs and tests for the function.