Create an EncodingConverter class with both iconv and icu support. #138893

abhina-sree · 2025-05-07T15:28:43Z

This patch adds a wrapper class called EncodingConverter for ConverterEBCDIC. This class is then extended to support the ICU library or iconv library. The ICU library currently takes priority over the iconv library.

Relevant RFCs:
https://discourse.llvm.org/t/rfc-adding-a-charset-converter-to-the-llvm-support-library/69795
https://discourse.llvm.org/t/rfc-enabling-fexec-charset-support-to-llvm-and-clang-reposting/71512

Stacked PR to enable fexec-charset that depends on this:
#138895

See old PR for review and commit history: #74516

llvmbot · 2025-05-07T15:29:25Z

@llvm/pr-subscribers-llvm-support

Author: Abhina Sree (abhina-sree)

Changes

This patch adds a wrapper class called CharSetConverter for ConverterEBCDIC. This class is then extended to support the ICU library or iconv library. The ICU library currently takes priority over the iconv library.

Relevant RFCs:
https://discourse.llvm.org/t/rfc-adding-a-charset-converter-to-the-llvm-support-library/69795
https://discourse.llvm.org/t/rfc-enabling-fexec-charset-support-to-llvm-and-clang-reposting/71512

Patch is 29.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138893.diff

9 Files Affected:

(modified) llvm/CMakeLists.txt (+4)
(modified) llvm/cmake/config-ix.cmake (+35)
(modified) llvm/include/llvm/Config/config.h.cmake (+6)
(added) llvm/include/llvm/Support/CharSet.h (+141)
(modified) llvm/lib/Support/CMakeLists.txt (+9)
(added) llvm/lib/Support/CharSet.cpp (+344)
(modified) llvm/unittests/Support/CMakeLists.txt (+1)
(added) llvm/unittests/Support/CharSetTest.cpp (+232)
(modified) llvm/unittests/Support/ConvertEBCDICTest.cpp (+2-2)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index e8d9ec0d6153a..894c0e1d2e5ae 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -592,6 +592,10 @@ else()
   option(LLVM_ENABLE_THREADS "Use threads if available." ON)
 endif()
 
+# Backends for character conversion support. Both default to OFF. With
+# FORCE_ON, a failed library lookup is a configure error, and setting both
+# options to FORCE_ON is rejected in cmake/config-ix.cmake.
+set(LLVM_ENABLE_ICU "OFF" CACHE STRING "Use ICU for character conversion support if available. Can be ON, OFF, or FORCE_ON")
+
+set(LLVM_ENABLE_ICONV "OFF" CACHE STRING "Use iconv for character conversion support if available. Can be ON, OFF, or FORCE_ON")
+
 set(LLVM_ENABLE_ZLIB "ON" CACHE STRING "Use zlib for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
 
 set(LLVM_ENABLE_ZSTD "ON" CACHE STRING "Use zstd for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 43311dad457ec..f7e826b34d26f 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -294,6 +294,41 @@ if(LLVM_HAS_LOGF128)
   set(LLVM_HAS_LOGF128 "${HAS_LOGF128}")
 endif()
 
+# Forcing both backends at once is contradictory, so reject it up front.
+if (LLVM_ENABLE_ICU STREQUAL FORCE_ON AND LLVM_ENABLE_ICONV STREQUAL FORCE_ON)
+  message(FATAL_ERROR "LLVM_ENABLE_ICU and LLVM_ENABLE_ICONV should not both be FORCE_ON")
+endif()
+
+# Check for ICU. Only allow an optional, dynamic link for ICU so we don't impact LLVM's licensing.
+# The ICU probe is skipped entirely when iconv is FORCE_ON.
+if(LLVM_ENABLE_ICU AND NOT(LLVM_ENABLE_ICONV STREQUAL FORCE_ON))
+  # Temporarily restrict the library search to the shared-library suffix; the
+  # saved suffix list is restored once the search is done.
+  set(LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+  set(CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  if (LLVM_ENABLE_ICU STREQUAL FORCE_ON)
+    find_package(ICU REQUIRED COMPONENTS uc i18n)
+    # NOTE(review): find_package(... REQUIRED) normally stops configuration on
+    # failure already, so this check looks like a defensive fallback - confirm.
+    if (NOT ICU_FOUND)
+      message(FATAL_ERROR "Failed to configure ICU, but LLVM_ENABLE_ICU is FORCE_ON")
+    endif()
+  else()
+    find_package(ICU COMPONENTS uc i18n)
+  endif()
+  # HAVE_ICU feeds the "#cmakedefine HAVE_ICU" entry in config.h.cmake.
+  set(HAVE_ICU ${ICU_FOUND})
+  set(CMAKE_FIND_LIBRARY_SUFFIXES ${LIBRARY_SUFFIXES})
+endif()
+
+# Check for builtin iconv to avoid licensing issues.
+# iconv is only probed when ICU was not found, so ICU takes priority.
+if(LLVM_ENABLE_ICONV AND NOT HAVE_ICU)
+  if (LLVM_ENABLE_ICONV STREQUAL FORCE_ON)
+    find_package(Iconv REQUIRED)
+    # A found iconv that is not built in is still rejected under FORCE_ON.
+    if (NOT Iconv_FOUND OR NOT Iconv_IS_BUILT_IN)
+      message(FATAL_ERROR "Failed to configure iconv, but LLVM_ENABLE_ICONV is FORCE_ON")
+    endif()
+  else()
+    find_package(Iconv)
+  endif()
+  # Only an iconv reported as built in (Iconv_IS_BUILT_IN) enables HAVE_ICONV.
+  if(Iconv_FOUND AND Iconv_IS_BUILT_IN)
+    set(HAVE_ICONV 1)
+  endif()
+endif()
+
 # function checks
 check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM)
 find_package(Backtrace)
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index 7efac55ab0352..3f70a0150da4f 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -236,6 +236,12 @@
 /* Have host's ___chkstk_ms */
 #cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS}
 
+/* Define if ICU library is available */
+#cmakedefine HAVE_ICU ${HAVE_ICU}
+
+/* Define if iconv library is available */
+#cmakedefine HAVE_ICONV ${HAVE_ICONV}
+
 /* Linker version detected at compile time. */
 #cmakedefine HOST_LINK_VERSION "${HOST_LINK_VERSION}"
 
diff --git a/llvm/include/llvm/Support/CharSet.h b/llvm/include/llvm/Support/CharSet.h
new file mode 100644
index 0000000000000..6a28cd19f4143
--- /dev/null
+++ b/llvm/include/llvm/Support/CharSet.h
@@ -0,0 +1,141 @@
+//===-- CharSet.h - Characters set conversion class ---------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides a utility class to convert between different character
+/// set encodings.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CHARSET_H
+#define LLVM_SUPPORT_CHARSET_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/ErrorOr.h"
+
+#include <string>
+#include <system_error>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+
+namespace details {
+/// Abstract base class for the converter implementations. Subclasses provide
+/// convertString() and reset(); clients call the public convert(), which runs
+/// one conversion and then always resets the converter.
+class CharSetConverterImplBase {
+
+private:
+  /// Converts a string.
+  /// \param[in] Source source string
+  /// \param[out] Result container for converted string
+  /// \return error code in case something went wrong
+  ///
+  /// The following error codes can occur, among others:
+  ///   - std::errc::argument_list_too_long: The result requires more than
+  ///     std::numeric_limits<size_t>::max() bytes.
+  ///   - std::errc::illegal_byte_sequence: The input contains an invalid
+  ///     multibyte sequence.
+  ///   - std::errc::invalid_argument: The input contains an incomplete
+  ///     multibyte sequence.
+  ///
+  /// If the destination charset is a stateful character set, the shift state
+  /// will be set to the initial state.
+  ///
+  /// In case of an error, the result string contains the successfully converted
+  /// part of the input string.
+  ///
+  virtual std::error_code convertString(StringRef Source,
+                                        SmallVectorImpl<char> &Result) = 0;
+
+  /// Resets the converter to the initial state.
+  virtual void reset() = 0;
+
+public:
+  virtual ~CharSetConverterImplBase() = default;
+
+  /// Converts a string and resets the converter to the initial state.
+  /// The reset happens whether or not the conversion succeeded.
+  /// \param[in] Source source string
+  /// \param[out] Result container for converted string
+  /// \return the error code reported by convertString()
+  std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result) {
+    auto EC = convertString(Source, Result);
+    reset();
+    return EC;
+  }
+};
+} // namespace details
+
+// Names inspired by https://wg21.link/p1885.
+namespace text_encoding {
+/// Encodings that can be selected by enumerator when creating a
+/// CharSetConverter.
+enum class id {
+  /// UTF-8 character set encoding.
+  UTF8,
+
+  /// IBM EBCDIC 1047 character set encoding.
+  IBM1047
+};
+} // end namespace text_encoding
+
+/// Utility class to convert between different character set encodings.
+/// Instances are obtained through the create() factories and are move-only.
+class CharSetConverter {
+  /// The implementation that performs the actual conversion.
+  std::unique_ptr<details::CharSetConverterImplBase> Converter;
+
+  /// Wraps an already constructed implementation. Private: instances are
+  /// built by the create() factories.
+  CharSetConverter(std::unique_ptr<details::CharSetConverterImplBase> Converter)
+      : Converter(std::move(Converter)) {}
+
+public:
+  /// Creates a CharSetConverter instance.
+  /// Returns std::errc::invalid_argument in case the requested conversion is
+  /// not supported.
+  /// \param[in] CSFrom the source character encoding
+  /// \param[in] CSTo the target character encoding
+  /// \return a CharSetConverter instance or an error code
+  static ErrorOr<CharSetConverter> create(text_encoding::id CSFrom,
+                                          text_encoding::id CSTo);
+
+  /// Creates a CharSetConverter instance.
+  /// Returns std::errc::invalid_argument in case the requested conversion is
+  /// not supported.
+  /// \param[in] CPFrom name of the source character encoding
+  /// \param[in] CPTo name of the target character encoding
+  /// \return a CharSetConverter instance or an error code
+  static ErrorOr<CharSetConverter> create(StringRef CPFrom, StringRef CPTo);
+
+  // Copying is disabled; the implementation object has a single owner.
+  CharSetConverter(const CharSetConverter &) = delete;
+  CharSetConverter &operator=(const CharSetConverter &) = delete;
+
+  /// Move construction transfers ownership of the implementation from Other.
+  CharSetConverter(CharSetConverter &&Other)
+      : Converter(std::move(Other.Converter)) {}
+
+  /// Move assignment transfers ownership of the implementation from Other;
+  /// self-assignment is a no-op.
+  CharSetConverter &operator=(CharSetConverter &&Other) {
+    if (this != &Other)
+      Converter = std::move(Other.Converter);
+    return *this;
+  }
+
+  ~CharSetConverter() = default;
+
+  /// Converts a string.
+  /// \param[in] Source source string
+  /// \param[out] Result container for converted string
+  /// \return error code in case something went wrong
+  std::error_code convert(StringRef Source,
+                          SmallVectorImpl<char> &Result) const {
+    return Converter->convert(Source, Result);
+  }
+
+  /// Converts a string and returns the result by value.
+  /// \param[in] Source source string
+  /// \return the converted string, or the error code of the failed conversion
+  ErrorOr<std::string> convert(StringRef Source) const {
+    SmallString<100> Result;
+    auto EC = Converter->convert(Source, Result);
+    if (!EC)
+      return std::string(Result);
+    return EC;
+  }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index df1e65f3a588c..9a7d26a35bf1a 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -162,6 +162,7 @@ add_llvm_component_library(LLVMSupport
   CachePruning.cpp
   Caching.cpp
   circular_raw_ostream.cpp
+  CharSet.cpp
   Chrono.cpp
   COM.cpp
   CodeGenCoverage.cpp
@@ -316,6 +317,14 @@ add_llvm_component_library(LLVMSupport
   Demangle
   )
 
+# Link ICU library if it is an external library.
+# ICU_FOUND is the result of the find_package(ICU ...) probe in
+# cmake/config-ix.cmake.
+if(ICU_FOUND)
+  target_link_libraries(LLVMSupport
+  PRIVATE
+  ${ICU_LIBRARIES}
+  )
+endif()
+
 set(llvm_system_libs ${system_libs})
 
 # This block is only needed for llvm-config. When we deprecate llvm-config and
diff --git a/llvm/lib/Support/CharSet.cpp b/llvm/lib/Support/CharSet.cpp
new file mode 100644
index 0000000000000..6810cf9c6e376
--- /dev/null
+++ b/llvm/lib/Support/CharSet.cpp
@@ -0,0 +1,344 @@
+//===-- CharSet.cpp - Characters sets conversion class ------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides utility classes to convert between different character
+/// set encodings.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CharSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertEBCDIC.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <limits>
+#include <system_error>
+
+#ifdef HAVE_ICU
+#include <unicode/ucnv.h>
+#elif defined(HAVE_ICONV)
+#include <iconv.h>
+#endif
+
+using namespace llvm;
+
+// Appends the normalized form of CSName to Normalized, using the charset alias
+// matching algorithm proposed in
+// https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching:
+// non-alphanumeric characters are dropped, letters are lowercased, and a '0'
+// is dropped unless the previously kept character was a digit.
+static void normalizeCharSetName(StringRef CSName,
+                                 SmallVectorImpl<char> &Normalized) {
+  bool AfterDigit = false;
+  for (char C : CSName) {
+    if (!isAlnum(C))
+      continue;
+    const char Lower = toLower(C);
+    // A zero that does not follow a kept digit is a leading zero; skip it.
+    if (Lower == '0' && !AfterDigit)
+      continue;
+    AfterDigit = isDigit(Lower);
+    Normalized.push_back(Lower);
+  }
+}
+
+// Returns the text_encoding::id matching the normalized form of CSName, or
+// std::nullopt when the name is not one of the known encodings.
+static std::optional<text_encoding::id> getKnownCharSet(StringRef CSName) {
+  SmallString<16> Key;
+  normalizeCharSetName(CSName, Key);
+  std::optional<text_encoding::id> Known;
+  if (Key.equals("utf8"))
+    Known = text_encoding::id::UTF8;
+  else if (Key.equals("ibm1047"))
+    Known = text_encoding::id::IBM1047;
+  return Known;
+}
+
+// Grows the output buffer after a conversion ran out of space.
+// \param[in,out] Capacity current buffer capacity; updated to the new one
+// \param[out] Output set to the start of the regrown buffer
+// \param[out] OutputLength set to the new capacity
+// \param[in,out] Result container backing the output buffer; its previous
+//                contents are discarded, so the caller must restart the
+//                conversion from the beginning
+LLVM_ATTRIBUTE_UNUSED static void
+HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
+               SmallVectorImpl<char> &Result) {
+  // No space left in output buffer. Double the size of the underlying
+  // memory in the SmallVectorImpl, adjust pointer and length and continue
+  // the conversion.
+  constexpr size_t MaxCapacity = std::numeric_limits<size_t>::max();
+  // Doubling a capacity of zero would never make progress, so enforce a
+  // small floor; otherwise callers would retry forever with an empty buffer.
+  constexpr size_t MinCapacity = 16;
+  if (Capacity < MaxCapacity / 2)
+    Capacity = std::max(2 * Capacity, MinCapacity);
+  else
+    Capacity = MaxCapacity;
+  Result.resize(0);
+  Result.resize_for_overwrite(Capacity);
+  Output = static_cast<char *>(Result.data());
+  OutputLength = Capacity;
+}
+
+namespace {
+// Direction of the table-based conversion done by CharSetConverterTable.
+enum ConversionType {
+  UTF8ToIBM1047,
+  IBM1047ToUTF8,
+};
+
+// Support conversion between EBCDIC 1047 and UTF-8. This class uses
+// built-in translation tables that allow for translation between the
+// aforementioned character sets. The use of tables for conversion is only
+// possible because EBCDIC 1047 is a single-byte, stateless encoding; other
+// character sets are not supported.
+class CharSetConverterTable : public details::CharSetConverterImplBase {
+  // Conversion direction, fixed at construction.
+  const ConversionType ConvType;
+
+public:
+  CharSetConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
+
+  std::error_code convertString(StringRef Source,
+                                SmallVectorImpl<char> &Result) override;
+
+  // Nothing to reset: this converter keeps no state between conversions.
+  void reset() override {}
+};
+
+// Dispatches on the conversion direction chosen at construction and runs the
+// matching ConverterEBCDIC routine. The IBM-1047 to UTF-8 direction always
+// reports success; the UTF-8 to IBM-1047 direction forwards the routine's
+// error code.
+std::error_code
+CharSetConverterTable::convertString(StringRef Source,
+                                     SmallVectorImpl<char> &Result) {
+  switch (ConvType) {
+  case IBM1047ToUTF8:
+    ConverterEBCDIC::convertToUTF8(Source, Result);
+    return std::error_code();
+  case UTF8ToIBM1047:
+    return ConverterEBCDIC::convertToEBCDIC(Source, Result);
+  }
+  llvm_unreachable("Invalid ConvType!");
+  return std::error_code();
+}
+
+#ifdef HAVE_ICU
+// Deleter that closes a non-null ICU converter with ucnv_close().
+struct UConverterDeleter {
+  void operator()(UConverter *Converter) const {
+    if (Converter)
+      ucnv_close(Converter);
+  }
+};
+// Owning handle for a UConverter; the converter is closed on destruction.
+using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
+
+// Converter implementation backed by ICU. It owns two ICU converters, one for
+// the source encoding and one for the target encoding, and passes both to
+// ucnv_convertEx() in convertString().
+class CharSetConverterICU : public details::CharSetConverterImplBase {
+  // Converter for the source encoding.
+  UConverterUniquePtr FromConvDesc;
+  // Converter for the target encoding.
+  UConverterUniquePtr ToConvDesc;
+
+public:
+  // Takes ownership of both converters.
+  CharSetConverterICU(UConverterUniquePtr FromConverter,
+                      UConverterUniquePtr ToConverter)
+      : FromConvDesc(std::move(FromConverter)),
+        ToConvDesc(std::move(ToConverter)) {}
+
+  std::error_code convertString(StringRef Source,
+                                SmallVectorImpl<char> &Result) override;
+
+  void reset() override;
+};
+
+// Converts Source from the source encoding to the target encoding with
+// ucnv_convertEx(), writing directly into the storage of Result.
+// \param[in] Source source string; may be empty
+// \param[out] Result container for the converted string
+// \return an empty error code on success, or EILSEQ (generic category) for
+//         any ICU failure other than a recoverable buffer overflow
+//
+// When the output buffer is too small it is regrown and the conversion is
+// restarted from the beginning. On error, Result is truncated to the bytes
+// written so far.
+std::error_code
+CharSetConverterICU::convertString(StringRef Source,
+                                   SmallVectorImpl<char> &Result) {
+  // Setup the input in case it has no backing data.
+  size_t InputLength = Source.size();
+  const char *In = InputLength ? Source.data() : "";
+
+  // Setup the output. We directly write into the SmallVector.
+  size_t Capacity = Result.capacity();
+  size_t OutputLength = Capacity;
+  Result.resize_for_overwrite(Capacity);
+  char *Output = static_cast<char *>(Result.data());
+  UErrorCode EC = U_ZERO_ERROR;
+
+  // Stop at the first unconvertible sequence instead of substituting.
+  ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
+                      &EC);
+  ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
+                        NULL, &EC);
+  assert(U_SUCCESS(EC));
+
+  do {
+    EC = U_ZERO_ERROR;
+    const char *Input = In;
+
+    // Always point Output at the buffer, also for empty input. A null Output
+    // would make the size computations below (Output - Result.data())
+    // meaningless.
+    Output = static_cast<char *>(Result.data());
+    ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output, Result.end(), &Input,
+                   In + InputLength, /*pivotStart=*/NULL,
+                   /*pivotSource=*/NULL, /*pivotTarget=*/NULL,
+                   /*pivotLimit=*/NULL, /*reset=*/true,
+                   /*flush=*/true, &EC);
+    if (U_FAILURE(EC)) {
+      if (EC == U_BUFFER_OVERFLOW_ERROR &&
+          Capacity < std::numeric_limits<size_t>::max()) {
+        // Regrow the buffer and restart the conversion from scratch.
+        HandleOverflow(Capacity, Output, OutputLength, Result);
+        continue;
+      }
+      // Some other error occurred.
+      Result.resize(Output - Result.data());
+      return std::error_code(EILSEQ, std::generic_category());
+    }
+    break;
+  } while (true);
+
+  // Re-adjust size to actual size.
+  Result.resize(Output - Result.data());
+  return std::error_code();
+}
+
+// Returns both ICU converters to their initial state via ucnv_reset().
+void CharSetConverterICU::reset() {
+  UConverter *From = FromConvDesc.get();
+  UConverter *To = ToConvDesc.get();
+  ucnv_reset(From);
+  ucnv_reset(To);
+}
+
+#elif defined(HAVE_ICONV)
+// Converter implementation backed by iconv. It owns a single iconv
+// descriptor that is used for every conversion.
+class CharSetConverterIconv : public details::CharSetConverterImplBase {
+  // Move-only owner of an iconv_t. The value (iconv_t)-1 marks "no
+  // descriptor"; any other value is closed with iconv_close() when ownership
+  // ends.
+  class UniqueIconvT {
+    iconv_t ConvDesc;
+
+    // Closes the owned descriptor, if any, and marks this object as empty.
+    void closeDesc() {
+      if (ConvDesc != (iconv_t)-1) {
+        iconv_close(ConvDesc);
+        ConvDesc = (iconv_t)-1;
+      }
+    }
+
+  public:
+    operator iconv_t() const { return ConvDesc; }
+    UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
+    ~UniqueIconvT() { closeDesc(); }
+    UniqueIconvT(UniqueIconvT &&Other) : ConvDesc(Other.ConvDesc) {
+      Other.ConvDesc = (iconv_t)-1;
+    }
+    UniqueIconvT &operator=(UniqueIconvT &&Other) {
+      if (&Other != this) {
+        // Release the descriptor owned so far before taking over Other's;
+        // overwriting it without closing would leak it.
+        closeDesc();
+        ConvDesc = Other.ConvDesc;
+        Other.ConvDesc = (iconv_t)-1;
+      }
+      return *this;
+    }
+  };
+  UniqueIconvT ConvDesc;
+
+public:
+  // Takes ownership of the descriptor.
+  CharSetConverterIconv(UniqueIconvT ConvDesc)
+      : ConvDesc(std::move(ConvDesc)) {}
+
+  std::error_code convertString(StringRef Source,
+                                SmallVectorImpl<char> &Result) override;
+
+  void reset() override;
+};
+
+// Converts Source with iconv(), writing directly into the storage of Result.
+// \param[in] Source source string; may be empty
+// \param[out] Result container for the converted string
+// \return an empty error code on success; the errno value reported by
+//         iconv() (generic category) on failure; or
+//         std::errc::illegal_byte_sequence when iconv() converted some
+//         characters in a nonreversible way
+//
+// When the output buffer is too small it is regrown and the conversion is
+// restarted from the beginning. On error, Result is truncated to the bytes
+// written so far.
+std::error_code
+CharSetConverterIconv::convertString(StringRef Source,
+                                     SmallVectorImpl<char> &Result) {
+  // Setup the output. We directly write into the SmallVector.
+  size_t Capacity = Result.capacity();
+  Result.resize_for_overwrite(Capacity);
+  char *Output = static_cast<char *>(Result.data());
+  size_t OutputLength = Capacity;
+
+  size_t Ret;
+  // Handle errors returned from iconv(). Returns an empty error code when the
+  // error was an overflow that has been handled and the caller should retry.
+  auto HandleError = [&Capacity, &Output, &OutputLength, &Result,
+                      this](size_t Ret) {
+    if (Ret == static_cast<size_t>(-1)) {
+      // Read errno right away, before any further call can overwrite it.
+      const int Err = errno;
+      // An error occurred. Check if we can gracefully handle it.
+      if (Err == E2BIG && Capacity < std::numeric_limits<size_t>::max()) {
+        HandleOverflow(Capacity, Output, OutputLength, Result);
+        // Reset converter
+        iconv(ConvDesc, nullptr, nullptr, nullptr, nullptr);
+        return std::error_code();
+      }
+      // Some other error occurred.
+      Result.resize(Output - Result.data());
+      return std::error_code(Err, std::generic_category());
+    }
+    // A positive return value indicates that some characters were converted
+    // in a nonreversible way, that is, replaced with a SUB symbol. Returning
+    // an error in this case makes sure that both conversion routines behave
+    // in the same way. Trim Result to what was actually written so it does
+    // not expose the unused tail of the buffer.
+    Result.resize(Output - Result.data());
+    return std::make_error_code(std::errc::illegal_byte_sequence);
+  };
+
+  do {
+    // Setup the input. Use nullptr to reset iconv state if input length is
+    // zero.
+    size_t InputLength = Source.size();
+    char *Input = InputLength ? const_cast<char *>(Source.data()) : nullptr;
+    Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
+    if (Ret != 0) {
+      if (auto EC = HandleError(Ret))
+        return EC;
+      continue;
+    }
+    // Flush the converter
+    Ret = iconv(ConvDesc, nullptr, nullptr, &Output, &OutputLength);
+    if (Ret != 0) {
+      if (auto EC = HandleError(Ret))
+        return EC;
+      continue;
+    }
+    break;
+  } while (true);
+
+  // Re-adjust size to actual size.
+  Result.resize(Output - Result.data());
+  return std::error_code();
+}
+
+// Resets the iconv descriptor by calling iconv() with null input and output
+// arguments.
+void CharSetConverterIconv::reset() {
+  iconv(ConvDesc, nullptr, nullptr, nullptr, nullptr);
+}
+
+#endif // HAVE_ICONV
+} // namespace
+
+// Creates a converter backed by the built-in translation tables.
+// \param[in] CPFrom the source character encoding
+// \param[in] CPTo the target character encoding
+// \return a CharSetConverter for UTF-8 <-> IBM-1047, or
+//         std::errc::invalid_argument for any other pair, including
+//         identical source and target encodings
+ErrorOr<CharSetConverter> CharSetConverter::create(text_encoding::id CPFrom,
+                                                   text_encoding::id CPTo) {
+
+  // Identical encodings can be requested through the name-based overload
+  // (e.g. "utf8" and "UTF-8"), so report an error instead of asserting.
+  if (CPFrom == CPTo)
+    return std::make_error_code(std::errc::invalid_argument);
+
+  ConversionType Conversion;
+  if (CPFrom == text_encoding::id::UTF8 && CPTo == text_encoding::id::IBM1047)
+    Conversion = UTF8ToIBM1047;
+  else if (CPFrom == text_encoding::id::IBM1047 &&
+           CPTo == text_encoding::id::UTF8)
+    Conversion = IBM1047ToUTF8;
+  else
+    // errno is not set by anything above; return the documented error code.
+    return std::make_error_code(std::errc::invalid_argument);
+
+  std::unique_ptr<details::CharSetConverterImplBase> Converter =
+      std::make_unique<CharSetConverterTable>(Conversion);
+  return CharSetConverter(std::move(Converter));
+}
+
+ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
+                                                   StringRef CSTo) {
+  std::optional<text_encoding::id> From = getKnownCharSet(CSFrom);
+  std::optional<text_encoding::id> To = getKnownCharSet(CSTo);
+  if (From && To) {
+    ErrorOr<CharSetConverter> Converter = create(*From, *To);
+    if (Converter)
+      return Converter;
+  }
+#ifdef HAVE_ICU
+  UErrorCode EC = U_ZERO_ERROR;
+  UConverterUniquePtr FromConvDesc(ucnv_open(CSFrom.str().c_str(), &EC));
+  if (U_FAILURE(EC)) {
+    return std::error_code(errno, std::generic_category());
+  }
+  UConverterUniquePtr ToConvDesc(ucnv_open(CSTo.str().c_str(), &EC));
+  if (U_FAILURE(EC)) {
+    return std::error_code(errno, std::generic_category());
+  }
+  std::unique_ptr<details::CharSetConverterImplBase> Converter =
+      std::make_unique<CharSetConverterICU>(std::move(FromConvDesc),
+                                            std::move(ToConvDesc));
+  return CharSetConverter(std::move(Converter));
+#elif defined(HAVE_ICONV)...
[truncated]

llvm/include/llvm/Support/CharSet.h

llvm/include/llvm/Config/config.h.cmake

llvm/lib/Support/CharSet.cpp

llvm/unittests/Support/CharSetTest.cpp

llvm/lib/Support/CharSet.cpp

github-actions · 2025-05-09T14:41:43Z

✅ With the latest revision this PR passed the C/C++ code formatter.

s-barannikov

LGTM with final nits

llvm/lib/Support/CharSet.cpp

llvm/unittests/Support/CharSetTest.cpp

llvm/include/llvm/Support/CharSet.h

llvm/lib/Support/CharSet.cpp

llvm/unittests/Support/CharSetTest.cpp

cor3ntin · 2025-05-12T07:44:28Z

Can we consistently use "text encoding"? Rather than "charset" (including in file names?), thanks

cor3ntin · 2025-05-12T07:47:15Z

llvm/include/llvm/Support/CharSet.h

+  /// Converts a string and resets the converter to the initial state.
+  std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result) {
+    auto EC = convertString(Source, Result);
+    reset();
+    return EC;
+  }


Please provide an overload that returns ErrorOr<std::string>, I find that interface cumbersome to use in the -fexec-patch

I provided one on line 129 of this file, please let me know if I should also provide an overload here.

abhina-sree · 2025-05-14T13:52:20Z

If there are no new comments, I will merge this in tomorrow, thanks everyone!

hubert-reinterpretcast

Trying to make a final pass (target EOD Friday). Some minor comments for now.

llvm/include/llvm/Support/EncodingConverter.h

llvm/CMakeLists.txt

llvm/cmake/config-ix.cmake

llvm/include/llvm/Support/TextEncoding.h

llvm/lib/Support/TextEncoding.cpp

hubert-reinterpretcast · 2025-05-20T03:07:09Z

llvm/lib/Support/TextEncoding.cpp

+    EC = U_ZERO_ERROR;
+    const char *Input = In;
+
+    Output = InputLength ? static_cast<char *>(Result.data()) : nullptr;


Either InputLength == 0 does nothing (and we should have done an early exit earlier) or it can do something in the future (to emit a shift sequence to the initial shift state) and Output should point to the buffer always.

Sorry, but I am not seeing the resolution to this. Which of the two options do you want to go with?

hubert-reinterpretcast · 2025-05-20T03:08:22Z

llvm/lib/Support/TextEncoding.cpp

+  UErrorCode EC = U_ZERO_ERROR;
+
+  ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
+                      &EC);
+  ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
+                        NULL, &EC);
+  assert(U_SUCCESS(EC));


Should this be added to reset (and should the constructor call reset) instead of having it here?

This code is not really resetting the converters; it is setting up the callback functions.

Either this can be set once in the constructor or this is setting a state that needs to be restored across calls to reset. Either way, there is probably no need to call these for every conversion in this function (but I admit that the moving to reset makes zero difference at this time because the public interface calls reset for every call to this function).

Overall, the extra calls are negligible anyway.

llvm/lib/Support/TextEncoding.cpp

llvm/unittests/Support/TextEncodingTest.cpp

llvm/unittests/Support/CharSetTest.cpp

llvm/unittests/Support/TextEncodingTest.cpp

llvm-ci · 2025-05-20T18:15:25Z

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/17997

Here is the relevant piece of the build log for the reference

Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/8/11 (2174 of 2183)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/9/11 (2175 of 2183)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/0/2 (2176 of 2183)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/1/2 (2177 of 2183)
PASS: lldb-unit :: Utility/./UtilityTests/4/9 (2178 of 2183)
PASS: lldb-unit :: Target/./TargetTests/11/14 (2179 of 2183)
PASS: lldb-unit :: Host/./HostTests/8/13 (2180 of 2183)
PASS: lldb-unit :: Host/./HostTests/0/13 (2181 of 2183)
PASS: lldb-unit :: Process/gdb-remote/./ProcessGdbRemoteTests/8/9 (2182 of 2183)
UNRESOLVED: lldb-api :: tools/lldb-server/TestLldbGdbServer.py (2183 of 2183)
******************** TEST 'lldb-api :: tools/lldb-server/TestLldbGdbServer.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/tools/lldb-server -p TestLldbGdbServer.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision a9ee8e4a454ec01fefba8829d2847527aa80623f)
  clang revision a9ee8e4a454ec01fefba8829d2847527aa80623f
  llvm revision a9ee8e4a454ec01fefba8829d2847527aa80623f
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hc_then_Csignal_signals_correct_thread_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_another_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_minus_one_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_fails_on_zero_pid_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_Hg_switches_to_3_threads_launch_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_and_p_thread_suffix_work_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
PASS: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_P_writes_all_gpr_registers_llgs (TestLldbGdbServer.LldbGdbServerTestCase)
UNSUPPORTED: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_attach_commandline_continue_app_exits_debugserver (TestLldbGdbServer.LldbGdbServerTestCase) (test case does not fall in any category of interest for this run) 
Program aborted due to an unhandled Error:
Operation not permitted
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.	Program arguments: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server gdbserver --attach=1696024 --reverse-connect [127.0.0.1]:52787
 #0 0x0000aaaaded8a258 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0x4ca258)
 #1 0x0000aaaaded88258 llvm::sys::RunSignalHandlers() (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb-server+0x4c8258)
 #2 0x0000aaaaded8a988 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x0000ffffb88107dc (linux-vdso.so.1+0x7dc)
 #4 0x0000ffffb801f1f0 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76

llvm-ci · 2025-05-20T18:34:21Z

LLVM Buildbot has detected a new failure on builder lldb-arm-ubuntu running on linaro-lldb-arm-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/16286

Here is the relevant piece of the build log for the reference

Step 6 (test) failure: build (failure)
...
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/4/11 (3071 of 3080)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/5/11 (3072 of 3080)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/6/11 (3073 of 3080)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/7/11 (3074 of 3080)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/9/11 (3075 of 3080)
PASS: lldb-unit :: ValueObject/./LLDBValueObjectTests/8/11 (3076 of 3080)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/0/2 (3077 of 3080)
PASS: lldb-unit :: tools/lldb-server/tests/./LLDBServerTests/1/2 (3078 of 3080)
PASS: lldb-unit :: Process/gdb-remote/./ProcessGdbRemoteTests/8/35 (3079 of 3080)
TIMEOUT: lldb-api :: tools/lldb-dap/module/TestDAP_module.py (3080 of 3080)
******************** TEST 'lldb-api :: tools/lldb-dap/module/TestDAP_module.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --arch armv8l --build-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/tools/lldb-dap/module -p TestDAP_module.py
--
Exit Code: -9
Timeout: Reached timeout of 600 seconds

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision a9ee8e4a454ec01fefba8829d2847527aa80623f)
  clang revision a9ee8e4a454ec01fefba8829d2847527aa80623f
  llvm revision a9ee8e4a454ec01fefba8829d2847527aa80623f

--
Command Output (stderr):
--
========= DEBUG ADAPTER PROTOCOL LOGS =========
1747765441.375190496 --> (stdin/stdout) {"command":"initialize","type":"request","arguments":{"adapterID":"lldb-native","clientID":"vscode","columnsStartAt1":true,"linesStartAt1":true,"locale":"en-us","pathFormat":"path","supportsRunInTerminalRequest":true,"supportsVariablePaging":true,"supportsVariableType":true,"supportsStartDebuggingRequest":true,"supportsProgressReporting":true,"$__lldb_sourceInitFile":false},"seq":1}
1747765441.379321814 <-- (stdin/stdout) {"body":{"$__lldb_version":"lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision a9ee8e4a454ec01fefba8829d2847527aa80623f)\n  clang revision a9ee8e4a454ec01fefba8829d2847527aa80623f\n  llvm revision a9ee8e4a454ec01fefba8829d2847527aa80623f","completionTriggerCharacters":["."," ","\t"],"exceptionBreakpointFilters":[{"default":false,"filter":"cpp_catch","label":"C++ Catch"},{"default":false,"filter":"cpp_throw","label":"C++ Throw"},{"default":false,"filter":"objc_catch","label":"Objective-C Catch"},{"default":false,"filter":"objc_throw","label":"Objective-C Throw"}],"supportTerminateDebuggee":true,"supportsBreakpointLocationsRequest":true,"supportsCancelRequest":true,"supportsCompletionsRequest":true,"supportsConditionalBreakpoints":true,"supportsConfigurationDoneRequest":true,"supportsDataBreakpoints":true,"supportsDelayedStackTraceLoading":true,"supportsDisassembleRequest":true,"supportsEvaluateForHovers":true,"supportsExceptionInfoRequest":true,"supportsExceptionOptions":true,"supportsFunctionBreakpoints":true,"supportsHitConditionalBreakpoints":true,"supportsInstructionBreakpoints":true,"supportsLogPoints":true,"supportsModulesRequest":true,"supportsReadMemoryRequest":true,"supportsRestartRequest":true,"supportsSetVariable":true,"supportsStepInTargetsRequest":true,"supportsSteppingGranularity":true,"supportsValueFormattingOptions":true},"command":"initialize","request_seq":1,"seq":0,"success":true,"type":"response"}
1747765441.379780293 --> (stdin/stdout) {"command":"launch","type":"request","arguments":{"program":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/module/TestDAP_module.test_compile_units/a.out","initCommands":["settings clear --all","settings set symbols.enable-external-lookup false","settings set target.inherit-tcc true","settings set target.disable-aslr false","settings set target.detach-on-error false","settings set target.auto-apply-fixits false","settings set plugin.process.gdb-remote.packet-timeout 60","settings set symbols.clang-modules-cache-path \"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api\"","settings set use-color false","settings set show-statusline false"],"disableASLR":true,"enableAutoVariableSummaries":false,"enableSyntheticChildDebugging":false,"displayExtendedBacktrace":false},"seq":2}
1747765441.380268812 <-- (stdin/stdout) {"body":{"category":"console","output":"Running initCommands:\n"},"event":"output","seq":0,"type":"event"}
1747765441.380329370 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings clear --all\n"},"event":"output","seq":0,"type":"event"}
1747765441.380344391 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set symbols.enable-external-lookup false\n"},"event":"output","seq":0,"type":"event"}
1747765441.380357027 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set target.inherit-tcc true\n"},"event":"output","seq":0,"type":"event"}
1747765441.380369663 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set target.disable-aslr false\n"},"event":"output","seq":0,"type":"event"}
1747765441.380380869 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set target.detach-on-error false\n"},"event":"output","seq":0,"type":"event"}
1747765441.380392790 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set target.auto-apply-fixits false\n"},"event":"output","seq":0,"type":"event"}
1747765441.380404234 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set plugin.process.gdb-remote.packet-timeout 60\n"},"event":"output","seq":0,"type":"event"}
1747765441.380459547 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set symbols.clang-modules-cache-path \"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api\"\n"},"event":"output","seq":0,"type":"event"}
1747765441.380471706 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set use-color false\n"},"event":"output","seq":0,"type":"event"}
1747765441.380483627 <-- (stdin/stdout) {"body":{"category":"console","output":"(lldb) settings set show-statusline false\n"},"event":"output","seq":0,"type":"event"}
1747765441.525060177 <-- (stdin/stdout) {"command":"launch","request_seq":2,"seq":0,"success":true,"type":"response"}
1747765441.525144100 <-- (stdin/stdout) {"body":{"module":{"addressRange":"0xf7fbf000","debugInfoSize":"983.3KB","id":"0D794E6C-AF7E-D8CB-B9BA-E385B4F8753F-5A793D65","name":"ld-linux-armhf.so.3","path":"/usr/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3","symbolFilePath":"/usr/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3","symbolStatus":"Symbols loaded."},"reason":"new"},"event":"module","seq":0,"type":"event"}
1747765441.525234699 <-- (stdin/stdout) {"event":"initialized","seq":0,"type":"event"}
1747765441.525394678 <-- (stdin/stdout) {"body":{"module":{"addressRange":"0x400000","debugInfoSize":"1.1KB","id":"38E64BA9","name":"a.out","path":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/module/TestDAP_module.test_compile_units/a.out","symbolFilePath":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/module/TestDAP_module.test_compile_units/a.out","symbolStatus":"Symbols loaded."},"reason":"new"},"event":"module","seq":0,"type":"event"}
1747765441.525813580 --> (stdin/stdout) {"command":"setBreakpoints","type":"request","arguments":{"source":{"name":"main.cpp","path":"main.cpp"},"sourceModified":false,"lines":[5],"breakpoints":[{"line":5}]},"seq":3}
1747765441.533555746 <-- (stdin/stdout) {"body":{"breakpoints":[{"column":3,"id":1,"instructionReference":"0x41073C","line":5,"source":{"name":"main.cpp","path":"main.cpp"},"verified":true}]},"command":"setBreakpoints","request_seq":3,"seq":0,"success":true,"type":"response"}
1747765441.533858299 --> (stdin/stdout) {"command":"configurationDone","type":"request","arguments":{},"seq":4}

hubert-reinterpretcast · 2025-05-20T20:47:24Z

llvm/lib/Support/TextEncoding.cpp

+    EC = U_ZERO_ERROR;
+    const char *Input = In;
+
+    Output = InputLength ? static_cast<char *>(Result.data()) : nullptr;


Sorry, but I am not seeing the resolution to this. Which of the two options do you want to go with?

hubert-reinterpretcast · 2025-05-20T20:50:11Z

llvm/lib/Support/TextEncoding.cpp

+        } else
+          return std::error_code(E2BIG, std::generic_category());


Technically, `Result.resize(Output - Result.data())` should still be run in this case.

hubert-reinterpretcast · 2025-05-20T21:00:39Z

llvm/lib/Support/TextEncoding.cpp

+    // Setup the input. Use nullptr to reset iconv state if input length is
+    // zero.


Comment is out-of-sync with the code.

llvm-ci · 2025-05-21T00:51:21Z

LLVM Buildbot has detected a new failure on builder clang-ppc64-aix running on aix-ppc64 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/64/builds/3700

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'lit :: timeout-hang.py' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 13
not env -u FILECHECK_OPTS "/home/llvm/llvm-external-buildbots/workers/env/bin/python3.11" /home/llvm/llvm-external-buildbots/workers/aix-ppc64/clang-ppc64-aix/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical Inputs/timeout-hang/run-nonexistent.txt  --timeout=1 --param external=0 | "/home/llvm/llvm-external-buildbots/workers/env/bin/python3.11" /home/llvm/llvm-external-buildbots/workers/aix-ppc64/clang-ppc64-aix/build/utils/lit/tests/timeout-hang.py 1
# executed command: not env -u FILECHECK_OPTS /home/llvm/llvm-external-buildbots/workers/env/bin/python3.11 /home/llvm/llvm-external-buildbots/workers/aix-ppc64/clang-ppc64-aix/llvm-project/llvm/utils/lit/lit.py -j1 --order=lexical Inputs/timeout-hang/run-nonexistent.txt --timeout=1 --param external=0
# .---command stderr------------
# | lit.py: /home/llvm/llvm-external-buildbots/workers/aix-ppc64/clang-ppc64-aix/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 1 seconds was requested on the command line. Forcing timeout to be 1 seconds.
# `-----------------------------
# executed command: /home/llvm/llvm-external-buildbots/workers/env/bin/python3.11 /home/llvm/llvm-external-buildbots/workers/aix-ppc64/clang-ppc64-aix/build/utils/lit/tests/timeout-hang.py 1
# .---command stdout------------
# | Testing took as long or longer than timeout
# `-----------------------------
# error: command failed with exit status: 1

--

********************

abhina-sree · 2025-05-22T11:52:41Z

Thanks for the review. I will create a separate PR to address these concerns, along with any future review comments.

Create a CharSetConverter class with both iconv and icu support.

3192c7b

llvmbot added the `cmake` (Build system in general and CMake in particular) and `llvm:support` labels on May 7, 2025

abhina-sree mentioned this pull request May 7, 2025

Enable fexec-charset option #138895

Open

abhina-sree self-assigned this May 7, 2025

abhina-sree mentioned this pull request May 7, 2025

Create a CharSetConverter class with both iconv and icu support #74516

Closed

abhina-sree requested review from redstar, cor3ntin, tahonermann, daltenty, hubert-reinterpretcast, AaronBallman, perry-ca and efriedma-quic May 7, 2025 15:36

s-barannikov reviewed May 7, 2025

View reviewed changes

address comments

6d40922

rename CharSetConverter to EncodingConverter

52635f2

abhina-sree force-pushed the users/abhina/charset_converter branch from 5e8d930 to 52635f2 Compare May 9, 2025 14:45

s-barannikov approved these changes May 9, 2025

View reviewed changes

hubert-reinterpretcast reviewed May 9, 2025

View reviewed changes

llvm/unittests/Support/CharSetTest.cpp Outdated Show resolved Hide resolved

cor3ntin reviewed May 12, 2025

View reviewed changes

address comments, rename CharSet to EncodingConverter

a39b13e

abhina-sree changed the title ~~Create a CharSetConverter class with both iconv and icu support.~~ Create a EncodingConverter class with both iconv and icu support. May 12, 2025

hubert-reinterpretcast reviewed May 15, 2025

View reviewed changes

llvm/include/llvm/Support/EncodingConverter.h Outdated Show resolved Hide resolved

llvm/CMakeLists.txt Outdated Show resolved Hide resolved

llvm/cmake/config-ix.cmake Outdated Show resolved Hide resolved

rename filename, class to use TextEncoding, address comments

b32b472

hubert-reinterpretcast reviewed May 20, 2025

View reviewed changes

llvm/unittests/Support/TextEncodingTest.cpp Outdated Show resolved Hide resolved

llvm/unittests/Support/CharSetTest.cpp Outdated Show resolved Hide resolved

llvm/unittests/Support/TextEncodingTest.cpp Outdated Show resolved Hide resolved

hubert-reinterpretcast reviewed May 20, 2025

View reviewed changes

llvm/unittests/Support/TextEncodingTest.cpp Show resolved Hide resolved

address latest comments

b9dab1f

abhina-sree force-pushed the users/abhina/charset_converter branch from 4b8b4fe to b9dab1f Compare May 20, 2025 15:32

abhina-sree merged commit a9ee8e4 into main May 20, 2025
11 checks passed

abhina-sree deleted the users/abhina/charset_converter branch May 20, 2025 18:02

hubert-reinterpretcast reviewed May 20, 2025

View reviewed changes

abhina-sree mentioned this pull request Jun 2, 2025

Improvements to TextEncodingConverter #142476

Open

		} else
		return std::error_code(E2BIG, std::generic_category());

		// Setup the input. Use nullptr to reset iconv state if input length is
		// zero.

Create a EncodingConverter class with both iconv and icu support. #138893

Create a EncodingConverter class with both iconv and icu support. #138893

Uh oh!

Conversation

abhina-sree commented May 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 7, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented May 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

s-barannikov left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

cor3ntin commented May 12, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

abhina-sree May 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

abhina-sree commented May 14, 2025

Uh oh!

hubert-reinterpretcast left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

hubert-reinterpretcast May 20, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

abhina-sree commented May 7, 2025 •

edited

Loading

github-actions bot commented May 9, 2025 •

edited

Loading

abhina-sree May 12, 2025 •

edited

Loading

hubert-reinterpretcast May 20, 2025 •

edited

Loading