Skip to content

[Bug]: 使用预编译好的Android-lite-arm64-v8a 导致Segmentation fault #5682

Closed
@luchangli03

Description

@luchangli03

软件环境

我直接从 https://github.com/PaddlePaddle/PaddleNLP/blob/develop/fast_tokenizer/docs/cpp/README.md 下载了 fast_tokenizer-lite-android-arm64-v8a-1.0.2.tgz，解压后创建了一个简单的测试工程，内容如下。
main.cpp:
'''
#include <iostream>
#include <vector>
#include <string>

#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
using namespace paddlenlp;

// Minimal reproducer: prints a marker line with printf, constructs a
// std::string and writes it to std::cout, then exits with status 0.
// All tokenizer usage below is commented out, so the only active code is
// the standard-library I/O and the std::string.
int main() {

  // Progress marker emitted through C stdio before any C++ stream is used.
  printf("step 1\n");
  std::string str1 = "hello world";

  // Write the string through the C++ stream and flush it.
  std::cout<<str1<<std::endl;

  // ---- Disabled tokenizer example (kept verbatim for reference) ----
  // // // 1. Define a ernie fast tokenizer
  // fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer("vocab.txt");
  // printf("step 2\n");

  // // case 3: Tokenize a batch of single strings
  // // std::cout << "Tokenize a batch of single strings" << std::endl;

  // printf("Tokenize a batch of single strings\n");

  // The three literals below are Chinese sample sentences used as input.
  // std::vector<std::string> strings_list = {"通过中介公司买了二手房,首付都付了,现在卖家不想卖了。怎么处理?",
  //                                          "凌云研发的国产两轮电动车怎么样,有什么惊喜?",
  //                                          "一辆车的寿命到底多长,最多可以开多久?"};

  // std::vector<fast_tokenizer::core::Encoding> encodings;

  // tokenizer.EncodeBatchStrings(strings_list, &encodings);
  // for (auto&& encoding : encodings) {
  //   std::cout << encoding.DebugString() << std::endl;
  // }

  return 0;
}
'''
CMakeLists.txt
'''
cmake_minimum_required(VERSION 3.10)

project(cmake_study LANGUAGES CXX)

# Root directory of the unpacked fast_tokenizer prebuilt package.
# The default is the location this project was originally written against;
# override it at configure time with -DFAST_TOKENIZER_ROOT=<dir>.
set(FAST_TOKENIZER_ROOT
    "/root/codes/engine/fast_tokenizer_test/fast_tokenizer-lite-android-arm64-v8a-1.0.2"
    CACHE PATH "Directory containing the unpacked fast_tokenizer package")

# Fail at configure time with a clear message instead of at link time.
if(NOT EXISTS "${FAST_TOKENIZER_ROOT}/lib/libcore_tokenizers.so")
    message(FATAL_ERROR
        "libcore_tokenizers.so not found under ${FAST_TOKENIZER_ROOT}/lib; "
        "set FAST_TOKENIZER_ROOT to the unpacked package directory")
endif()

# used to reduce binary size (-s strips symbol information from the output)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")

# Single-source test executable.
add_executable(
    fast_tok_test
    main.cpp
)

# Headers of the tokenizer itself and of its bundled third-party dependencies.
target_include_directories(
    fast_tok_test
    PUBLIC
    "${FAST_TOKENIZER_ROOT}/include"
    "${FAST_TOKENIZER_ROOT}/third_party/include"
)

# Link directly against the prebuilt shared library by full path.
target_link_libraries(
    fast_tok_test
    PUBLIC
    "${FAST_TOKENIZER_ROOT}/lib/libcore_tokenizers.so"
)
'''
build.sh
'''
#!/bin/bash
# Configures and builds fast_tok_test for Android arm64-v8a with the NDK
# toolchain and Ninja, then pushes the resulting binary to a device via adb.
#
# Environment:
#   DEV_DIR  on-device destination directory for `adb push` (required).

# Stop on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a failed build never pushes a stale binary.
set -euo pipefail

# Absolute directory of this script, so it can be invoked from anywhere.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

cd "${SCRIPT_DIR}"

# rm -rf build
# -p: do not fail when the build directory is left over from a previous run.
mkdir -p build
cd build

export SDK_PATH=/root/codes/android_sdk
export NDK_PATH=/root/codes/android_sdk/ndk/24.0.8215888/

ANDROID_ABI=arm64-v8a
MINSDKVERSION=29

# NOTE(review): ANDROID_STL=c++_static is combined with a prebuilt shared
# library (libcore_tokenizers.so). The NDK documentation cautions against a
# static C++ runtime when more than one native binary is loaded into the
# process - verify which STL the prebuilt library was built against.
"${SDK_PATH}/cmake/3.18.1/bin/cmake" \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_TOOLCHAIN_FILE="${NDK_PATH}/build/cmake/android.toolchain.cmake" \
    -DANDROID_ABI="${ANDROID_ABI}" \
    -DANDROID_NDK="${NDK_PATH}" \
    -DANDROID_PLATFORM="android-${MINSDKVERSION}" \
    -DCMAKE_ANDROID_ARCH_ABI="${ANDROID_ABI}" \
    -DCMAKE_ANDROID_NDK="${NDK_PATH}" \
    -DCMAKE_MAKE_PROGRAM="${SDK_PATH}/cmake/3.18.1/bin/ninja" \
    -DCMAKE_SYSTEM_NAME=Android \
    -DCMAKE_SYSTEM_VERSION="${MINSDKVERSION}" \
    -DANDROID_STL=c++_static \
    -GNinja \
    ..

"${SDK_PATH}/cmake/3.18.1/bin/ninja"

# DEV_DIR is not defined in this script; require it from the environment
# instead of silently calling `adb push` with a missing destination.
: "${DEV_DIR:?DEV_DIR must be set to the on-device target directory}"
adb push fast_tok_test "${DEV_DIR}"
'''

重复问题

  • I have searched the existing issues

错误描述

只要 main.cpp 里面使用了 std::string，程序一启动就直接报 Segmentation fault。

稳定复现步骤 & 代码

参考上面描述。

Metadata

Metadata

Labels

bug (Something isn't working), triage

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions