Skip to content

Commit 0df31a8

Browse files
committed
MNN 1.0.0 release sync.
- Added Python Express API implemented with pybind11 - Added demos for Python Express API - Performance improvements for ARM64, ARMv8.2, x86. - README update.
1 parent dfd89f1 commit 0df31a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1366
-876
lines changed

3rd_party/flatbuffers/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ elseif(MSVC)
206206
# Visual Studio pedantic build settings
207207
# warning C4512: assignment operator could not be generated
208208
# warning C4316: object allocated on the heap may not be aligned
209-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4512 /wd4316")
209+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4512 /wd4316")
210210
endif()
211211

212212
if(FLATBUFFERS_CODE_COVERAGE)

CMakeLists.txt

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ option(MNN_USE_SYSTEM_LIB "For opencl and vulkan, use system lib or use dlopen"
5050
option(MNN_BUILD_HARD "Build -mfloat-abi=hard or not" OFF)
5151
option(MNN_BUILD_SHARED_LIBS "MNN build shared or static lib" ON)
5252
option(MNN_FORBID_MULTI_THREAD "Disable Multi Thread" OFF)
53-
option(MNN_OPENMP "Enable Multiple Thread Linux|Android" ON)
54-
option(MNN_USE_THREAD_POOL "Use Multiple Thread by Self ThreadPool" ON)
55-
option(MNN_BUILD_TRAIN "Build Train Tools" OFF)
53+
option(MNN_OPENMP "Use OpenMP's thread pool implementation. Does not work on iOS or Mac OS" OFF)
54+
option(MNN_USE_THREAD_POOL "Use MNN's own thread pool implementation" ON)
55+
option(MNN_BUILD_TRAIN "Build MNN's training framework" OFF)
5656
option(MNN_BUILD_DEMO "Build demo/exec or not" OFF)
5757
option(MNN_BUILD_TOOLS "Build tools/cpp or not" ON)
5858
option(MNN_BUILD_QUANTOOLS "Build Quantized Tools or not" OFF)
@@ -84,13 +84,13 @@ IF(MSVC OR WIN32)
8484
SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
8585
ENDIF()
8686
SET(MNN_USE_SYSTEM_LIB ON CACHE BOOL "<docstring>" FORCE)
87-
87+
8888
# generate optimized (release) exe and library with pdb debug file, https://stackoverflow.com/a/31264946
8989
SET(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
9090
SET(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
9191
SET(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
9292
SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
93-
93+
9494
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
9595
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4018 /wd4251 /wd4996 /wd4244 /wd4146 /wd4129 /wd4305 /wd4275")
9696
ENDIF()
@@ -343,6 +343,40 @@ list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Neur
343343
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Optimizer.hpp")
344344
list(APPEND MNN_EXPR_PUB_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/MNN/expr/Executor.hpp")
345345

346+
if (NOT APPLE)
347+
if(MNN_OPENMP)
348+
message(STATUS "[*] Checking OpenMP")
349+
find_package(OpenMP)
350+
# For CMake < 3.9, we need to make the target ourselves
351+
if(NOT TARGET OpenMP::OpenMP_CXX)
352+
find_package(Threads REQUIRED)
353+
add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
354+
set_property(TARGET OpenMP::OpenMP_CXX
355+
PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
356+
# Only works if the same flag is passed to the linker; use CMake 3.9+ otherwise (Intel, AppleClang)
357+
set_property(TARGET OpenMP::OpenMP_CXX
358+
PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
359+
endif()
360+
# TODO: Don't pollute global CFLAGS
361+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
362+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
363+
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
364+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
365+
if (WIN32)
366+
set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
367+
set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
368+
endif()
369+
FOREACH(TARGET ${MNN_TARGETS})
370+
target_link_libraries(${TARGET} OpenMP::OpenMP_CXX)
371+
IF(WIN32)
372+
target_compile_options(${TARGET} PUBLIC /openmp ${OpenMP_CXX_FLAGS} ${OpenMP_C_FLAGS})
373+
ELSE()
374+
target_compile_options(${TARGET} PUBLIC ${OpenMP_CXX_FLAGS} ${OpenMP_C_FLAGS})
375+
ENDIF()
376+
ENDFOREACH()
377+
endif()
378+
endif()
379+
346380
set(CMAKE_CXX_FLAGS_ORIGIN ${CMAKE_CXX_FLAGS})
347381
set(CMAKE_C_FLAGS_ORIGIN ${CMAKE_C_FLAGS})
348382
if ((NOT (MSVC OR WIN32)) AND MNN_HIDDEN)
@@ -477,7 +511,7 @@ if (NOT MNN_BUILD_SHARED_LIBS)
477511
if(APPLE)
478512
set(MNN_DEPS -Wl,-all_load ${MNN_DEPS} -Wl,-noall_load)
479513
elseif (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
480-
# Static-link will not replace thread-related weak symbol in glibc with strong symbol
514+
# Static-link will not replace thread-related weak symbol in glibc with strong symbol
481515
# in pthread library, so we need use --whole-archive to pthread
482516
# https://stackoverflow.com/questions/35116327/when-g-static-link-pthread-cause-segmentation-fault-why
483517
if(CMAKE_SYSTEM_NAME MATCHES "^Linux")
@@ -487,39 +521,6 @@ if (NOT MNN_BUILD_SHARED_LIBS)
487521
endif()
488522
endif()
489523
endif()
490-
if (NOT APPLE)
491-
if(MNN_OPENMP)
492-
message(STATUS "[*] Checking OpenMP")
493-
find_package(OpenMP)
494-
# For CMake < 3.9, we need to make the target ourselves
495-
if(NOT TARGET OpenMP::OpenMP_CXX)
496-
find_package(Threads REQUIRED)
497-
add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE)
498-
set_property(TARGET OpenMP::OpenMP_CXX
499-
PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS})
500-
# Only works if the same flag is passed to the linker; use CMake 3.9+ otherwise (Intel, AppleClang)
501-
set_property(TARGET OpenMP::OpenMP_CXX
502-
PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS} Threads::Threads)
503-
endif()
504-
# TODO: Don't pollute global CFLAGS
505-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
506-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
507-
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_SHARED_LINKER_FLAGS}")
508-
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
509-
if (WIN32)
510-
set(OpenMP_C_FLAGS "/openmp ${OpenMP_C_FLAGS}")
511-
set(OpenMP_CXX_FLAGS "/openmp ${OpenMP_CXX_FLAGS}")
512-
endif()
513-
FOREACH(TARGET ${MNN_TARGETS})
514-
target_link_libraries(${TARGET} OpenMP::OpenMP_CXX)
515-
IF(WIN32)
516-
target_compile_options(${TARGET} PUBLIC /openmp ${OpenMP_CXX_FLAGS} ${OpenMP_C_FLAGS})
517-
ELSE()
518-
target_compile_options(${TARGET} PUBLIC ${OpenMP_CXX_FLAGS} ${OpenMP_C_FLAGS})
519-
ENDIF()
520-
ENDFOREACH()
521-
endif()
522-
endif()
523524
list(APPEND MNN_TARGETS MNN)
524525
FOREACH(TARGET ${MNN_TARGETS})
525526
IF((NOT MSVC) AND (NOT WIN32))

MNN.podspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,4 @@ Pod::Spec.new do |s|
5858

5959
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1'}
6060
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
61-
end
61+
end

README.md

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,45 @@
22

33
[中文版本](README_CN.md)
44

5-
[![Build Status](https://travis-ci.com/alibaba/MNN.svg?branch=master)](https://travis-ci.com/alibaba/MNN)
6-
75
## Intro
8-
MNN is a lightweight deep neural network inference engine. It loads models and do inference on devices. At present, MNN has been integrated in more than 20 apps of Alibaba-inc, such as Taobao, Tmall, Youku and etc., covering live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control and other scenarios. In addition, MNN is also used on embedded devices, such as IoT.
6+
MNN is a highly efficient and lightweight deep learning framework. It supports inference and training of deep learning models, and has industry leading performance for inference and training on-device. At present, MNN has been integrated in more than 20 apps of Alibaba Inc, such as Taobao, Tmall, Youku, Dingtalk, Xianyu and etc., covering more than 70 usage scenarios such as live broadcast, short video capture, search recommendation, product searching by image, interactive marketing, equity distribution, security risk control. In addition, MNN is also used on embedded devices, such as IoT.
7+
8+
The design principles and performance data of MNN has been published in an MLSys 2020 paper [here](https://proceedings.mlsys.org/static/paper_files/mlsys/2020/7-Paper.pdf). Please cite MNN in your publications if it helps your research:
9+
10+
@inproceedings{alibaba2020mnn,
11+
author = {Jiang, Xiaotang and Wang, Huan and Chen, Yiliu and Wu, Ziqi and Wang, Lichuan and Zou, Bin and Yang, Yafeng and Cui, Zongyang and Cai, Yu and Yu, Tianhang and Lv, Chengfei and Wu, Zhihua},
12+
title = {MNN: A Universal and Efficient Inference Engine},
13+
booktitle = {MLSys},
14+
year = {2020}
15+
}
16+
17+
## Documentation
18+
MNN's docs are placed in [Yuque docs here](https://www.yuque.com/mnn/en).
19+
20+
## Key Features
21+
### High performance
22+
- Implements core computing with lots of optimized assembly code to make full use of the ARM CPU.
23+
- For iOS, GPU acceleration (Metal) can be turned on, which is faster than Apple's native CoreML.
24+
- For Android, `OpenCL`, `Vulkan`, and `OpenGL` are available and deep tuned for mainstream GPUs (`Adreno` and `Mali`).
25+
- Convolution and transposed convolution algorithms are efficient and stable. The Winograd convolution algorithm is widely used in symmetric convolutions such as 3x3 -> 7x7 for better performance.
26+
- 2x speed increase on the new ARM v8.2 architecture with FP16 half-precision calculation support.
927

10-
## Features
1128
### Lightweight
1229
- Optimized for devices, no dependencies, can be easily deployed to mobile devices and a variety of embedded devices.
1330
- iOS platform: static library size for armv7+arm64 platforms is about 5MB, size increase of linked executables is about 620KB, and metallib file is about 600KB.
1431
- Android platform: core so size is about 400KB, OpenCL so is about 400KB, Vulkan so is about 400KB.
1532

1633
### Versatility
1734
- Supports `Tensorflow`, `Caffe`, `ONNX`, and supports common neural networks such as `CNN`, `RNN`, `GAN`.
18-
- Supports 86 `Tensorflow` ops, 34 `Caffe` ops; MNN ops: 71 for CPU, 55 for Metal, 29 for OpenCL, and 31 for Vulkan.
35+
- MNN model converter supports 149 `Tensorflow` OPs, 58 `TFLite` OPs, 47 `Caffe` OPs and 74 `ONNX` OPs; Number of OPs by different MNN hardware backends: 111 for CPU, 6 for ARM V8.2, 55 for Metal, 43 for OpenCL, and 32 for Vulkan.
1936
- Supports iOS 8.0+, Android 4.3+ and embedded devices with POSIX interface.
20-
- Supports hybrid computing on multiple devices. Currently supports CPU and GPU. GPU op plugin can be loaded dynamically to replace default (CPU) op implementation.
37+
- Supports hybrid computing on multiple devices. Currently supports CPU and GPU.
2138

22-
### High performance
23-
- Implements core computing with lots of optimized assembly code to make full use of the ARM CPU.
24-
- For iOS, GPU acceleration (Metal) can be turned on, which is faster than Apple's native CoreML.
25-
- For Android, `OpenCL`, `Vulkan`, and `OpenGL` are available and deep tuned for mainstream GPUs (`Adreno` and `Mali`).
26-
- Convolution and transposition convolution algorithms are efficient and stable. The Winograd convolution algorithm is widely used to better symmetric convolutions such as 3x3 -> 7x7.
27-
- Additional optimizations for the new architecture ARM v8.2 with half-precision calculation support.
28-
29-
### Easy to use
39+
### Ease of use
3040
- Efficient image processing module, speeding up affine transform and color space transform without libyuv or opencv.
3141
- Provides callbacks throughout the workflow to extract data or control the execution precisely.
3242
- Provides options for selecting inference branch and paralleling branches on CPU and GPU.
43+
- (BETA) MNN Python API helps ML engineers to easily use MNN to build a model, train it and quantize it, without dipping their toes in C++ code.
3344

3445
## Architecture
3546
![architecture](doc/architecture.png)
@@ -40,48 +51,23 @@ Converter consists of Frontends and Graph Optimize. The former is responsible fo
4051

4152
Interpreter consists of Engine and Backends. The former is responsible for the loading of the model and the scheduling of the calculation graph; the latter includes the memory allocation and the Op implementation under each computing device. In Engine and Backends, MNN applies a variety of optimization schemes, including applying Winograd algorithm in convolution and deconvolution, applying Strassen algorithm in matrix multiplication, low-precision calculation, Neon optimization, hand-written assembly, multi-thread optimization, memory reuse, heterogeneous computing, etc.
4253

43-
## Paper
44-
Please cite MNN in your publications if it helps your research:
45-
46-
@inproceedings{alibaba2020mnn,
47-
author = {Jiang, Xiaotang and Wang, Huan and Chen, Yiliu and Wu, Ziqi and Wang, Lichuan and Zou, Bin and Yang, Yafeng and Cui, Zongyang and Cai, Yu and Yu, Tianhang and Lv, Chengfei and Wu, Zhihua},
48-
title = {MNN: A Universal and Efficient Inference Engine},
49-
booktitle = {MLSys},
50-
year = {2020}
51-
}
52-
53-
## Quick start
54-
- [Usage](https://www.yuque.com/mnn/en/usage)
55-
- [Supported Operators](https://www.yuque.com/mnn/en/ops)
56-
- [API](doc/API/API_index.html)
57-
- [Demo](https://www.yuque.com/mnn/en/demo_zoo)
58-
59-
## Tools
60-
- [Test tools](https://www.yuque.com/mnn/en/tool_test)
61-
- [Benchmark](https://www.yuque.com/mnn/en/tool_benchmark)
62-
- [Model compress](https://www.yuque.com/mnn/en/tool_quantize)
54+
## How to Discuss and Get Help From MNN Community
6355

64-
## Customizing
65-
- [Customizing backend](https://www.yuque.com/mnn/en/customize_backend)
66-
- [Customizing operator](https://www.yuque.com/mnn/en/customize_op)
67-
- [Contributing](https://www.yuque.com/mnn/en/contribute)
56+
Scan the following QR codes to join Dingtalk discussion group. The group discussions are predominantly Chinese. But we welcome and will help English speakers.
6857

69-
## How to use python interface
70-
- [Python](https://www.yuque.com/mnn/en/usage_in_python)
58+
Group #1 (Full)
7159

72-
## Feedbacks
73-
- [FAQ](https://www.yuque.com/mnn/en/faq)
60+
<img src="doc/DingTalkQR1.png" height="256"/>
7461

75-
Scan QR code to join DingDing discussion group.
62+
Group #2:
7663

77-
<img src="doc/DingTalkQR1.png" height="256"/>
7864
<img src="doc/DingTalkQR2.png" height="256"/>
7965

8066
## License
8167
Apache 2.0
8268

8369
## Acknowledgement
84-
MNN participants: Taobao Technology Department, Search Engineering Team, DAMO Team, Youku and other group employees.
70+
MNN participants: Taobao Technology Department, Search Engineering Team, DAMO Team, Youku and other Alibaba Group employees.
8571

8672
MNN refers to the following projects:
8773
- [Caffe](https://github.com/BVLC/caffe)

0 commit comments

Comments
 (0)