
Commit 4a11426

whisper : add OpenVINO support (ggml-org#1037)
* openvino: use OpenVINO encoder inference
* openvino: add python script for OpenVINO model generation
* whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build
* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <[email protected]>

* whisper: Fix compilation error
* whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures
* cmake: Add openvino-encoder as separate object target
* whisper : minor style fixes
* minor : indentation fixes

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 7303033 commit 4a11426

8 files changed (+367, -3 lines)

CMakeLists.txt (+28)

@@ -54,6 +54,8 @@ option(WHISPER_NO_AVX2 "whisper: disable AVX2" OFF)
 option(WHISPER_NO_FMA  "whisper: disable FMA"  OFF)
 option(WHISPER_NO_F16C "whisper: disable F16c" OFF)

+option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
+
 if (APPLE)
     option(WHISPER_NO_ACCELERATE "whisper: disable Accelerate framework" OFF)
     option(WHISPER_COREML        "whisper: enable Core ML framework"     OFF)

@@ -192,6 +194,10 @@ if (WHISPER_CLBLAST)
     endif()
 endif()

+if( WHISPER_OPENVINO )
+    find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+endif()
+
 # compiler flags

 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)

@@ -297,6 +303,24 @@ if (WHISPER_COREML)
         )
 endif()

+if (WHISPER_OPENVINO)
+    set(TARGET whisper.openvino)
+
+    add_library(${TARGET} OBJECT
+        openvino/whisper-openvino-encoder.h
+        openvino/whisper-openvino-encoder.cpp
+        )
+
+    target_include_directories(${TARGET} PUBLIC
+        .
+        )
+
+    set_property(TARGET ${TARGET} PROPERTY POSITION_INDEPENDENT_CODE ON)
+    set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DWHISPER_USE_OPENVINO)
+
+    target_link_libraries(${TARGET} PRIVATE openvino::runtime)
+endif()
+
 #
 # whisper - this is the main library of the project
 #

@@ -322,6 +346,10 @@ if (WHISPER_COREML)
     target_link_libraries(${TARGET} PRIVATE whisper.coreml)
 endif()

+if (WHISPER_OPENVINO)
+    target_link_libraries(${TARGET} PRIVATE whisper.openvino)
+endif()
+
 if (MSVC)
     target_link_libraries(${TARGET} PRIVATE ${WHISPER_EXTRA_LIBS} ${CMAKE_THREAD_LIBS_INIT})
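The object target above builds the wrapper on its own and adds -DWHISPER_USE_OPENVINO to the flags of the main library. The whisper.cpp/whisper.h changes that consume this define are part of the commit but are not shown in this excerpt; the following is a minimal illustrative sketch (not the commit's actual code) of how such a guard can route the encoder through the wrapper API declared in openvino/whisper-openvino-encoder.h:

#include "ggml.h"
#ifdef WHISPER_USE_OPENVINO
#include "openvino/whisper-openvino-encoder.h"
#endif

// Illustrative only: returns true when the OpenVINO path handled the encode;
// a caller would fall back to the regular ggml encoder otherwise.
static bool try_openvino_encode(void * ctx_ov, struct ggml_tensor * mel, struct ggml_tensor * out) {
#ifdef WHISPER_USE_OPENVINO
    if (ctx_ov) {
        return whisper_openvino_encode((struct whisper_openvino_context *) ctx_ov, mel, out) == 1;
    }
#endif
    (void) ctx_ov; (void) mel; (void) out;
    return false;
}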

examples/main/main.cpp (+7)

@@ -95,6 +95,8 @@ struct whisper_params {
     // [TDRZ] speaker turn string
     std::string tdrz_speaker_turn = " [SPEAKER_TURN]"; // TODO: set from command line

+    std::string openvino_encode_device = "CPU";
+
     std::vector<std::string> fname_inp = {};
     std::vector<std::string> fname_out = {};
 };

@@ -155,6 +157,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
         else if (                  arg == "--prompt")      { params.prompt = argv[++i]; }
         else if (arg == "-m"    || arg == "--model")       { params.model = argv[++i]; }
         else if (arg == "-f"    || arg == "--file")        { params.fname_inp.emplace_back(argv[++i]); }
+        else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
         else {
             fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
             whisper_print_usage(argc, argv, params);

@@ -207,6 +210,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
     fprintf(stderr, "  --prompt PROMPT                [%-7s] initial prompt\n", params.prompt.c_str());
     fprintf(stderr, "  -m FNAME, --model FNAME        [%-7s] model path\n", params.model.c_str());
     fprintf(stderr, "  -f FNAME, --file FNAME         [%-7s] input WAV file path\n", "");
+    fprintf(stderr, "  -oved D,  --ov-e-device DNAME  [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
     fprintf(stderr, "\n");
 }

@@ -809,6 +813,9 @@ int main(int argc, char ** argv) {
         return 3;
     }

+    // initialize openvino encoder. This has no effect on whisper.cpp builds that don't have OpenVINO configured.
+    whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr);
+
     for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
         const auto fname_inp = params.fname_inp[f];
         const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
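The call above passes nullptr for the OpenVINO model path and cache directory, letting the library derive defaults (the commit message mentions whisper_get_openvino_path_encoder and whisper_get_openvino_path_cache helpers for this). Since the conversion script below names its output 'ggml-<model>-encoder-openvino.xml', an application can also pass explicit values. A minimal sketch, assuming the whisper_ctx_init_openvino_encoder declaration this commit adds to whisper.h (not shown in this excerpt) and a hypothetical file layout:

#include "whisper.h"

int main() {
    struct whisper_context * ctx = whisper_init_from_file("models/ggml-base.en.bin");
    if (!ctx) {
        return 1;
    }

    // explicit encoder IR path (as produced by models/convert-whisper-to-openvino.py),
    // device string, and an optional cache directory for compiled blobs
    whisper_ctx_init_openvino_encoder(
        ctx,
        "models/ggml-base.en-encoder-openvino.xml", // assumed location of the converted encoder
        "GPU",                                      // any OpenVINO device, e.g. "CPU" or "GPU"
        "ov-cache");                                // may be nullptr, as in main.cpp above

    // ... run whisper_full() as usual; if initialization succeeded, the encoder runs through OpenVINO ...

    whisper_free(ctx);
    return 0;
}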

models/convert-whisper-to-openvino.py (+53)

@@ -0,0 +1,53 @@
import argparse
import torch
from whisper import load_model
import os
from openvino.tools import mo
from openvino.runtime import serialize
import shutil

def convert_encoder(hparams, encoder, mname):
    encoder.eval()

    mel = torch.zeros((1, 80, 3000))

    onnx_folder=os.path.join(os.path.dirname(__file__),"onnx_encoder")

    #create a directory to store the onnx model, and other collateral that is saved during onnx export procedure
    if not os.path.isdir(onnx_folder):
        os.makedirs(onnx_folder)

    onnx_path = os.path.join(onnx_folder, "whisper_encoder.onnx")

    torch.onnx.export(
        encoder,
        mel,
        onnx_path,
        input_names=["mel"],
        output_names=["output_features"]
    )

    # use model optimizer to convert onnx to OpenVINO IR format
    encoder_model = mo.convert_model(onnx_path, compress_to_fp16=True)
    serialize(encoder_model, xml_path='ggml-' + mname + '-encoder-openvino.xml')

    #cleanup
    if os.path.isdir(onnx_folder):
        shutil.rmtree(onnx_folder)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)", required=True)
    args = parser.parse_args()

    if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1"]:
        raise ValueError("Invalid model name")

    whisper = load_model(args.model).cpu()
    hparams = whisper.dims

    encoder = whisper.encoder

    # Convert encoder to onnx
    convert_encoder(hparams, encoder, args.model)
@@ -0,0 +1,2 @@
openvino-dev[pytorch,onnx]
openai-whisper

openvino/whisper-openvino-encoder.cpp (+108)

@@ -0,0 +1,108 @@
#include "openvino/whisper-openvino-encoder.h"
#include "ggml.h"
#include <openvino/openvino.hpp>
#include <iostream>

struct whisper_openvino_context {
    ov::InferRequest inferRequest;
};

struct whisper_openvino_context * whisper_openvino_init(const char* path_model,
    const char* device,
    const char* cache_dir)
{
    if (!path_model || !device) {
        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
        return nullptr;
    }

    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
        __func__, path_model, device, cache_dir ? cache_dir : "(not set)");

    whisper_openvino_context *context = new whisper_openvino_context;
    try {
        ov::Core core;

        if (cache_dir) {
            // enables caching of device-specific 'blobs' during core.compile_model
            // routine. This speeds up calls to compile_model for successive runs.
            core.set_property(ov::cache_dir(cache_dir));
        }

        //Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
        std::shared_ptr<ov::Model> model = core.read_model(path_model);

        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
        auto compiledModel = core.compile_model(model, device);

        // From the compiled model object, create an infer request. This is the thing that
        // we will use later on to trigger inference execution.
        context->inferRequest = compiledModel.create_infer_request();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
        delete context;
        context = nullptr;
    }

    return context;
}

void whisper_openvino_free(struct whisper_openvino_context * ctx) {
    if( ctx ) {
        delete ctx;
    }
}

int whisper_openvino_encode(
    whisper_openvino_context* ctx,
    ggml_tensor* mel,
    ggml_tensor* out) {

    if (!ctx || !mel || !out) {
        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
        return 0;
    }

    if (mel->n_dims != 2) {
        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, mel->n_dims);
        return 0;
    }

    if (out->n_dims != 2) {
        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, out->n_dims);
        return 0;
    }

    try {

        //wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
            ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
            ctx->inferRequest.set_input_tensor(input_tensor);
        }

        //wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays
            ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
            ctx->inferRequest.set_output_tensor(out_tensor);
        }

        //run inference
        ctx->inferRequest.infer();
    }
    catch (const std::exception& error) {
        std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
        return 0;
    }

    return 1;
}
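The shape/stride reversal in whisper_openvino_encode reflects ggml's convention that ne[0] is the innermost (fastest-varying) dimension, while OpenVINO lists the outermost dimension first. Below is a minimal sketch (not part of the commit) of driving the wrapper directly, with tensor sizes assumed for the base model (80 mel bins x 3000 frames in, 1500 x 512 encoder states out):

#include "ggml.h"
#include "openvino/whisper-openvino-encoder.h"

int run_encoder_once() {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 32u*1024u*1024u,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * gctx = ggml_init(params);

    // ne[0] is the fastest-varying dimension in ggml, so an (80 x 3000) mel
    // spectrogram is allocated as ne = { 3000, 80 }; whisper_openvino_encode
    // reverses this into the OpenVINO input shape { 1, 80, 3000 }.
    // (mel data is left unset in this sketch)
    struct ggml_tensor * mel = ggml_new_tensor_2d(gctx, GGML_TYPE_F32, 3000,   80);
    struct ggml_tensor * out = ggml_new_tensor_2d(gctx, GGML_TYPE_F32,  512, 1500);

    // the IR path and device string are assumptions for this sketch
    struct whisper_openvino_context * ov =
        whisper_openvino_init("ggml-base-encoder-openvino.xml", "CPU", nullptr);
    if (!ov) {
        ggml_free(gctx);
        return 1;
    }

    const int ok = whisper_openvino_encode(ov, mel, out); // 1 on success, 0 on failure

    whisper_openvino_free(ov);
    ggml_free(gctx);
    return ok == 1 ? 0 : 1;
}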

openvino/whisper-openvino-encoder.h (+31)

@@ -0,0 +1,31 @@
// Wrapper of the OpenVINO Whisper Encoder model
//

#if __cplusplus
extern "C" {
#endif

struct whisper_openvino_context;

// initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
// path to cache_dir. Returns null upon failure.
struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
    const char * device,
    const char * cache_dir);

// clean up a ctx previously returned from whisper_openvino_init()
void whisper_openvino_free(struct whisper_openvino_context * ctx);

struct ggml_tensor;

// Perform encode using OpenVINO.
// Returns 1 on success
// Returns 0 on failure
int whisper_openvino_encode(
    whisper_openvino_context* ctx,
    ggml_tensor* mel,
    ggml_tensor* out);

#if __cplusplus
}
#endif
