#include "openvino/whisper-openvino-encoder.h"
#include "ggml.h"
#include <openvino/openvino.hpp>
#include <iostream>

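// A whisper_openvino_context owns the ov::InferRequest created for the compiled
// OpenVINO encoder model. It is allocated by whisper_openvino_init() and released
// by whisper_openvino_free().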
struct whisper_openvino_context {
    ov::InferRequest inferRequest;
};

struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
    const char * device,
    const char * cache_dir)
{
    if (!path_model || !device) {
        fprintf(stderr, "%s: path_model and/or device is null\n", __func__);
        return nullptr;
    }

    fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n",
        __func__, path_model, device, cache_dir ? cache_dir : "(not set)");

    whisper_openvino_context *context = new whisper_openvino_context;
    try {
        ov::Core core;

        if (cache_dir) {
            // Enables caching of device-specific 'blobs' during the core.compile_model
            // routine. This speeds up calls to compile_model for successive runs.
            core.set_property(ov::cache_dir(cache_dir));
        }

        // Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object.
        std::shared_ptr<ov::Model> model = core.read_model(path_model);

        // Produce a compiled-model object, given the device ("CPU", "GPU", etc.)
        auto compiledModel = core.compile_model(model, device);

        // From the compiled model object, create an infer request. This is the object
        // that we will use later on to trigger inference execution.
        context->inferRequest = compiledModel.create_infer_request();
    }
    catch (const std::exception & error) {
        std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl;
        delete context;
        context = nullptr;
    }

    return context;
}

void whisper_openvino_free(struct whisper_openvino_context * ctx) {
    if (ctx) {
        delete ctx;
    }
}

int whisper_openvino_encode(
    whisper_openvino_context* ctx,
    ggml_tensor* mel,
    ggml_tensor* out) {

    if (!ctx || !mel || !out) {
        fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__);
        return 0;
    }

    if (mel->n_dims != 2) {
        fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, mel->n_dims);
        return 0;
    }

    if (out->n_dims != 2) {
        fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n",
            __func__, out->n_dims);
        return 0;
    }

    try {
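        // Note on the layout mapping below (explanatory comment): ggml lists dimensions
        // fastest-varying first, so for a contiguous 2-D f32 tensor ne[0] is the innermost
        // extent, nb[0] == sizeof(float), nb[1] == ne[0]*sizeof(float), and ggml also fills
        // nb[2] == ne[0]*ne[1]*sizeof(float). ov::Shape / ov::Strides are listed
        // outermost-first, which is why the blocks below reverse the order and prepend a
        // leading batch dimension of 1. For example, a mel tensor with ne[0] = 3000 frames
        // and ne[1] = 80 mel bins is exposed to OpenVINO as ov::Shape{1, 80, 3000}.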

        // Wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set it as the input tensor of the infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in the ne / nb arrays
            ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] };
            ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] };
            ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides);
            ctx->inferRequest.set_input_tensor(input_tensor);
        }

        // Wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set it as the output tensor of the infer request
        {
            // note, we populate shape & stride dimensions in opposite order from how they are listed in the ne / nb arrays
            ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] };
            ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] };
            ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides);
            ctx->inferRequest.set_output_tensor(out_tensor);
        }

        // run inference
        ctx->inferRequest.infer();
    }
    catch (const std::exception & error) {
        std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl;
        return 0;
    }

    return 1;
}
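
// ---------------------------------------------------------------------------
// Illustrative usage sketch (not one of whisper.cpp's actual call sites; the
// model filename, cache directory, and tensor names below are assumptions for
// illustration). It shows how the three entry points above are meant to compose:
//
//     struct whisper_openvino_context * octx =
//         whisper_openvino_init("ggml-base-encoder-openvino.xml", "CPU", "openvino-cache");
//     if (octx) {
//         // mel and out are 2-D f32 ggml tensors owned by the caller; on success,
//         // out holds the encoder output computed by OpenVINO instead of ggml
//         if (!whisper_openvino_encode(octx, mel, out)) {
//             // fall back to the regular ggml encoder path
//         }
//         whisper_openvino_free(octx);
//     }
// ---------------------------------------------------------------------------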