|
22 | 22 | * derived from https://os.mbed.com/teams/ST/code/DISCO-F746NG_AUDIO_demo |
23 | 23 | */ |
24 | 24 |
|
25 | | -#include "kws.h" |
26 | | -#include "AUDIO_DISCO_F746NG.h" |
| 25 | +#include "kws_f746ng.h" |
| 26 | +#include "plot_utils.h" |
27 | 27 | #include "LCD_DISCO_F746NG.h" |
28 | 28 |
|
29 | | -#define LCD_COLOR_ARM_BLUE ((uint32_t) 0xFF00C1DE) |
30 | | -#define LCD_COLOR_ARM_DARK ((uint32_t) 0xFF333E48) |
31 | | - |
32 | | -AUDIO_DISCO_F746NG audio; |
33 | 29 | LCD_DISCO_F746NG lcd; |
34 | 30 | Serial pc(USBTX, USBRX); |
| 31 | +KWS_F746NG *kws; |
35 | 32 | Timer T; |
36 | 33 |
|
37 | | -char lcd_output_string[256]; |
38 | | -char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down","left","right","on","off","stop","go"}; |
39 | | -int current_pixel_location=0; |
40 | | -uint32_t mfcc_plot_buffer[NUM_MFCC_COEFFS*NUM_FRAMES*10]; |
41 | | -int mfcc_update_counter=0; |
42 | | - |
43 | | - |
44 | | -/* |
45 | | - * The audio recording works with two windows, each of size 80 ms. |
46 | | - * The data for each window will be tranfered by the DMA, which sends |
47 | | - * sends an interrupt after the transfer is completed. |
48 | | - */ |
49 | | - |
50 | | -/* AUDIO_BLOCK_SIZE is the number of audio samples in each recording window */ |
51 | | -#define AUDIO_BLOCK_SIZE (2*FRAME_LEN) |
| 34 | +char lcd_output_string[64]; |
| 35 | +char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down", |
| 36 | + "left","right","on","off","stop","go"}; |
| 37 | +// Tune the following three parameters to improve the detection accuracy |
| 38 | +// and reduce false positives |
| 39 | +// Longer averaging window and higher threshold reduce false positives |
| 40 | +// but increase detection latency and reduce true positive detections. |
52 | 41 |
|
53 | | -int16_t audio_io_buffer[AUDIO_BLOCK_SIZE*8]; //2 (L/R) channels x 2 input/output x 2 for ping-pong buffer |
54 | | -int16_t audio_buffer[AUDIO_BLOCK_SIZE]; |
| 42 | +// (recording_win*frame_shift) is the actual recording window size |
| 43 | +int recording_win = 3; |
| 44 | +// Averaging window for smoothing out the output predictions |
| 45 | +int averaging_window_len = 3; |
| 46 | +int detection_threshold = 90; //in percent |
55 | 47 |
|
56 | | -int16_t* AUDIO_BUFFER_IN = audio_io_buffer; |
57 | | -int16_t* AUDIO_BUFFER_OUT = (AUDIO_BUFFER_IN + (AUDIO_BLOCK_SIZE * 4)); |
58 | | - |
59 | | -q7_t scratch_buffer[SCRATCH_BUFFER_SIZE]; |
60 | | -KWS *kws; |
61 | | - |
62 | | -static uint8_t SetSysClock_PLL_HSE_200MHz(); |
63 | 48 | void run_kws(); |
64 | | -void plot_mfcc(q7_t* mfcc_buffer); |
65 | | -void plot_waveform(); |
66 | | -uint32_t calculate_rgb(int min, int max, int value); |
67 | 49 |
|
68 | 50 | int main() |
69 | 51 | { |
70 | | - SetSysClock_PLL_HSE_200MHz(); |
71 | 52 | pc.baud(9600); |
72 | | - |
73 | | - kws = new KWS(audio_buffer,scratch_buffer); |
74 | | - |
75 | | - lcd.Clear(LCD_COLOR_ARM_BLUE); |
76 | | - lcd.SetBackColor(LCD_COLOR_ARM_BLUE); |
77 | | - lcd.DisplayStringAt(0, LINE(1), (uint8_t *)"Keyword Spotting Example", CENTER_MODE); |
78 | | - wait(1); |
79 | | - lcd.Clear(LCD_COLOR_ARM_BLUE); |
80 | | - lcd.SetBackColor(LCD_COLOR_ARM_BLUE); |
81 | | - lcd.SetTextColor(LCD_COLOR_WHITE); |
82 | | - |
83 | | - int size_x, size_y; |
84 | | - size_x = lcd.GetXSize(); |
85 | | - size_y = lcd.GetYSize(); |
86 | | - lcd.FillRect(0, 0, size_x, size_y/3); |
87 | | - |
88 | | - /* Initialize buffers */ |
89 | | - memset(AUDIO_BUFFER_IN, 0, AUDIO_BLOCK_SIZE*8); |
90 | | - memset(AUDIO_BUFFER_OUT, 0, AUDIO_BLOCK_SIZE*8); |
91 | | - |
92 | | - /* May need to adjust volume to get better accuracy/user-experience */ |
93 | | - audio.IN_SetVolume(80); |
94 | | - |
95 | | - /* Start Recording */ |
96 | | - audio.IN_Record((uint16_t*)AUDIO_BUFFER_IN, AUDIO_BLOCK_SIZE * 4); |
97 | | - |
98 | | - /* Start Playback for listening to what is being classified */ |
99 | | - audio.OUT_SetAudioFrameSlot(CODEC_AUDIOFRAME_SLOT_02); |
100 | | - audio.OUT_Play((uint16_t*)AUDIO_BUFFER_OUT, AUDIO_BLOCK_SIZE * 8); |
| 53 | + kws = new KWS_F746NG(recording_win,averaging_window_len); |
| 54 | + init_plot(); |
| 55 | + kws->start_kws(); |
101 | 56 |
|
102 | 57 | T.start(); |
103 | 58 |
|
104 | 59 | while (1) { |
105 | 60 | /* A dummy loop to wait for the interrupts. Feature extraction and |
106 | 61 | neural network inference are done in the interrupt service routine. */ |
| 62 | + __WFI(); |
107 | 63 | } |
108 | 64 | } |
109 | 65 |
|
| 66 | + |
110 | 67 | /* |
111 | | - * Manages the DMA Transfer complete interrupt. |
| 68 | + * The audio recording works with two ping-pong buffers. |
| 69 | + * The data for each window will be transferred by the DMA, which sends |
| 70 | + * an interrupt after the transfer is completed. |
112 | 71 | */ |
| 72 | + |
| 73 | +// Manages the DMA Transfer complete interrupt. |
113 | 74 | void BSP_AUDIO_IN_TransferComplete_CallBack(void) |
114 | 75 | { |
115 | | - arm_copy_q7((q7_t *)AUDIO_BUFFER_IN + AUDIO_BLOCK_SIZE*4, (q7_t *)AUDIO_BUFFER_OUT + AUDIO_BLOCK_SIZE*4, AUDIO_BLOCK_SIZE*4); |
| 76 | + arm_copy_q7((q7_t *)kws->audio_buffer_in + kws->audio_block_size*4, (q7_t *)kws->audio_buffer_out + kws->audio_block_size*4, kws->audio_block_size*4); |
| 77 | + if(kws->frame_len != kws->frame_shift) { |
| 78 | + //copy the last (frame_len - frame_shift) audio data to the start |
| 79 | + arm_copy_q7((q7_t *)kws->audio_buffer, (q7_t *)(kws->audio_buffer)+2*(kws->audio_buffer_size-(kws->frame_len-kws->frame_shift)), 2*(kws->frame_len-kws->frame_shift)); |
| 80 | + } |
116 | 81 | // copy the new recording data |
117 | | - for (int i=0;i<AUDIO_BLOCK_SIZE;i++) { |
118 | | - audio_buffer[i] = AUDIO_BUFFER_IN[2*AUDIO_BLOCK_SIZE+i*2]; |
| 82 | + for (int i=0;i<kws->audio_block_size;i++) { |
| 83 | + kws->audio_buffer[kws->frame_len-kws->frame_shift+i] = kws->audio_buffer_in[2*kws->audio_block_size+i*2]; |
119 | 84 | } |
120 | 85 | run_kws(); |
121 | 86 | return; |
122 | 87 | } |
123 | 88 |
|
124 | | -/* |
125 | | - * Manages the DMA Half Transfer complete interrupt. |
126 | | - */ |
| 89 | +// Manages the DMA Half Transfer complete interrupt. |
127 | 90 | void BSP_AUDIO_IN_HalfTransfer_CallBack(void) |
128 | 91 | { |
129 | | - arm_copy_q7((q7_t *)AUDIO_BUFFER_IN, (q7_t *)AUDIO_BUFFER_OUT, AUDIO_BLOCK_SIZE*4); |
| 92 | + arm_copy_q7((q7_t *)kws->audio_buffer_in, (q7_t *)kws->audio_buffer_out, kws->audio_block_size*4); |
| 93 | + if(kws->frame_len!=kws->frame_shift) { |
| 94 | + //copy the last (frame_len - frame_shift) audio data to the start |
| 95 | + arm_copy_q7((q7_t *)kws->audio_buffer, (q7_t *)(kws->audio_buffer)+2*(kws->audio_buffer_size-(kws->frame_len-kws->frame_shift)), 2*(kws->frame_len-kws->frame_shift)); |
| 96 | + } |
130 | 97 | // copy the new recording data |
131 | | - for (int i=0;i<AUDIO_BLOCK_SIZE;i++) { |
132 | | - audio_buffer[i] = AUDIO_BUFFER_IN[i*2]; |
| 98 | + for (int i=0;i<kws->audio_block_size;i++) { |
| 99 | + kws->audio_buffer[kws->frame_len-kws->frame_shift+i] = kws->audio_buffer_in[i*2]; |
133 | 100 | } |
134 | 101 | run_kws(); |
135 | 102 | return; |
136 | 103 | } |
137 | 104 |
|
138 | 105 | void run_kws() |
139 | 106 | { |
140 | | - |
141 | | - //Averaging window for smoothing out the output predictions |
142 | | - int averaging_window_len = 3; //i.e. average over 3 inferences or 240ms |
143 | | - int detection_threshold = 70; //in percent |
144 | | - |
145 | | - int start = T.read_us(); |
146 | | - kws->extract_features(2); //extract mfcc features |
147 | | - kws->classify(); //classify using dnn |
148 | | - kws->average_predictions(averaging_window_len); |
149 | | - |
| 107 | + kws->extract_features(); //extract mfcc features |
| 108 | + kws->classify(); //classify using dnn |
| 109 | + kws->average_predictions(); |
| 110 | + plot_mfcc(); |
150 | 111 | plot_waveform(); |
151 | | - plot_mfcc(kws->mfcc_buffer); |
152 | | - int end = T.read_us(); |
153 | | - int max_ind = kws->get_top_detection(kws->averaged_output); |
| 112 | + int max_ind = kws->get_top_class(kws->averaged_output); |
154 | 113 | if(kws->averaged_output[max_ind]>detection_threshold*128/100) |
155 | 114 | sprintf(lcd_output_string,"%d%% %s",((int)kws->averaged_output[max_ind]*100/128),output_class[max_ind]); |
156 | 115 | lcd.ClearStringLine(8); |
157 | 116 | lcd.DisplayStringAt(0, LINE(8), (uint8_t *) lcd_output_string, CENTER_MODE); |
158 | | - |
159 | | -} |
160 | | - |
161 | | -void plot_mfcc(q7_t* mfcc_buffer) |
162 | | -{ |
163 | | - memcpy(mfcc_plot_buffer, mfcc_plot_buffer+2*NUM_MFCC_COEFFS, 4*NUM_MFCC_COEFFS*(10*NUM_FRAMES-2)); |
164 | | - |
165 | | - int size_x, size_y; |
166 | | - size_x = lcd.GetXSize(); |
167 | | - size_y = lcd.GetYSize(); |
168 | | - |
169 | | - int x_step = 1; |
170 | | - int y_step = 6; |
171 | | - |
172 | | - uint32_t* pBuffer = mfcc_plot_buffer + NUM_MFCC_COEFFS*(10*NUM_FRAMES-2); |
173 | | - int sum = 0;; |
174 | | - |
175 | | - for (int i=0;i<2;i++) { |
176 | | - for (int j=0;j<NUM_MFCC_COEFFS;j++) { |
177 | | - int value = mfcc_buffer[(NUM_MFCC_COEFFS*(NUM_FRAMES-2))+i*NUM_MFCC_COEFFS+j]; |
178 | | - uint32_t RGB = calculate_rgb(-128, 127, value*4); |
179 | | - sum += std::abs(value); |
180 | | - pBuffer[i*NUM_MFCC_COEFFS+j] = RGB; |
181 | | - } |
182 | | - } |
183 | | - mfcc_update_counter++; |
184 | | - if(mfcc_update_counter==10) { |
185 | | - lcd.FillRect(0, size_y/3, size_x, size_y/3); |
186 | | - for (int i=0;i<10*NUM_FRAMES;i++) { |
187 | | - for (int j=0;j<NUM_MFCC_COEFFS;j++) { |
188 | | - for (int x=0;x<x_step;x++) { |
189 | | - for (int y=0;y<y_step;y++) { |
190 | | - lcd.DrawPixel(120+i*x_step+x,90+j*y_step+y, mfcc_plot_buffer[i*NUM_MFCC_COEFFS+j]); |
191 | | - } |
192 | | - } |
193 | | - } |
194 | | - } |
195 | | - mfcc_update_counter=0; |
196 | | - } |
197 | | -} |
198 | | - |
199 | | -uint32_t calculate_rgb(int min, int max, int value) { |
200 | | - uint32_t ret = 0xFF000000; |
201 | | - int mid_point = (min + max) / 2; |
202 | | - int range = (max - min); |
203 | | - if (value >= mid_point) { |
204 | | - uint32_t delta = (value - mid_point)*512 / range; |
205 | | - if (delta > 255) { delta = 255; } |
206 | | - ret = ret | (delta << 16); |
207 | | - ret = ret | ( (255-delta) << 8 ); |
208 | | - } else { |
209 | | - int delta = value*512 / range; |
210 | | - if (delta > 255) { delta = 255; } |
211 | | - ret = ret | (delta << 8); |
212 | | - ret = ret | (255 - delta); |
213 | | - } |
214 | | - return ret; |
215 | | -} |
216 | | - |
217 | | -void plot_waveform() |
218 | | -{ |
219 | | - |
220 | | - int size_x, size_y; |
221 | | - size_x = lcd.GetXSize(); |
222 | | - size_y = lcd.GetYSize(); |
223 | | - |
224 | | - int x_width = 128*3/2; |
225 | | - |
226 | | - int x_start = size_x/2 - x_width*current_pixel_location; |
227 | | - lcd.FillRect(x_start, 0, x_width, 1*size_y/3); |
228 | | - //lcd.FillRect(0, 0, size_x, 1*size_y/3); |
229 | | - current_pixel_location = 1 - current_pixel_location; |
230 | | - int y_center = size_y/6; |
231 | | - |
232 | | - int stride = 2 * (AUDIO_BLOCK_SIZE / x_width / 2); |
233 | | - |
234 | | - for (int i=0;i<x_width;i++) { |
235 | | - int audio_magnitude = y_center+(int)(audio_buffer[i*stride+1]/8); |
236 | | - if (audio_magnitude < 0) { audio_magnitude = 0; } |
237 | | - if (audio_magnitude > 2*y_center) { audio_magnitude = 2*y_center - 1; } |
238 | | - |
239 | | - lcd.DrawPixel(x_start+i,audio_magnitude, LCD_COLOR_ARM_DARK); |
240 | | - } |
241 | | -} |
242 | | - |
243 | | -static uint8_t SetSysClock_PLL_HSE_200MHz() |
244 | | -{ |
245 | | - RCC_ClkInitTypeDef RCC_ClkInitStruct; |
246 | | - RCC_OscInitTypeDef RCC_OscInitStruct; |
247 | | - |
248 | | - // Enable power clock |
249 | | - __PWR_CLK_ENABLE(); |
250 | | - |
251 | | - // Enable HSE oscillator and activate PLL with HSE as source |
252 | | - RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSE; |
253 | | - RCC_OscInitStruct.HSEState = RCC_HSE_ON; /* External xtal on OSC_IN/OSC_OUT */ |
254 | | - |
255 | | - // Warning: this configuration is for a 25 MHz xtal clock only |
256 | | - RCC_OscInitStruct.PLL.PLLState = RCC_PLL_ON; |
257 | | - RCC_OscInitStruct.PLL.PLLSource = RCC_PLLSOURCE_HSE; |
258 | | - RCC_OscInitStruct.PLL.PLLM = 25; // VCO input clock = 1 MHz (25 MHz / 25) |
259 | | - RCC_OscInitStruct.PLL.PLLN = 400; // VCO output clock = 400 MHz (1 MHz * 400) |
260 | | - RCC_OscInitStruct.PLL.PLLP = RCC_PLLP_DIV2; // PLLCLK = 200 MHz (400 MHz / 2) |
261 | | - RCC_OscInitStruct.PLL.PLLQ = 8; // USB clock = 50 MHz (400 MHz / 8) |
262 | | - |
263 | | - if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK) |
264 | | - { |
265 | | - return 0; // FAIL |
266 | | - } |
267 | | - |
268 | | - // Activate the OverDrive to reach the 216 MHz Frequency |
269 | | - if (HAL_PWREx_EnableOverDrive() != HAL_OK) |
270 | | - { |
271 | | - return 0; // FAIL |
272 | | - } |
273 | | - |
274 | | - // Select PLL as system clock source and configure the HCLK, PCLK1 and PCLK2 clocks dividers |
275 | | - RCC_ClkInitStruct.ClockType = (RCC_CLOCKTYPE_SYSCLK | RCC_CLOCKTYPE_HCLK | RCC_CLOCKTYPE_PCLK1 | RCC_CLOCKTYPE_PCLK2); |
276 | | - RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK; // 200 MHz |
277 | | - RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1; // 200 MHz |
278 | | - RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV4; // 50 MHz |
279 | | - RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV2; // 100 MHz |
280 | | - |
281 | | - if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_7) != HAL_OK) |
282 | | - { |
283 | | - return 0; // FAIL |
284 | | - } |
285 | | - HAL_RCC_MCOConfig(RCC_MCO1, RCC_MCO1SOURCE_HSE, RCC_MCODIV_4); |
286 | | - return 1; // OK |
287 | 117 | } |
288 | 118 |
|
0 commit comments