Skip to content

Commit c833a1c

Browse files
author
Naveen Suda
committed
added DS_CNN model and reorganized the code
1 parent 8ea2292 commit c833a1c

30 files changed

+1591
-323
lines changed

Deployment/Examples/realtime_test/main.cpp

Lines changed: 45 additions & 215 deletions
Original file line numberDiff line numberDiff line change
@@ -22,267 +22,97 @@
2222
* derived from https://os.mbed.com/teams/ST/code/DISCO-F746NG_AUDIO_demo
2323
*/
2424

25-
#include "kws.h"
26-
#include "AUDIO_DISCO_F746NG.h"
25+
#include "kws_f746ng.h"
26+
#include "plot_utils.h"
2727
#include "LCD_DISCO_F746NG.h"
2828

29-
#define LCD_COLOR_ARM_BLUE ((uint32_t) 0xFF00C1DE)
30-
#define LCD_COLOR_ARM_DARK ((uint32_t) 0xFF333E48)
31-
32-
AUDIO_DISCO_F746NG audio;
3329
LCD_DISCO_F746NG lcd;
3430
Serial pc(USBTX, USBRX);
31+
KWS_F746NG *kws;
3532
Timer T;
3633

37-
char lcd_output_string[256];
38-
char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down","left","right","on","off","stop","go"};
39-
int current_pixel_location=0;
40-
uint32_t mfcc_plot_buffer[NUM_MFCC_COEFFS*NUM_FRAMES*10];
41-
int mfcc_update_counter=0;
42-
43-
44-
/*
45-
* The audio recording works with two windows, each of size 80 ms.
46-
* The data for each window will be transferred by the DMA, which sends
47-
* an interrupt after the transfer is completed.
48-
*/
49-
50-
/* AUDIO_BLOCK_SIZE is the number of audio samples in each recording window */
51-
#define AUDIO_BLOCK_SIZE (2*FRAME_LEN)
34+
char lcd_output_string[64];
35+
char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down",
36+
"left","right","on","off","stop","go"};
37+
// Tune the following three parameters to improve the detection accuracy
38+
// and reduce false positives
39+
// Longer averaging window and higher threshold reduce false positives
40+
// but increase detection latency and reduce true positive detections.
5241

53-
int16_t audio_io_buffer[AUDIO_BLOCK_SIZE*8]; //2 (L/R) channels x 2 input/output x 2 for ping-pong buffer
54-
int16_t audio_buffer[AUDIO_BLOCK_SIZE];
42+
// (recording_win*frame_shift) is the actual recording window size
43+
int recording_win = 3;
44+
// Averaging window for smoothing out the output predictions
45+
int averaging_window_len = 3;
46+
int detection_threshold = 90; //in percent
5547

56-
int16_t* AUDIO_BUFFER_IN = audio_io_buffer;
57-
int16_t* AUDIO_BUFFER_OUT = (AUDIO_BUFFER_IN + (AUDIO_BLOCK_SIZE * 4));
58-
59-
q7_t scratch_buffer[SCRATCH_BUFFER_SIZE];
60-
KWS *kws;
61-
62-
static uint8_t SetSysClock_PLL_HSE_200MHz();
6348
void run_kws();
64-
void plot_mfcc(q7_t* mfcc_buffer);
65-
void plot_waveform();
66-
uint32_t calculate_rgb(int min, int max, int value);
6749

6850
int main()
6951
{
70-
SetSysClock_PLL_HSE_200MHz();
7152
pc.baud(9600);
72-
73-
kws = new KWS(audio_buffer,scratch_buffer);
74-
75-
lcd.Clear(LCD_COLOR_ARM_BLUE);
76-
lcd.SetBackColor(LCD_COLOR_ARM_BLUE);
77-
lcd.DisplayStringAt(0, LINE(1), (uint8_t *)"Keyword Spotting Example", CENTER_MODE);
78-
wait(1);
79-
lcd.Clear(LCD_COLOR_ARM_BLUE);
80-
lcd.SetBackColor(LCD_COLOR_ARM_BLUE);
81-
lcd.SetTextColor(LCD_COLOR_WHITE);
82-
83-
int size_x, size_y;
84-
size_x = lcd.GetXSize();
85-
size_y = lcd.GetYSize();
86-
lcd.FillRect(0, 0, size_x, size_y/3);
87-
88-
/* Initialize buffers */
89-
memset(AUDIO_BUFFER_IN, 0, AUDIO_BLOCK_SIZE*8);
90-
memset(AUDIO_BUFFER_OUT, 0, AUDIO_BLOCK_SIZE*8);
91-
92-
/* May need to adjust volume to get better accuracy/user-experience */
93-
audio.IN_SetVolume(80);
94-
95-
/* Start Recording */
96-
audio.IN_Record((uint16_t*)AUDIO_BUFFER_IN, AUDIO_BLOCK_SIZE * 4);
97-
98-
/* Start Playback for listening to what is being classified */
99-
audio.OUT_SetAudioFrameSlot(CODEC_AUDIOFRAME_SLOT_02);
100-
audio.OUT_Play((uint16_t*)AUDIO_BUFFER_OUT, AUDIO_BLOCK_SIZE * 8);
53+
kws = new KWS_F746NG(recording_win,averaging_window_len);
54+
init_plot();
55+
kws->start_kws();
10156

10257
T.start();
10358

10459
while (1) {
10560
/* A dummy loop to wait for the interrupts. Feature extraction and
10661
neural network inference are done in the interrupt service routine. */
62+
__WFI();
10763
}
10864
}
10965

66+
11067
/*
111-
* Manages the DMA Transfer complete interrupt.
68+
* The audio recording works with two ping-pong buffers.
69+
* The data for each window will be transferred by the DMA, which sends
70+
* an interrupt after the transfer is completed.
11271
*/
72+
73+
// Manages the DMA Transfer complete interrupt.
11374
void BSP_AUDIO_IN_TransferComplete_CallBack(void)
11475
{
115-
arm_copy_q7((q7_t *)AUDIO_BUFFER_IN + AUDIO_BLOCK_SIZE*4, (q7_t *)AUDIO_BUFFER_OUT + AUDIO_BLOCK_SIZE*4, AUDIO_BLOCK_SIZE*4);
76+
arm_copy_q7((q7_t *)kws->audio_buffer_in + kws->audio_block_size*4, (q7_t *)kws->audio_buffer_out + kws->audio_block_size*4, kws->audio_block_size*4);
77+
if(kws->frame_len != kws->frame_shift) {
78+
//copy the last (frame_len - frame_shift) audio data to the start
79+
arm_copy_q7((q7_t *)kws->audio_buffer, (q7_t *)(kws->audio_buffer)+2*(kws->audio_buffer_size-(kws->frame_len-kws->frame_shift)), 2*(kws->frame_len-kws->frame_shift));
80+
}
11681
// copy the new recording data
117-
for (int i=0;i<AUDIO_BLOCK_SIZE;i++) {
118-
audio_buffer[i] = AUDIO_BUFFER_IN[2*AUDIO_BLOCK_SIZE+i*2];
82+
for (int i=0;i<kws->audio_block_size;i++) {
83+
kws->audio_buffer[kws->frame_len-kws->frame_shift+i] = kws->audio_buffer_in[2*kws->audio_block_size+i*2];
11984
}
12085
run_kws();
12186
return;
12287
}
12388

124-
/*
125-
* Manages the DMA Half Transfer complete interrupt.
126-
*/
89+
// Manages the DMA Half Transfer complete interrupt.
12790
void BSP_AUDIO_IN_HalfTransfer_CallBack(void)
12891
{
129-
arm_copy_q7((q7_t *)AUDIO_BUFFER_IN, (q7_t *)AUDIO_BUFFER_OUT, AUDIO_BLOCK_SIZE*4);
92+
arm_copy_q7((q7_t *)kws->audio_buffer_in, (q7_t *)kws->audio_buffer_out, kws->audio_block_size*4);
93+
if(kws->frame_len!=kws->frame_shift) {
94+
//copy the last (frame_len - frame_shift) audio data to the start
95+
arm_copy_q7((q7_t *)kws->audio_buffer, (q7_t *)(kws->audio_buffer)+2*(kws->audio_buffer_size-(kws->frame_len-kws->frame_shift)), 2*(kws->frame_len-kws->frame_shift));
96+
}
13097
// copy the new recording data
131-
for (int i=0;i<AUDIO_BLOCK_SIZE;i++) {
132-
audio_buffer[i] = AUDIO_BUFFER_IN[i*2];
98+
for (int i=0;i<kws->audio_block_size;i++) {
99+
kws->audio_buffer[kws->frame_len-kws->frame_shift+i] = kws->audio_buffer_in[i*2];
133100
}
134101
run_kws();
135102
return;
136103
}
137104

138105
void run_kws()
139106
{
140-
141-
//Averaging window for smoothing out the output predictions
142-
int averaging_window_len = 3; //i.e. average over 3 inferences or 240ms
143-
int detection_threshold = 70; //in percent
144-
145-
int start = T.read_us();
146-
kws->extract_features(2); //extract mfcc features
147-
kws->classify(); //classify using dnn
148-
kws->average_predictions(averaging_window_len);
149-
107+
kws->extract_features(); //extract mfcc features
108+
kws->classify(); //classify using dnn
109+
kws->average_predictions();
110+
plot_mfcc();
150111
plot_waveform();
151-
plot_mfcc(kws->mfcc_buffer);
152-
int end = T.read_us();
153-
int max_ind = kws->get_top_detection(kws->averaged_output);
112+
int max_ind = kws->get_top_class(kws->averaged_output);
154113
if(kws->averaged_output[max_ind]>detection_threshold*128/100)
155114
sprintf(lcd_output_string,"%d%% %s",((int)kws->averaged_output[max_ind]*100/128),output_class[max_ind]);
156115
lcd.ClearStringLine(8);
157116
lcd.DisplayStringAt(0, LINE(8), (uint8_t *) lcd_output_string, CENTER_MODE);
158-
159-
}
160-
161-
void plot_mfcc(q7_t* mfcc_buffer)
162-
{
163-
memcpy(mfcc_plot_buffer, mfcc_plot_buffer+2*NUM_MFCC_COEFFS, 4*NUM_MFCC_COEFFS*(10*NUM_FRAMES-2));
164-
165-
int size_x, size_y;
166-
size_x = lcd.GetXSize();
167-
size_y = lcd.GetYSize();
168-
169-
int x_step = 1;
170-
int y_step = 6;
171-
172-
uint32_t* pBuffer = mfcc_plot_buffer + NUM_MFCC_COEFFS*(10*NUM_FRAMES-2);
173-
int sum = 0;;
174-
175-
for (int i=0;i<2;i++) {
176-
for (int j=0;j<NUM_MFCC_COEFFS;j++) {
177-
int value = mfcc_buffer[(NUM_MFCC_COEFFS*(NUM_FRAMES-2))+i*NUM_MFCC_COEFFS+j];
178-
uint32_t RGB = calculate_rgb(-128, 127, value*4);
179-
sum += std::abs(value);
180-
pBuffer[i*NUM_MFCC_COEFFS+j] = RGB;
181-
}
182-
}
183-
mfcc_update_counter++;
184-
if(mfcc_update_counter==10) {
185-
lcd.FillRect(0, size_y/3, size_x, size_y/3);
186-
for (int i=0;i<10*NUM_FRAMES;i++) {
187-
for (int j=0;j<NUM_MFCC_COEFFS;j++) {
188-
for (int x=0;x<x_step;x++) {
189-
for (int y=0;y<y_step;y++) {
190-
lcd.DrawPixel(120+i*x_step+x,90+j*y_step+y, mfcc_plot_buffer[i*NUM_MFCC_COEFFS+j]);
191-
}
192-
}
193-
}
194-
}
195-
mfcc_update_counter=0;
196-
}
197-
}
198-
199-
uint32_t calculate_rgb(int min, int max, int value) {
200-
uint32_t ret = 0xFF000000;
201-
int mid_point = (min + max) / 2;
202-
int range = (max - min);
203-
if (value >= mid_point) {
204-
uint32_t delta = (value - mid_point)*512 / range;
205-
if (delta > 255) { delta = 255; }
206-
ret = ret | (delta << 16);
207-
ret = ret | ( (255-delta) << 8 );
208-
} else {
209-
int delta = value*512 / range;
210-
if (delta > 255) { delta = 255; }
211-
ret = ret | (delta << 8);
212-
ret = ret | (255 - delta);
213-
}
214-
return ret;
215-
}
216-
217-
void plot_waveform()
218-
{
219-
220-
int size_x, size_y;
221-
size_x = lcd.GetXSize();
222-
size_y = lcd.GetYSize();
223-
224-
int x_width = 128*3/2;
225-
226-
int x_start = size_x/2 - x_width*current_pixel_location;
227-
lcd.FillRect(x_start, 0, x_width, 1*size_y/3);
228-
//lcd.FillRect(0, 0, size_x, 1*size_y/3);
229-
current_pixel_location = 1 - current_pixel_location;
230-
int y_center = size_y/6;
231-
232-
int stride = 2 * (AUDIO_BLOCK_SIZE / x_width / 2);
233-
234-
for (int i=0;i<x_width;i++) {
235-
int audio_magnitude = y_center+(int)(audio_buffer[i*stride+1]/8);
236-
if (audio_magnitude < 0) { audio_magnitude = 0; }
237-
if (audio_magnitude > 2*y_center) { audio_magnitude = 2*y_center - 1; }
238-
239-
lcd.DrawPixel(x_start+i,audio_magnitude, LCD_COLOR_ARM_DARK);
240-
}
241-
}
242-
243-
static uint8_t SetSysClock_PLL_HSE_200MHz()
244-
{
245-
RCC_ClkInitTypeDef RCC_ClkInitStruct;
246-
RCC_OscInitTypeDef RCC_OscInitStruct;
247-
248-
// Enable power clock
249-
__PWR_CLK_ENABLE();
250-
251-
// Enable HSE oscillator and activate PLL with HSE as source
252-
RCC_OscInitStruct.OscillatorType = RCC_OSCILLATORTYPE_HSE;
253-
RCC_OscInitStruct.HSEState = RCC_HSE_ON; /* External xtal on OSC_IN/OSC_OUT */
254-
255-
// Warning: this configuration is for a 25 MHz xtal clock only
256-
RCC_OscInitStruct.PLL.PLLState = RCC_PLL_ON;
257-
RCC_OscInitStruct.PLL.PLLSource = RCC_PLLSOURCE_HSE;
258-
RCC_OscInitStruct.PLL.PLLM = 25; // VCO input clock = 1 MHz (25 MHz / 25)
259-
RCC_OscInitStruct.PLL.PLLN = 400; // VCO output clock = 400 MHz (1 MHz * 400)
260-
RCC_OscInitStruct.PLL.PLLP = RCC_PLLP_DIV2; // PLLCLK = 200 MHz (400 MHz / 2)
261-
RCC_OscInitStruct.PLL.PLLQ = 8; // USB clock = 50 MHz (400 MHz / 8)
262-
263-
if (HAL_RCC_OscConfig(&RCC_OscInitStruct) != HAL_OK)
264-
{
265-
return 0; // FAIL
266-
}
267-
268-
// Activate the OverDrive to reach the 216 MHz Frequency
269-
if (HAL_PWREx_EnableOverDrive() != HAL_OK)
270-
{
271-
return 0; // FAIL
272-
}
273-
274-
// Select PLL as system clock source and configure the HCLK, PCLK1 and PCLK2 clocks dividers
275-
RCC_ClkInitStruct.ClockType = (RCC_CLOCKTYPE_SYSCLK | RCC_CLOCKTYPE_HCLK | RCC_CLOCKTYPE_PCLK1 | RCC_CLOCKTYPE_PCLK2);
276-
RCC_ClkInitStruct.SYSCLKSource = RCC_SYSCLKSOURCE_PLLCLK; // 200 MHz
277-
RCC_ClkInitStruct.AHBCLKDivider = RCC_SYSCLK_DIV1; // 200 MHz
278-
RCC_ClkInitStruct.APB1CLKDivider = RCC_HCLK_DIV4; // 50 MHz
279-
RCC_ClkInitStruct.APB2CLKDivider = RCC_HCLK_DIV2; // 100 MHz
280-
281-
if (HAL_RCC_ClockConfig(&RCC_ClkInitStruct, FLASH_LATENCY_7) != HAL_OK)
282-
{
283-
return 0; // FAIL
284-
}
285-
HAL_RCC_MCOConfig(RCC_MCO1, RCC_MCO1SOURCE_HSE, RCC_MCODIV_4);
286-
return 1; // OK
287117
}
288118

0 commit comments

Comments
 (0)