|
1 |
| -// fix problem with std::min and std::max |
2 |
| -#if defined(_WIN32) |
3 |
| -#define WIN32_LEAN_AND_MEAN |
4 |
| -#ifndef NOMINMAX |
5 |
| -# define NOMINMAX |
6 |
| -#endif |
7 |
| -#include <windows.h> |
8 |
| -#endif |
9 |
| - |
10 | 1 | #include "mtmd-audio.h"
|
11 | 2 |
|
12 |
| -//#define MTMD_AUDIO_DEBUG |
13 |
| - |
14 |
| -#define MINIAUDIO_IMPLEMENTATION |
15 |
| -#ifndef MTMD_AUDIO_DEBUG |
16 |
| -# define MA_NO_ENCODING |
17 |
| -#endif |
18 |
| -#define MA_NO_DEVICE_IO |
19 |
| -#define MA_NO_RESOURCE_MANAGER |
20 |
| -#define MA_NO_NODE_GRAPH |
21 |
| -#define MA_NO_ENGINE |
22 |
| -#define MA_NO_GENERATION |
23 |
| -#define MA_API static |
24 |
| -#include "miniaudio.h" |
25 |
| - |
26 | 3 | #define _USE_MATH_DEFINES // for M_PI
|
27 | 4 | #include <cmath>
|
28 | 5 | #include <cstdint>
|
@@ -359,69 +336,6 @@ bool preprocess_audio(
|
359 | 336 | } // namespace whisper_preprocessor
|
360 | 337 |
|
361 | 338 |
|
362 |
| -namespace audio_helpers { |
363 |
| - |
364 |
// Heuristically checks whether a buffer starts with a WAV, MP3 or FLAC signature.
//
// buf: pointer to the first bytes of the candidate file (may be null)
// len: number of readable bytes at buf
//
// Returns true when the leading bytes match one of the signatures below.
// A null buffer, or one shorter than 12 bytes, is always rejected.
bool is_audio_file(const char * buf, size_t len) {
    // 12 bytes is the span inspected by the RIFF/WAVE check: "RIFF" at offset 0 and "WAVE" at offset 8
    constexpr size_t min_header_len = 12;
    if (buf == nullptr || len < min_header_len) {
        return false;
    }

    // compare as unsigned bytes so that values >= 0x80 are not sign-extended
    const auto * bytes = reinterpret_cast<const unsigned char *>(buf);

    // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
    // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
    const bool is_wav = memcmp(bytes, "RIFF", 4) == 0 && memcmp(bytes + 8, "WAVE", 4) == 0;

    // MP3: either an "ID3" tag, or the MPEG sync word (simplified check: first 11 bits set)
    const bool has_id3_tag   = memcmp(bytes, "ID3", 3) == 0;
    const bool has_sync_word = bytes[0] == 0xFF && (bytes[1] & 0xE0) == 0xE0;
    const bool is_mp3        = has_id3_tag || has_sync_word;

    const bool is_flac = memcmp(bytes, "fLaC", 4) == 0;

    return is_wav || is_mp3 || is_flac;
}
381 |
| - |
382 |
| -// returns true if the buffer is a valid audio file |
383 |
| -bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) { |
384 |
| - ma_result result; |
385 |
| - const int channels = 1; |
386 |
| - ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate); |
387 |
| - ma_decoder decoder; |
388 |
| - |
389 |
| - result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder); |
390 |
| - if (result != MA_SUCCESS) { |
391 |
| - return false; |
392 |
| - } |
393 |
| - |
394 |
| - ma_uint64 frame_count; |
395 |
| - ma_uint64 frames_read; |
396 |
| - result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count); |
397 |
| - if (result != MA_SUCCESS) { |
398 |
| - ma_decoder_uninit(&decoder); |
399 |
| - return false; |
400 |
| - } |
401 |
| - |
402 |
| - pcmf32_mono.resize(frame_count); |
403 |
| - result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read); |
404 |
| - if (result != MA_SUCCESS) { |
405 |
| - ma_decoder_uninit(&decoder); |
406 |
| - return false; |
407 |
| - } |
408 |
| - |
409 |
| -#ifdef MTMD_AUDIO_DEBUG |
410 |
| - // save audio to wav file |
411 |
| - ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate); |
412 |
| - ma_encoder encoder; |
413 |
| - ma_encoder_init_file("output.wav", &config, &encoder); |
414 |
| - ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read); |
415 |
| - ma_encoder_uninit(&encoder); |
416 |
| -#endif |
417 |
| - |
418 |
| - ma_decoder_uninit(&decoder); |
419 |
| - return true; |
420 |
| -} |
421 |
| - |
422 |
| -} // namespace audio_helpers |
423 |
| - |
424 |
| - |
425 | 339 | // precalculated mel filter banks
|
426 | 340 | // values are multiplied by 1000.0 to save space, and will be divided by 1000.0 at the end of the function
|
427 | 341 | //
|
|
0 commit comments