Skip to content

Commit 27a1982

Browse files
committed
cli : Support "-" for stdout like stdin
This changes examples/cli/cli.cpp to be like examples/common-whisper.cpp. "-of -" can be specified (or this can be inferred from "-" as the input file) to output to stdout. This is useful for piping to other applications.

- Log fname_out consistently when not stdout
  - Terminals have stdout=stderr, so remove the message before successful output to ease copying
  - Don't affect actual error messages
- Move opening the ofstream into the factory, fixing missing open and/or error messages in output_score/output_wts
- Fix struct naming convention

Closes #3048
1 parent 988dcd4 commit 27a1982

File tree

1 file changed

+81
-129
lines changed

1 file changed

+81
-129
lines changed

examples/cli/cli.cpp

+81-129
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <cstdio>
1010
#include <string>
1111
#include <thread>
12+
#include <utility>
1213
#include <vector>
1314
#include <cstring>
1415

@@ -379,15 +380,7 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
379380
}
380381
}
381382

382-
static bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
383-
std::ofstream fout(fname);
384-
if (!fout.is_open()) {
385-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
386-
return false;
387-
}
388-
389-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
390-
383+
static void output_txt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
391384
const int n_segments = whisper_full_n_segments(ctx);
392385
for (int i = 0; i < n_segments; ++i) {
393386
const char * text = whisper_full_get_segment_text(ctx, i);
@@ -402,19 +395,9 @@ static bool output_txt(struct whisper_context * ctx, const char * fname, const w
402395

403396
fout << speaker << text << "\n";
404397
}
405-
406-
return true;
407398
}
408399

409-
static bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
410-
std::ofstream fout(fname);
411-
if (!fout.is_open()) {
412-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
413-
return false;
414-
}
415-
416-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
417-
400+
static void output_vtt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
418401
fout << "WEBVTT\n\n";
419402

420403
const int n_segments = whisper_full_n_segments(ctx);
@@ -434,19 +417,9 @@ static bool output_vtt(struct whisper_context * ctx, const char * fname, const w
434417
fout << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
435418
fout << speaker << text << "\n\n";
436419
}
437-
438-
return true;
439420
}
440421

441-
static bool output_srt(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
442-
std::ofstream fout(fname);
443-
if (!fout.is_open()) {
444-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
445-
return false;
446-
}
447-
448-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
449-
422+
static void output_srt(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
450423
const int n_segments = whisper_full_n_segments(ctx);
451424
for (int i = 0; i < n_segments; ++i) {
452425
const char * text = whisper_full_get_segment_text(ctx, i);
@@ -463,8 +436,6 @@ static bool output_srt(struct whisper_context * ctx, const char * fname, const w
463436
fout << to_timestamp(t0, true) << " --> " << to_timestamp(t1, true) << "\n";
464437
fout << speaker << text << "\n\n";
465438
}
466-
467-
return true;
468439
}
469440

470441
static char * escape_double_quotes_and_backslashes(const char * str) {
@@ -530,15 +501,7 @@ static char * escape_double_quotes_in_csv(const char * str) {
530501
return escaped;
531502
}
532503

533-
static bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
534-
std::ofstream fout(fname);
535-
if (!fout.is_open()) {
536-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
537-
return false;
538-
}
539-
540-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
541-
504+
static void output_csv(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
542505
const int n_segments = whisper_full_n_segments(ctx);
543506
fout << "start,end,";
544507
if (params.diarize && pcmf32s.size() == 2)
@@ -561,14 +524,9 @@ static bool output_csv(struct whisper_context * ctx, const char * fname, const w
561524
}
562525
fout << "\"" << text_escaped << "\"\n";
563526
}
564-
565-
return true;
566527
}
567528

568-
static bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
569-
std::ofstream fout(fname);
570-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
571-
529+
static void output_score(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & /*params*/, std::vector<std::vector<float>> /*pcmf32s*/) {
572530
const int n_segments = whisper_full_n_segments(ctx);
573531
// fprintf(stderr,"segments: %d\n",n_segments);
574532
for (int i = 0; i < n_segments; ++i) {
@@ -581,16 +539,14 @@ static bool output_score(struct whisper_context * ctx, const char * fname, const
581539
// fprintf(stderr,"token: %s %f\n",token,probability);
582540
}
583541
}
584-
return true;
585542
}
586543

587-
static bool output_json(
544+
static void output_json(
588545
struct whisper_context * ctx,
589-
const char * fname,
546+
std::ofstream & fout,
590547
const whisper_params & params,
591-
std::vector<std::vector<float>> pcmf32s,
592-
bool full) {
593-
std::ofstream fout(fname);
548+
std::vector<std::vector<float>> pcmf32s) {
549+
const bool full = params.output_jsn_full;
594550
int indent = 0;
595551

596552
auto doindent = [&]() {
@@ -670,12 +626,6 @@ static bool output_json(
670626
end_obj(end);
671627
};
672628

673-
if (!fout.is_open()) {
674-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
675-
return false;
676-
}
677-
678-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
679629
start_obj(nullptr);
680630
value_s("systeminfo", whisper_print_system_info(), false);
681631
start_obj("model");
@@ -749,17 +699,12 @@ static bool output_json(
749699

750700
end_arr(true);
751701
end_obj(true);
752-
return true;
753702
}
754703

755704
// karaoke video generation
756705
// outputs a bash script that uses ffmpeg to generate a video with the subtitles
757706
// TODO: font parameter adjustments
758-
static bool output_wts(struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float>> pcmf32s) {
759-
std::ofstream fout(fname);
760-
761-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
762-
707+
static bool output_wts(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s, const char * fname_inp, float t_sec, const char * fname_out) {
763708
static const char * font = params.font_path.c_str();
764709

765710
std::ifstream fin(font);
@@ -875,20 +820,12 @@ static bool output_wts(struct whisper_context * ctx, const char * fname, const c
875820

876821
fout.close();
877822

878-
fprintf(stderr, "%s: run 'source %s' to generate karaoke video\n", __func__, fname);
823+
fprintf(stderr, "# %s: run 'source %s' to generate karaoke video\n", __func__, fname_out);
879824

880825
return true;
881826
}
882827

883-
static bool output_lrc(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
884-
std::ofstream fout(fname);
885-
if (!fout.is_open()) {
886-
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname);
887-
return false;
888-
}
889-
890-
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
891-
828+
static void output_lrc(struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
892829
fout << "[by:whisper.cpp]\n";
893830

894831
const int n_segments = whisper_full_n_segments(ctx);
@@ -916,8 +853,6 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
916853

917854
fout << '[' << timestamp_lrc << ']' << speaker << text << "\n";
918855
}
919-
920-
return true;
921856
}
922857

923858

@@ -1066,8 +1001,53 @@ int main(int argc, char ** argv) {
10661001
}
10671002

10681003
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
1069-
const auto fname_inp = params.fname_inp[f];
1070-
const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
1004+
const auto & fname_inp = params.fname_inp[f];
1005+
struct fout_factory {
1006+
std::string fname_out;
1007+
const size_t basename_length;
1008+
const bool is_stdout;
1009+
bool used_stdout;
1010+
decltype(whisper_print_segment_callback) * const print_segment_callback;
1011+
std::ofstream fout;
1012+
1013+
fout_factory (const std::string & fname_out_, const std::string & fname_inp, whisper_params & params) :
1014+
fname_out{!fname_out_.empty() ? fname_out_ : fname_inp},
1015+
basename_length{fname_out.size()},
1016+
is_stdout{fname_out == "-"},
1017+
used_stdout{},
1018+
print_segment_callback{is_stdout ? nullptr : whisper_print_segment_callback} {
1019+
if (!print_segment_callback) {
1020+
params.no_timestamps = true;
1021+
params.print_progress = false;
1022+
}
1023+
}
1024+
1025+
bool open(const char * ext, const char * function) {
1026+
if (is_stdout) {
1027+
if (std::exchange(used_stdout, true)) {
1028+
fprintf(stderr, "warning: Not appending multiple file formats to stdout\n");
1029+
return false;
1030+
}
1031+
#ifdef _WIN32
1032+
fout = std::ofstream{"CON"};
1033+
#else
1034+
fout = std::ofstream{"/dev/stdout"};
1035+
#endif
1036+
// Not using fprintf stderr here because it might equal stdout
1037+
// Also assuming /dev is mounted
1038+
return true;
1039+
}
1040+
fname_out.resize(basename_length);
1041+
fname_out += ext;
1042+
fout = std::ofstream{fname_out};
1043+
if (!fout.is_open()) {
1044+
fprintf(stderr, "%s: failed to open '%s' for writing\n", __func__, fname_out.c_str());
1045+
return false;
1046+
}
1047+
fprintf(stderr, "%s: saving output to '%s'\n", function, fname_out.c_str());
1048+
return true;
1049+
}
1050+
} fout_factory{f < (int) params.fname_out.size() ? params.fname_out[f] : "", fname_inp, params};
10711051

10721052
std::vector<float> pcmf32; // mono-channel F32 PCM
10731053
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
@@ -1172,7 +1152,7 @@ int main(int argc, char ** argv) {
11721152

11731153
// this callback is called on each new segment
11741154
if (!wparams.print_realtime) {
1175-
wparams.new_segment_callback = whisper_print_segment_callback;
1155+
wparams.new_segment_callback = fout_factory.print_segment_callback;
11761156
wparams.new_segment_callback_user_data = &user_data;
11771157
}
11781158

@@ -1214,54 +1194,26 @@ int main(int argc, char ** argv) {
12141194

12151195
// output stuff
12161196
{
1217-
printf("\n");
1218-
1219-
// output to text file
1220-
if (params.output_txt) {
1221-
const auto fname_txt = fname_out + ".txt";
1222-
output_txt(ctx, fname_txt.c_str(), params, pcmf32s);
1223-
}
1224-
1225-
// output to VTT file
1226-
if (params.output_vtt) {
1227-
const auto fname_vtt = fname_out + ".vtt";
1228-
output_vtt(ctx, fname_vtt.c_str(), params, pcmf32s);
1229-
}
1230-
1231-
// output to SRT file
1232-
if (params.output_srt) {
1233-
const auto fname_srt = fname_out + ".srt";
1234-
output_srt(ctx, fname_srt.c_str(), params, pcmf32s);
1235-
}
1236-
1237-
// output to WTS file
1238-
if (params.output_wts) {
1239-
const auto fname_wts = fname_out + ".wts";
1240-
output_wts(ctx, fname_wts.c_str(), fname_inp.c_str(), params, float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE, pcmf32s);
1241-
}
1242-
1243-
// output to CSV file
1244-
if (params.output_csv) {
1245-
const auto fname_csv = fname_out + ".csv";
1246-
output_csv(ctx, fname_csv.c_str(), params, pcmf32s);
1247-
}
1248-
1249-
// output to JSON file
1250-
if (params.output_jsn) {
1251-
const auto fname_jsn = fname_out + ".json";
1252-
output_json(ctx, fname_jsn.c_str(), params, pcmf32s, params.output_jsn_full);
1253-
}
1254-
1255-
// output to LRC file
1256-
if (params.output_lrc) {
1257-
const auto fname_lrc = fname_out + ".lrc";
1258-
output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
1259-
}
1260-
1261-
// output to score file
1262-
if (params.log_score) {
1263-
const auto fname_score = fname_out + ".score.txt";
1264-
output_score(ctx, fname_score.c_str(), params, pcmf32s);
1197+
// macros to stringify function name
1198+
#define output_func(func, ext, param, ...) if (param && fout_factory.open(ext, #func)) {\
1199+
func(ctx, fout_factory.fout, params, __VA_ARGS__); \
1200+
}
1201+
#define output_ext(ext, ...) output_func(output_##ext, "." #ext, params.output_##ext, __VA_ARGS__)
1202+
1203+
output_ext(txt, pcmf32s);
1204+
output_ext(vtt, pcmf32s);
1205+
output_ext(srt, pcmf32s);
1206+
output_ext(wts, pcmf32s, fname_inp.c_str(), float(pcmf32.size() + 1000)/WHISPER_SAMPLE_RATE, fout_factory.fname_out.c_str());
1207+
output_ext(csv, pcmf32s);
1208+
output_func(output_json, ".json", params.output_jsn, pcmf32s);
1209+
output_ext(lrc, pcmf32s);
1210+
output_func(output_score, ".score.txt", params.log_score, pcmf32s);
1211+
1212+
#undef output_ext
1213+
#undef output_func
1214+
1215+
if (fout_factory.is_stdout && !fout_factory.used_stdout) {
1216+
fprintf(stderr, "warning: '--output-file -' used without any other '--output-*'");
12651217
}
12661218
}
12671219
}

0 commit comments

Comments
 (0)