@@ -300,7 +300,8 @@ def analyze_wer_folders(folder_truth, folder_hypothesis, folder_output,
300
300
filenames = df ['filename' ].unique ()
301
301
try :
302
302
df_calls_metadata = get_calls_metadata (filenames )
303
-
303
+ is_metadata_available = True
304
+
304
305
wer_by_filename_with_metadata = pd .merge (left = get_pivot_table_of_edits (df ), right = df_calls_metadata ,
305
306
left_on = 'filename' , right_on = 'call_id' , how = 'left' )
306
307
save_to_s3 (wer_by_filename_with_metadata , s3_filename = folder_output + '/wer_by_filename_with_metadata.csv' )
@@ -335,11 +336,8 @@ def wer_by_field(x):
335
336
print ('\n === WER by speaker_count_total: ===' )
336
337
print (wer_by_field ('speaker_count_total' ))
337
338
338
- transcription_edits_with_metadata = pd .merge (left = df , right = df_calls_metadata , left_on = 'filename' ,
339
- right_on = 'call_id' )
340
- save_to_s3 (transcription_edits_with_metadata , s3_filename = folder_output + '/transcription_edits_with_metadata.csv' )
341
-
342
339
except Exception as e :
340
+ is_metadata_available = False
343
341
print ('\n Error reading metadata. Skipping WER statistics per metadata metrics...' )
344
342
345
343
print ('Saving HTML of transcription differences...' )
@@ -353,7 +351,13 @@ def wer_by_field(x):
353
351
# Top errors
354
352
save_to_s3 (get_top_errors (df , groupby = ['text_reference' ]), s3_filename = folder_output + '/top_errors.tsv' )
355
353
356
- return transcription_edits_with_metadata
354
+ if is_metadata_available :
355
+ transcription_edits_with_metadata = pd .merge (left = df , right = df_calls_metadata , left_on = 'filename' ,
356
+ right_on = 'call_id' )
357
+ save_to_s3 (transcription_edits_with_metadata , s3_filename = folder_output + '/transcription_edits_with_metadata.csv' )
358
+ return transcription_edits_with_metadata
359
+ else :
360
+ return None
357
361
358
362
359
363
def der_save_metadata (df , folder_output ):
0 commit comments