@@ -745,6 +745,10 @@ def convert_json_to_jsonl(path):
745745
746746 with open (path .replace ('.json' , '.jsonl' ), 'w' ) as f :
747747 for k ,v in data .items ():
748+ conversations = v ['conversations' ]
749+ if isinstance (conversations [1 ]['value' ], dict ):
750+ new_value = conversations [1 ]['value' ]['caption_with_reasoning' ]
751+ conversations [1 ]['value' ] = new_value
748752 json .dump (v , f )
749753 f .write ('\n ' )
750754
@@ -820,10 +824,13 @@ def convert_instruct_json_to_jsonl(path, apply_filter = False):
820824 temp_3 ['conversations' ][0 ]['value' ] = third_question
821825 temp_3 ['conversations' ][1 ]['value' ] = third_answer
822826
827+ temps = [temp_1 , temp_2 , temp_3 ]
828+
823829 if apply_filter :
824830 if 'disagree_with_human_annotation' in v ['conversations' ][1 ]['value' ] and v ['conversations' ][1 ]['value' ]['disagree_with_human_annotation' ] is True :
825- continue
826- ret .append (temp_1 )
831+ continue
832+ random_index = np .random .randint (0 , 3 )
833+ ret .append (temps [random_index ])
827834 else :
828835 ret .append (temp_1 )
829836 ret .append (temp_2 )
@@ -882,8 +889,10 @@ def convert_instruct_json_to_jsonl(path, apply_filter = False):
882889 # ann = GPTHandObjectAnnotator(train_file_path, debug = False)
883890 # ann.multi_process_run(n_samples = -1)
884891
885- # convert_json_to_jsonl('train_anno_gpt-gt-reason_4_first_person_all.json')
892+ convert_json_to_jsonl ('train_anno_gpt-gt-reason_4_first_person_all.json' )
886893
887894 #calc_disagree_ratio_from_jsonl('train_anno_gpt-gt-reason_4_first_person_all.jsonl')
888895
889- convert_instruct_json_to_jsonl ('train_anno_gpt-hand-object_all.json' , apply_filter = True )
896+ #convert_instruct_json_to_jsonl('train_anno_gpt-hand-object_all.json', apply_filter = True)
897+
898+ #convert_instruct_json_to_jsonl('train_anno_gpt-gt-instruct-reason_4_first_person_all.json', apply_filter = True)
0 commit comments