 from typing import TYPE_CHECKING, Optional, Union
 
 from evals.elsuite.modelgraded.classify_utils import ANSWER_PROMPTS, choice_to_str, expand_args_dict
-from evals.prompt.base import OpenAICreateChatPrompt
+from evals.elsuite.utils import format_prompt
+from evals.prompt.base import OpenAICreateChatPrompt, is_chat_prompt
 
 if TYPE_CHECKING:
     from dataclasses import dataclass
 class ModelGradedSpec:
     prompt: Union[str, OpenAICreateChatPrompt]
     choice_strings: Union[list[str], str]
-    eval_type: str
     input_outputs: dict[str, str]
 
+    eval_type: Optional[str] = None
+    format_type: str = "in_message"
     choice_scores: Optional[Union[dict[str, Union[float, int]], str]] = None
     multicomp_n: Optional[int] = None
-    append_answer_prompt: bool = False
     args: Optional[dict[str, dict[str, str]]] = None
     expand_args_dict: Optional[dict[str, dict[str, tuple[str]]]] = None
     completion_sample_templates: Optional[dict[str, str]] = None
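
For illustration only (the prompt text, choices, and keys below are made-up example values, not from this commit), a spec using the new optional fields might be constructed roughly like this:

    spec = ModelGradedSpec(
        prompt="User: {input}\nAssistant: {completion}\n\nWas the assistant response helpful?",
        choice_strings=["Yes", "No"],
        input_outputs={"input": "completion"},  # maps the sample's input key to its completion key
        # format_type defaults to "in_message"; "out_message" replays the sample as separate chat turns
    )
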
@@ -45,13 +46,9 @@ def __post_init__(self):
         if self.choice_scores == "from_strings":
             self.choice_scores = {c: float(c) for c in self.choice_strings}
 
-        # 'prompt' is a string that specifies the model-graded evaluation
-        assert isinstance(self.prompt, str), f"prompt must be a string, not {type(self.prompt)}"
-        if self.append_answer_prompt:
-            self.prompt += "\n\n" + ANSWER_PROMPTS[self.eval_type].format(
-                choices=choice_to_str(self.choice_strings)
-            )
-        self.prompt = [{"role": "user", "content": self.prompt}]
+        if isinstance(self.prompt, str):
+            self.prompt = [{"role": "user", "content": self.prompt}]
+        assert is_chat_prompt(self.prompt)
 
         # 'input_outputs' is a dict that specifies the input and output keys in the sample
         # output key is the model's raw response to input key. These are used for filling 'prompt' template.
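
The effect of the normalization above, sketched with illustrative values: a bare string prompt is wrapped into a one-message chat prompt and then checked with is_chat_prompt; the answer prompt is no longer appended here, but by the append_answer_prompt method added later in this diff.

    ModelGradedSpec(
        prompt="Is the answer correct?\n\n{completion}",  # example string prompt
        choice_strings=["Yes", "No"],
        input_outputs={"input": "completion"},
    ).prompt
    # -> [{"role": "user", "content": "Is the answer correct?\n\n{completion}"}]
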
@@ -75,3 +72,74 @@ def __post_init__(self):
         assert (
             self.completion_sample_templates
         ), "completion_sample_templates must be specified if multicomp_n > 1"
+
+    def append_answer_prompt(
+        self,
+        eval_type: str,
+        append_type: str = "as_content",
+        prompt: Optional[OpenAICreateChatPrompt] = None,
+    ):
+        """Append answer prompt to prompt. Can only be called once."""
+        assert self.eval_type is None, f"eval_type already set: {eval_type}"
+        prompt = prompt or ANSWER_PROMPTS[eval_type]
+        prompt = format_prompt(prompt, choices=choice_to_str(self.choice_strings))
+        if append_type == "as_content":
+            assert isinstance(prompt, str), f"prompt must be str, not {type(prompt)}"
+            self.prompt[-1]["content"] += "\n\n" + prompt
+        elif append_type == "as_message":
+            assert is_chat_prompt(prompt), f"prompt must be chat prompt, not {prompt}"
+            self.prompt += prompt
+        else:
+            raise ValueError(f"append_type must be 'as_content' or 'as_message', not {append_type}")
+        self.eval_type = eval_type
+
+    def format(self, **kwargs: dict[str, OpenAICreateChatPrompt]) -> OpenAICreateChatPrompt:
97+ """Return an OpenAICreateChatPrompt that can be passed PromptFn for modelgraded eval.
+
+        'in_message' returns: [
+            {
+                "role": "user",
+                "content": \"""
+                    User: {input}
+                    Assistant: {completion}
+
+                    Was the assistant response helpful?
+                \""".strip(),
+            }
+        ]
+
+        'out_message' returns: [
+            {"role": "user", "content": "{input}"},
+            {"role": "assistant", "content": "{completion}"},
+            {"role": "user", "content": "Was the last assistant response helpful?"},
+        ]
+        """
+        if self.format_type == "in_message":
+            return format_prompt(self.prompt, **kwargs)
+        elif self.format_type == "out_message":
+            assert len(self.input_outputs) == 1, "out_message only supports one input/output pair"
+            # extract input-output data, as it is treated specially
+            input_completions = {
+                k: (k, kwargs[k], v, kwargs[v]) for k, v in self.input_outputs.items()
+            }
+            kwargs = {
+                k: v
+                for k, v in kwargs.items()
+                if k not in self.input_outputs.values() and k not in self.input_outputs
+            }
+            convo = []
+            for input_key, input, completion_key, completion in input_completions.values():
+                del input_key, completion_key
+                assert isinstance(
+                    completion, str
+                ), f"completion must be str, not {type(completion)}"
+                if is_chat_prompt(input):
+                    convo += input
+                else:
+                    convo.append({"role": "user", "content": input})
+                convo.append({"role": "assistant", "content": completion})
+            return convo + format_prompt(self.prompt, **kwargs)
+        else:
+            raise ValueError(
+                f"format_type must be 'in_message' or 'out_message', not {self.format_type}"
+            )
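
Putting the two new methods together, a hedged usage sketch reusing the example spec from above (assumes "cot_classify" is a valid ANSWER_PROMPTS key; the sample values are made up):

    spec.append_answer_prompt(eval_type="cot_classify")           # default append_type="as_content" appends to the last user message
    messages = spec.format(input="What is 2+2?", completion="4")  # fills {input}/{completion} in the prompt template
    # format_type="in_message" -> a single user message containing the rendered template;
    # format_type="out_message" -> the sample as user/assistant turns followed by the grading prompt.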