Skip to content

Commit 5d5da74

Browse files
authored
Fix hate unfairness result lookup for sync red-team evaluations (Azure#44019)
* Fix hate unfairness result lookup for sync red-team evaluations
* Update changelog
1 parent 57afdf9 commit 5d5da74

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- Added detection and retry handling for network errors wrapped in generic exceptions with "Error sending prompt with conversation ID" message
88
- Fix results for ungrounded_attributes
99
- score_mode grader improvements
10+
- Fix Red Team so that hate/unfairness evaluation rows populate when OneDP sync evaluators report results under the hate_unfairness metric name.
1011

1112
## 1.13.5 (2025-11-10)
1213

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_evaluation_processor.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,15 @@ async def evaluate_conversation(
148148
try:
149149
self.logger.debug(f"Evaluating conversation {idx+1} for {risk_category.value}/{strategy_name}")
150150

151+
use_sync_endpoint = is_onedp_project(self.azure_ai_project)
152+
effective_metric_name = (
153+
"hate_unfairness" if use_sync_endpoint and metric_name == "hate_fairness" else metric_name
154+
)
155+
151156
@retry(**self.retry_config["network_retry"])
152157
async def evaluate_with_rai_service_with_retry():
153158
try:
154-
# Use sync_evals endpoint for OneDP projects, legacy endpoint for others
155-
if is_onedp_project(self.azure_ai_project):
156-
# If using sync API use hate_unfairness rather than hate_fairness
157-
effective_metric_name = "hate_unfairness" if metric_name == "hate_fairness" else metric_name
159+
if use_sync_endpoint:
158160
return await evaluate_with_rai_service_sync(
159161
data=query_response,
160162
metric_name=effective_metric_name,
@@ -213,9 +215,12 @@ async def evaluate_with_rai_service_with_retry():
213215

214216
# Find the result matching our metric/risk category
215217
eval_result = None
218+
lookup_names = {metric_name, risk_cat_value, effective_metric_name}
216219
for result_item in results:
217220
result_dict = result_item if isinstance(result_item, dict) else result_item.__dict__
218-
if result_dict.get("name") == metric_name or result_dict.get("metric") == metric_name:
221+
result_name = str(result_dict.get("name") or "")
222+
metric_field = str(result_dict.get("metric") or "")
223+
if result_name in lookup_names or metric_field in lookup_names:
219224
eval_result = result_dict
220225
break
221226

0 commit comments

Comments
 (0)