5 changes: 5 additions & 0 deletions src/uipath/_cli/_evals/_runtime.py
@@ -11,6 +11,7 @@
from opentelemetry.sdk.trace import ReadableSpan, Span
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.input_mocker import (
generate_llm_input,
)
@@ -49,6 +50,7 @@
)
from ._span_collection import ExecutionSpanCollector
from .mocks.mocks import (
cache_manager_context,
clear_execution_context,
set_execution_context,
)
@@ -322,6 +324,9 @@ async def _execute_eval(
evaluators: List[BaseEvaluator[Any]],
event_bus: EventBus,
) -> EvaluationRunResult:
if cache_manager_context.get() is None:
cache_manager_context.set(CacheManager())

# Generate LLM-based input if input_mocking_strategy is defined
if eval_item.input_mocking_strategy:
eval_item = await self._generate_input_for_eval(eval_item)
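The runtime change above lazily seeds a shared CacheManager in a ContextVar, so every mocker invoked during the same evaluation run reuses one cache. A minimal standalone sketch of that lazy-initialization pattern (names simplified; not the package's actual module layout):

from contextvars import ContextVar
from typing import Optional


class DummyCacheManager:
    """Stand-in for CacheManager, only to illustrate the pattern."""


cache_manager_context: ContextVar[Optional[DummyCacheManager]] = ContextVar(
    "cache_manager", default=None
)


def ensure_cache_manager() -> DummyCacheManager:
    # Create the shared manager once per execution context; later callers reuse it.
    manager = cache_manager_context.get()
    if manager is None:
        manager = DummyCacheManager()
        cache_manager_context.set(manager)
    return manager
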
79 changes: 79 additions & 0 deletions src/uipath/_cli/_evals/mocks/cache_manager.py
@@ -0,0 +1,79 @@
"""Cache manager for LLM and input mocker responses."""

import hashlib
import json
from pathlib import Path
from typing import Any, Dict, Optional


class CacheManager:
"""Manages file-based caching for LLM and input mocker responses."""

def __init__(self, cache_dir: Optional[Path] = None):
"""Initialize the cache manager."""
self.cache_dir = cache_dir or (Path.cwd() / ".uipath" / "eval_cache")

def _compute_cache_key(self, cache_key_data: Dict[str, Any]) -> str:
"""Compute a hash from cache key data."""
serialized = json.dumps(cache_key_data, sort_keys=True)
return hashlib.sha256(serialized.encode()).hexdigest()

def _get_cache_path(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key: str,
function_name: str,
) -> Path:
"""Get the file path for a cache entry."""
return (
self.cache_dir
/ mocker_type
/ eval_set_id
/ eval_item_id
/ function_name
/ f"{cache_key}.json"
)

def get(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key_data: Dict[str, Any],
function_name: str,
) -> Optional[Any]:
"""Retrieve a cached response."""
cache_key = self._compute_cache_key(cache_key_data)
cache_path = self._get_cache_path(
mocker_type, eval_set_id, eval_item_id, cache_key, function_name
)

if not cache_path.exists():
return None

with open(cache_path, "r") as f:
cached_response = json.load(f)

return cached_response

def set(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key_data: Dict[str, Any],
response: Any,
function_name: str,
) -> None:
"""Store a response in the cache."""
cache_key = self._compute_cache_key(cache_key_data)
cache_path = self._get_cache_path(
mocker_type, eval_set_id, eval_item_id, cache_key, function_name
)

cache_path.parent.mkdir(parents=True, exist_ok=True)

with open(cache_path, "w") as f:
json.dump(response, f)
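
For reference, a round-trip with the class above; the import path matches this PR, while the cache directory, IDs, and payload are made-up example values:

from pathlib import Path

from uipath._cli._evals.mocks.cache_manager import CacheManager

cache = CacheManager(cache_dir=Path("/tmp/eval_cache"))  # example directory
key_data = {"prompt_generation_args": {"input_schema": "{}"}}

# First run: nothing cached yet, so get() returns None.
assert (
    cache.get(
        mocker_type="input_mocker",
        eval_set_id="set-1",
        eval_item_id="item-1",
        cache_key_data=key_data,
        function_name="generate_llm_input",
    )
    is None
)

cache.set(
    mocker_type="input_mocker",
    eval_set_id="set-1",
    eval_item_id="item-1",
    cache_key_data=key_data,
    response={"query": "example"},
    function_name="generate_llm_input",
)

# A later run with identical key data reads the JSON file back from disk.
assert cache.get(
    mocker_type="input_mocker",
    eval_set_id="set-1",
    eval_item_id="item-1",
    cache_key_data=key_data,
    function_name="generate_llm_input",
) == {"query": "example"}
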
50 changes: 42 additions & 8 deletions src/uipath/_cli/_evals/mocks/input_mocker.py
@@ -58,19 +58,24 @@ async def generate_llm_input(
input_schema: Dict[str, Any],
) -> Dict[str, Any]:
"""Generate synthetic input using an LLM based on the evaluation context."""
from .mocks import cache_manager_context

try:
llm = UiPath().llm
cache_manager = cache_manager_context.get()

prompt = get_input_mocking_prompt(
input_schema=json.dumps(input_schema, indent=2),
input_generation_instructions=evaluation_item.input_mocking_strategy.prompt
prompt_generation_args = {
"input_schema": json.dumps(input_schema),
"input_generation_instructions": evaluation_item.input_mocking_strategy.prompt
if evaluation_item.input_mocking_strategy
else "",
expected_behavior=evaluation_item.expected_agent_behavior or "",
expected_output=json.dumps(evaluation_item.expected_output, indent=2)
"expected_behavior": evaluation_item.expected_agent_behavior or "",
"expected_output": json.dumps(evaluation_item.expected_output)
if evaluation_item.expected_output
else "",
)
}

prompt = get_input_mocking_prompt(**prompt_generation_args)

response_format = {
"type": "json_schema",
@@ -92,15 +97,44 @@
else {}
)

if cache_manager is not None:
cache_key_data = {
"response_format": response_format,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}

cached_response = cache_manager.get(
mocker_type="input_mocker",
eval_set_id=evaluation_item.eval_set_id,
eval_item_id=evaluation_item.id,
cache_key_data=cache_key_data,
function_name="generate_llm_input",
)

if cached_response is not None:
return cached_response

response = await llm.chat_completions(
[{"role": "user", "content": prompt}],
response_format=response_format,
**completion_kwargs,
)

generated_input_str = response.choices[0].message.content

return json.loads(generated_input_str)
result = json.loads(generated_input_str)

if cache_manager is not None:
cache_manager.set(
mocker_type="input_mocker",
eval_set_id=evaluation_item.eval_set_id,
eval_item_id=evaluation_item.id,
cache_key_data=cache_key_data,
response=result,
function_name="generate_llm_input",
)

return result
except json.JSONDecodeError as e:
raise UiPathInputMockingError(
f"Failed to parse LLM response as JSON: {str(e)}"
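The cache key for the call above is simply the SHA-256 of the sorted-key JSON serialization of cache_key_data; a sketch of the derivation and the resulting file location (values illustrative):

import hashlib
import json

cache_key_data = {
    "response_format": {"type": "json_schema"},
    "completion_kwargs": {},
    "prompt_generation_args": {"input_schema": "{}"},
}

# Mirrors CacheManager._compute_cache_key: sort keys so dict ordering
# does not change the hash, then take the SHA-256 hex digest.
cache_key = hashlib.sha256(
    json.dumps(cache_key_data, sort_keys=True).encode()
).hexdigest()

# Stored (with the default cache_dir) under:
# .uipath/eval_cache/input_mocker/<eval_set_id>/<eval_item_id>/generate_llm_input/<cache_key>.json
print(cache_key)
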
41 changes: 38 additions & 3 deletions src/uipath/_cli/_evals/mocks/llm_mocker.py
@@ -97,6 +97,7 @@ async def response(
from uipath._services.llm_gateway_service import _cleanup_schema

from .mocks import (
cache_manager_context,
evaluation_context,
execution_id_context,
span_collector_context,
@@ -161,7 +162,7 @@ class OutputSchema(BaseModel):
},
"testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt,
}
prompt_input = {
prompt_generation_args = {
k: json.dumps(pydantic_to_dict_safe(v))
for k, v in prompt_input.items()
}
@@ -171,11 +172,33 @@
if model_parameters
else {}
)

formatted_prompt = PROMPT.format(**prompt_generation_args)

cache_key_data = {
"response_format": response_format,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}

cache_manager = cache_manager_context.get()
if cache_manager is not None:
cached_response = cache_manager.get(
mocker_type="llm_mocker",
eval_set_id=self.evaluation_item.eval_set_id,
eval_item_id=self.evaluation_item.id,
cache_key_data=cache_key_data,
function_name=function_name,
)

if cached_response is not None:
return cached_response

response = await llm.chat_completions(
[
{
"role": "user",
"content": PROMPT.format(**prompt_input),
"content": formatted_prompt,
},
],
response_format=response_format,
@@ -184,7 +207,19 @@
mocked_response = OutputSchema(
**json.loads(response.choices[0].message.content)
)
return mocked_response.model_dump(mode="json")["response"]
result = mocked_response.model_dump(mode="json")["response"]

if cache_manager is not None:
cache_manager.set(
mocker_type="llm_mocker",
eval_set_id=self.evaluation_item.eval_set_id,
eval_item_id=self.evaluation_item.id,
cache_key_data=cache_key_data,
response=result,
function_name=function_name,
)

return result
except Exception as e:
raise UiPathMockResponseGenerationError() from e
else:
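Both mockers wrap their LLM call in the same cache-aside flow: look up, fall through to the real call on a miss, then store the result. A condensed sketch of that pattern as a standalone helper (cached_call is an invented name; the PR inlines this logic in each mocker):

from typing import Any, Awaitable, Callable, Dict, Optional

from uipath._cli._evals.mocks.cache_manager import CacheManager


async def cached_call(
    cache: Optional[CacheManager],
    mocker_type: str,
    eval_set_id: str,
    eval_item_id: str,
    cache_key_data: Dict[str, Any],
    function_name: str,
    compute: Callable[[], Awaitable[Any]],
) -> Any:
    # Return a cached response when one exists for this key.
    if cache is not None:
        hit = cache.get(
            mocker_type=mocker_type,
            eval_set_id=eval_set_id,
            eval_item_id=eval_item_id,
            cache_key_data=cache_key_data,
            function_name=function_name,
        )
        if hit is not None:
            return hit
    # Cache miss: perform the real (LLM) call, then persist the result.
    result = await compute()
    if cache is not None:
        cache.set(
            mocker_type=mocker_type,
            eval_set_id=eval_set_id,
            eval_item_id=eval_item_id,
            cache_key_data=cache_key_data,
            response=result,
            function_name=function_name,
        )
    return result
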
7 changes: 7 additions & 0 deletions src/uipath/_cli/_evals/mocks/mocks.py
@@ -6,6 +6,7 @@

from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath._cli._evals._span_collection import ExecutionSpanCollector
from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.mocker import Mocker, UiPathNoMockFoundError
from uipath._cli._evals.mocks.mocker_factory import MockerFactory

@@ -26,6 +27,11 @@
"execution_id", default=None
)

# Cache manager for LLM and input mocker responses
cache_manager_context: ContextVar[Optional[CacheManager]] = ContextVar(
"cache_manager", default=None
)

logger = logging.getLogger(__name__)


@@ -54,6 +60,7 @@ def clear_execution_context() -> None:
mocker_context.set(None)
span_collector_context.set(None)
execution_id_context.set(None)
cache_manager_context.set(None)


async def get_mocked_response(
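A quick lifecycle check for the context var added above, assuming the modules from this PR are importable; clear_execution_context() now resets the cache manager along with the other per-execution state:

from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.mocks import (
    cache_manager_context,
    clear_execution_context,
)

cache_manager_context.set(CacheManager())
assert cache_manager_context.get() is not None

clear_execution_context()  # also resets mocker, span collector, and execution id
assert cache_manager_context.get() is None
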