5 changes: 5 additions & 0 deletions src/uipath/_cli/_evals/_runtime.py
@@ -11,6 +11,7 @@
from opentelemetry.sdk.trace import ReadableSpan, Span
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.input_mocker import (
generate_llm_input,
)
@@ -49,6 +50,7 @@
)
from ._span_collection import ExecutionSpanCollector
from .mocks.mocks import (
cache_manager_context,
clear_execution_context,
set_execution_context,
)
@@ -322,6 +324,9 @@ async def _execute_eval(
evaluators: List[BaseEvaluator[Any]],
event_bus: EventBus,
) -> EvaluationRunResult:
if cache_manager_context.get() is None:
cache_manager_context.set(CacheManager())

# Generate LLM-based input if input_mocking_strategy is defined
if eval_item.input_mocking_strategy:
eval_item = await self._generate_input_for_eval(eval_item)
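The runtime change above lazily seeds a shared CacheManager in a ContextVar, so every mocker invoked during the same evaluation run reuses one cache. A minimal standalone sketch of that lazy-initialization pattern (names simplified; not the package's actual module layout):

from contextvars import ContextVar
from typing import Optional


class DummyCacheManager:
    """Stand-in for CacheManager, only to illustrate the pattern."""


cache_manager_context: ContextVar[Optional[DummyCacheManager]] = ContextVar(
    "cache_manager", default=None
)


def ensure_cache_manager() -> DummyCacheManager:
    # Create the shared manager once per execution context; later callers reuse it.
    manager = cache_manager_context.get()
    if manager is None:
        manager = DummyCacheManager()
        cache_manager_context.set(manager)
    return manager
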
79 changes: 79 additions & 0 deletions src/uipath/_cli/_evals/mocks/cache_manager.py
@@ -0,0 +1,79 @@
"""Cache manager for LLM and input mocker responses."""

import hashlib
import json
from pathlib import Path
from typing import Any, Dict, Optional


class CacheManager:
"""Manages file-based caching for LLM and input mocker responses."""

def __init__(self, cache_dir: Optional[Path] = None):
"""Initialize the cache manager."""
self.cache_dir = cache_dir or (Path.cwd() / ".uipath" / "eval_cache")

def _compute_cache_key(self, cache_key_data: Dict[str, Any]) -> str:
"""Compute a hash from cache key data."""
serialized = json.dumps(cache_key_data, sort_keys=True)
return hashlib.sha256(serialized.encode()).hexdigest()

def _get_cache_path(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key: str,
function_name: str,
) -> Path:
"""Get the file path for a cache entry."""
return (
self.cache_dir
/ mocker_type
/ eval_set_id
/ eval_item_id
/ function_name
/ f"{cache_key}.json"
)

def get(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key_data: Dict[str, Any],
function_name: str,
) -> Optional[Any]:
"""Retrieve a cached response."""
cache_key = self._compute_cache_key(cache_key_data)
cache_path = self._get_cache_path(
mocker_type, eval_set_id, eval_item_id, cache_key, function_name
)

if not cache_path.exists():
return None

with open(cache_path, "r") as f:
cached_response = json.load(f)

return cached_response

def set(
self,
mocker_type: str,
eval_set_id: str,
eval_item_id: str,
cache_key_data: Dict[str, Any],
response: Any,
function_name: str,
) -> None:
"""Store a response in the cache."""
cache_key = self._compute_cache_key(cache_key_data)
cache_path = self._get_cache_path(
mocker_type, eval_set_id, eval_item_id, cache_key, function_name
)

cache_path.parent.mkdir(parents=True, exist_ok=True)

with open(cache_path, "w") as f:
json.dump(response, f)
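
For reference, a round-trip with the class above; the import path matches this PR, while the cache directory, IDs, and payload are made-up example values:

from pathlib import Path

from uipath._cli._evals.mocks.cache_manager import CacheManager

cache = CacheManager(cache_dir=Path("/tmp/eval_cache"))  # example directory
key_data = {"prompt_generation_args": {"input_schema": "{}"}}

# First run: nothing cached yet, so get() returns None.
assert (
    cache.get(
        mocker_type="input_mocker",
        eval_set_id="set-1",
        eval_item_id="item-1",
        cache_key_data=key_data,
        function_name="generate_llm_input",
    )
    is None
)

cache.set(
    mocker_type="input_mocker",
    eval_set_id="set-1",
    eval_item_id="item-1",
    cache_key_data=key_data,
    response={"query": "example"},
    function_name="generate_llm_input",
)

# A later run with identical key data reads the JSON file back from disk.
assert cache.get(
    mocker_type="input_mocker",
    eval_set_id="set-1",
    eval_item_id="item-1",
    cache_key_data=key_data,
    function_name="generate_llm_input",
) == {"query": "example"}
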
50 changes: 42 additions & 8 deletions src/uipath/_cli/_evals/mocks/input_mocker.py
@@ -58,19 +58,24 @@ async def generate_llm_input(
input_schema: Dict[str, Any],
) -> Dict[str, Any]:
"""Generate synthetic input using an LLM based on the evaluation context."""
from .mocks import cache_manager_context

try:
llm = UiPath().llm
cache_manager = cache_manager_context.get()

prompt = get_input_mocking_prompt(
input_schema=json.dumps(input_schema, indent=2),
input_generation_instructions=evaluation_item.input_mocking_strategy.prompt
prompt_generation_args = {
"input_schema": json.dumps(input_schema),
"input_generation_instructions": evaluation_item.input_mocking_strategy.prompt
if evaluation_item.input_mocking_strategy
else "",
expected_behavior=evaluation_item.expected_agent_behavior or "",
expected_output=json.dumps(evaluation_item.expected_output, indent=2)
"expected_behavior": evaluation_item.expected_agent_behavior or "",
"expected_output": json.dumps(evaluation_item.expected_output)
if evaluation_item.expected_output
else "",
)
}

prompt = get_input_mocking_prompt(**prompt_generation_args)

response_format = {
"type": "json_schema",
@@ -92,15 +97,44 @@
else {}
)

if cache_manager is not None:
cache_key_data = {
"response_format": response_format,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}

cached_response = cache_manager.get(
mocker_type="input_mocker",
eval_set_id=evaluation_item.eval_set_id,
eval_item_id=evaluation_item.id,
cache_key_data=cache_key_data,
function_name="generate_llm_input",
)

if cached_response is not None:
return cached_response

response = await llm.chat_completions(
[{"role": "user", "content": prompt}],
response_format=response_format,
**completion_kwargs,
)

generated_input_str = response.choices[0].message.content

return json.loads(generated_input_str)
result = json.loads(generated_input_str)

if cache_manager is not None:
cache_manager.set(
mocker_type="input_mocker",
eval_set_id=evaluation_item.eval_set_id,
eval_item_id=evaluation_item.id,
cache_key_data=cache_key_data,
response=result,
function_name="generate_llm_input",
)

return result
except json.JSONDecodeError as e:
raise UiPathInputMockingError(
f"Failed to parse LLM response as JSON: {str(e)}"
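The cache key for the call above is simply the SHA-256 of the sorted-key JSON serialization of cache_key_data; a sketch of the derivation and the resulting file location (values illustrative):

import hashlib
import json

cache_key_data = {
    "response_format": {"type": "json_schema"},
    "completion_kwargs": {},
    "prompt_generation_args": {"input_schema": "{}"},
}

# Mirrors CacheManager._compute_cache_key: sort keys so dict ordering
# does not change the hash, then take the SHA-256 hex digest.
cache_key = hashlib.sha256(
    json.dumps(cache_key_data, sort_keys=True).encode()
).hexdigest()

# Stored (with the default cache_dir) under:
# .uipath/eval_cache/input_mocker/<eval_set_id>/<eval_item_id>/generate_llm_input/<cache_key>.json
print(cache_key)
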
41 changes: 38 additions & 3 deletions src/uipath/_cli/_evals/mocks/llm_mocker.py
@@ -97,6 +97,7 @@ async def response(
from uipath._services.llm_gateway_service import _cleanup_schema

from .mocks import (
cache_manager_context,
evaluation_context,
execution_id_context,
span_collector_context,
@@ -161,7 +162,7 @@ class OutputSchema(BaseModel):
},
"testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt,
}
prompt_input = {
prompt_generation_args = {
k: json.dumps(pydantic_to_dict_safe(v))
for k, v in prompt_input.items()
}
@@ -171,11 +172,33 @@
if model_parameters
else {}
)

formatted_prompt = PROMPT.format(**prompt_generation_args)

cache_key_data = {
"response_format": response_format,
"completion_kwargs": completion_kwargs,
"prompt_generation_args": prompt_generation_args,
}

cache_manager = cache_manager_context.get()
if cache_manager is not None:
cached_response = cache_manager.get(
mocker_type="llm_mocker",
eval_set_id=self.evaluation_item.eval_set_id,
eval_item_id=self.evaluation_item.id,
cache_key_data=cache_key_data,
function_name=function_name,
)

if cached_response is not None:
return cached_response

response = await llm.chat_completions(
[
{
"role": "user",
"content": PROMPT.format(**prompt_input),
"content": formatted_prompt,
},
],
response_format=response_format,
@@ -184,7 +207,19 @@
mocked_response = OutputSchema(
**json.loads(response.choices[0].message.content)
)
return mocked_response.model_dump(mode="json")["response"]
result = mocked_response.model_dump(mode="json")["response"]

if cache_manager is not None:
cache_manager.set(
mocker_type="llm_mocker",
eval_set_id=self.evaluation_item.eval_set_id,
eval_item_id=self.evaluation_item.id,
cache_key_data=cache_key_data,
response=result,
function_name=function_name,
)

return result
except Exception as e:
raise UiPathMockResponseGenerationError() from e
else:
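Both mockers wrap their LLM call in the same cache-aside flow: look up, fall through to the real call on a miss, then store the result. A condensed sketch of that pattern as a standalone helper (cached_call is an invented name; the PR inlines this logic in each mocker):

from typing import Any, Awaitable, Callable, Dict, Optional

from uipath._cli._evals.mocks.cache_manager import CacheManager


async def cached_call(
    cache: Optional[CacheManager],
    mocker_type: str,
    eval_set_id: str,
    eval_item_id: str,
    cache_key_data: Dict[str, Any],
    function_name: str,
    compute: Callable[[], Awaitable[Any]],
) -> Any:
    # Return a cached response when one exists for this key.
    if cache is not None:
        hit = cache.get(
            mocker_type=mocker_type,
            eval_set_id=eval_set_id,
            eval_item_id=eval_item_id,
            cache_key_data=cache_key_data,
            function_name=function_name,
        )
        if hit is not None:
            return hit
    # Cache miss: perform the real (LLM) call, then persist the result.
    result = await compute()
    if cache is not None:
        cache.set(
            mocker_type=mocker_type,
            eval_set_id=eval_set_id,
            eval_item_id=eval_item_id,
            cache_key_data=cache_key_data,
            response=result,
            function_name=function_name,
        )
    return result
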
7 changes: 7 additions & 0 deletions src/uipath/_cli/_evals/mocks/mocks.py
@@ -6,6 +6,7 @@

from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath._cli._evals._span_collection import ExecutionSpanCollector
from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.mocker import Mocker, UiPathNoMockFoundError
from uipath._cli._evals.mocks.mocker_factory import MockerFactory

@@ -26,6 +27,11 @@
"execution_id", default=None
)

# Cache manager for LLM and input mocker responses
cache_manager_context: ContextVar[Optional[CacheManager]] = ContextVar(
"cache_manager", default=None
)

logger = logging.getLogger(__name__)


@@ -54,6 +60,7 @@ def clear_execution_context() -> None:
mocker_context.set(None)
span_collector_context.set(None)
execution_id_context.set(None)
cache_manager_context.set(None)


async def get_mocked_response(
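A quick lifecycle check for the context var added above, assuming the modules from this PR are importable; clear_execution_context() now resets the cache manager along with the other per-execution state:

from uipath._cli._evals.mocks.cache_manager import CacheManager
from uipath._cli._evals.mocks.mocks import (
    cache_manager_context,
    clear_execution_context,
)

cache_manager_context.set(CacheManager())
assert cache_manager_context.get() is not None

clear_execution_context()  # also resets mocker, span collector, and execution id
assert cache_manager_context.get() is None
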