From 4f72996e9d81f09228ca944cd9eae0e629729787 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Tue, 9 Sep 2025 19:55:33 -0700 Subject: [PATCH 01/12] Review Mode (Core) --- codex-rs/core/src/codex.rs | 262 ++++++++- codex-rs/core/src/config.rs | 22 + codex-rs/core/src/review_prompt.md | 3 + codex-rs/core/src/rollout/policy.rs | 4 +- codex-rs/core/tests/suite/mod.rs | 1 + codex-rs/core/tests/suite/review.rs | 531 ++++++++++++++++++ .../src/event_processor_with_human_output.rs | 2 + codex-rs/exec/src/lib.rs | 1 + .../mcp-server/src/codex_message_processor.rs | 1 + codex-rs/mcp-server/src/codex_tool_config.rs | 1 + codex-rs/mcp-server/src/codex_tool_runner.rs | 4 +- codex-rs/protocol/src/protocol.rs | 43 ++ codex-rs/tui/src/chatwidget.rs | 12 +- codex-rs/tui/src/lib.rs | 1 + 14 files changed, 866 insertions(+), 22 deletions(-) create mode 100644 codex-rs/core/src/review_prompt.md create mode 100644 codex-rs/core/tests/suite/review.rs diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 5bd5c50413..bf6ac11359 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -33,6 +33,9 @@ use tracing::info; use tracing::trace; use tracing::warn; +/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize. +const REVIEW_PROMPT: &str = include_str!("review_prompt.md"); + use crate::ModelProviderInfo; use crate::apply_patch; use crate::apply_patch::ApplyPatchExec; @@ -95,6 +98,7 @@ use crate::protocol::Op; use crate::protocol::PatchApplyBeginEvent; use crate::protocol::PatchApplyEndEvent; use crate::protocol::ReviewDecision; +use crate::protocol::ReviewOutputEvent; use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::StreamErrorEvent; @@ -265,6 +269,7 @@ struct State { pending_input: Vec, history: ConversationHistory, token_info: Option, + is_review_mode: bool, } /// Context for an initialized model agent @@ -305,6 +310,7 @@ pub(crate) struct TurnContext { pub(crate) sandbox_policy: SandboxPolicy, pub(crate) shell_environment_policy: ShellEnvironmentPolicy, pub(crate) tools_config: ToolsConfig, + pub(crate) is_review_mode: bool, } impl TurnContext { @@ -475,6 +481,7 @@ impl Session { sandbox_policy, shell_environment_policy: config.shell_environment_policy.clone(), cwd, + is_review_mode: false, }; let sess = Arc::new(Session { @@ -951,6 +958,13 @@ impl Session { state.pending_approvals.clear(); state.pending_input.clear(); if let Some(task) = state.current_task.take() { + if state.is_review_mode { + state.is_review_mode = false; + let _ = self.tx_event.try_send(Event { + id: task.sub_id.clone(), + msg: EventMsg::ExitedReviewMode(None), + }); + } task.abort(TurnAbortReason::Interrupted); } } @@ -1158,6 +1172,7 @@ async fn submission_loop( sandbox_policy: new_sandbox_policy.clone(), shell_environment_policy: prev.shell_environment_policy.clone(), cwd: new_cwd.clone(), + is_review_mode: false, }; // Install the new persistent context for subsequent tasks/turns. @@ -1239,6 +1254,7 @@ async fn submission_loop( sandbox_policy, shell_environment_policy: turn_context.shell_environment_policy.clone(), cwd, + is_review_mode: false, }; // TODO: record the new environment context in the conversation history // no current task, spawn a new one with the per‑turn context @@ -1390,6 +1406,16 @@ async fn submission_loop( }; sess.send_event(event).await; } + Op::Review { prompt } => { + spawn_review_thread( + sess.clone(), + config.clone(), + Arc::clone(&turn_context), + sub.id, + prompt, + ) + .await; + } _ => { // Ignore unknown ops; enum is non_exhaustive to allow extensions. } @@ -1398,6 +1424,90 @@ async fn submission_loop( debug!("Agent loop exited"); } +/// Spawn a review thread using the given prompt. +async fn spawn_review_thread( + sess: Arc, + config: Arc, + parent_turn_context: Arc, + sub_id: String, + prompt: String, +) { + let model = config.review_model.clone(); + let review_model_family = find_family_for_model(&model) + .unwrap_or_else(|| parent_turn_context.client.get_model_family()); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_family: &review_model_family, + approval_policy: parent_turn_context.approval_policy, + sandbox_policy: parent_turn_context.sandbox_policy.clone(), + include_plan_tool: false, + include_apply_patch_tool: config.include_apply_patch_tool, + include_web_search_request: false, + use_streamable_shell_tool: false, + include_view_image_tool: false, + }); + let base_instructions = Some(REVIEW_PROMPT.to_string()); + + let provider = parent_turn_context.client.get_provider(); + let auth_manager = parent_turn_context.client.get_auth_manager(); + let model_family = review_model_family.clone(); + + // Build per‑turn client with the requested model/family. + let mut per_turn_config = (*config).clone(); + per_turn_config.model = model.clone(); + per_turn_config.model_family = model_family.clone(); + if let Some(model_info) = get_model_info(&model_family) { + per_turn_config.model_context_window = Some(model_info.context_window); + } + + let client = ModelClient::new( + Arc::new(per_turn_config), + auth_manager, + provider, + parent_turn_context.client.get_reasoning_effort(), + parent_turn_context.client.get_reasoning_summary(), + sess.conversation_id, + ); + + let review_turn_context = TurnContext { + client, + tools_config, + user_instructions: None, + base_instructions, + approval_policy: parent_turn_context.approval_policy, + sandbox_policy: parent_turn_context.sandbox_policy.clone(), + shell_environment_policy: parent_turn_context.shell_environment_policy.clone(), + cwd: parent_turn_context.cwd.clone(), + is_review_mode: true, + }; + + // Seed the child task with the review prompt as the initial user message. + let input: Vec = vec![InputItem::Text { + text: prompt.clone(), + }]; + let tc = Arc::new(review_turn_context); + + // Clone sub_id for the upcoming announcement before moving it into the task. + let sub_id_for_event = sub_id.clone(); + let task = AgentTask::spawn(sess.clone(), tc.clone(), sub_id, input); + sess.set_task(task); + + { + // Mark session as being in review mode before notifying UIs. + let mut st = sess.state.lock_unchecked(); + st.is_review_mode = true; + } + + // Announce entering review mode so UIs can switch modes. + trace!("emitting EnteredReviewMode"); + let _ = sess + .tx_event + .send(Event { + id: sub_id_for_event, + msg: EventMsg::EnteredReviewMode, + }) + .await; +} + /// Takes a user message as input and runs a loop where, at each turn, the model /// replies with either: /// @@ -1411,6 +1521,10 @@ async fn submission_loop( /// back to the model in the next turn. /// - If the model sends only an assistant message, we record it in the /// conversation history and consider the task complete. +/// +/// Review mode: when `turn_context.is_review_mode` is true, the turn runs in an +/// isolated in-memory thread without the parent session's prior history or +/// user_instructions. Emits ExitedReviewMode upon final review message. async fn run_task( sess: Arc, turn_context: &TurnContext, @@ -1429,8 +1543,17 @@ async fn run_task( sess.send_event(event).await; let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input); - sess.record_input_and_rollout_usermsg(&initial_input_for_turn) - .await; + // For review threads, keep an isolated in-memory history so the + // model sees a fresh conversation without the parent session's history. + // For normal turns, continue recording to the session history as before. + let is_review_mode = turn_context.is_review_mode; + let mut review_thread_history: Vec = Vec::new(); + if is_review_mode { + review_thread_history.push(initial_input_for_turn.clone().into()); + } else { + sess.record_input_and_rollout_usermsg(&initial_input_for_turn) + .await; + } let mut last_agent_message: Option = None; // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains @@ -1446,14 +1569,29 @@ async fn run_task( .into_iter() .map(ResponseItem::from) .collect::>(); - sess.record_conversation_items(&pending_input).await; + if is_review_mode { + if !pending_input.is_empty() { + review_thread_history.extend(pending_input.clone()); + } + } else { + sess.record_conversation_items(&pending_input).await; + } - // Construct the input that we will send to the model. When using the - // Chat completions API (or ZDR clients), the model needs the full + // Construct the input that we will send to the model. + // + // - For review threads, use the isolated in-memory history so the + // model sees a fresh conversation (no parent history/user_instructions). + // + // - For normal turns, use the session's full history. When using the + // chat completions API (or ZDR clients), the model needs the full // conversation history on each turn. The rollout file, however, should // only record the new items that originated in this turn so that it // represents an append-only log without duplicates. - let turn_input: Vec = sess.turn_input_with_history(pending_input); + let turn_input: Vec = if is_review_mode { + review_thread_history.clone() + } else { + sess.turn_input_with_history(pending_input) + }; let turn_input_messages: Vec = turn_input .iter() @@ -1573,8 +1711,13 @@ async fn run_task( // Only attempt to take the lock if there is something to record. if !items_to_record_in_conversation_history.is_empty() { - sess.record_conversation_items(&items_to_record_in_conversation_history) - .await; + if is_review_mode { + review_thread_history + .extend(items_to_record_in_conversation_history.clone()); + } else { + sess.record_conversation_items(&items_to_record_in_conversation_history) + .await; + } } if responses.is_empty() { @@ -1604,7 +1747,56 @@ async fn run_task( } } } + + // If this was a review thread and we have a final assistant message, + // try to parse it as a ReviewOutput. + // + // If parsing fails, construct a minimal ReviewOutputEvent using the plain + // text as the overall explanation. + if turn_context.is_review_mode + && let Some(text) = last_agent_message.clone() + { + { + let mut st = sess.state.lock_unchecked(); + st.is_review_mode = false; + } + + // Emit ExitedReviewMode(Some) to signal completion with a result. + let review = parse_review_output_event(&text); + trace!("emitting ExitedReviewMode(Some(..))"); + let _ = sess + .tx_event + .send(Event { + id: sub_id.clone(), + msg: EventMsg::ExitedReviewMode(Some(review)), + }) + .await; + } + sess.remove_task(&sub_id); + // If this was a review thread, and we haven't already emitted an exit event + // (i.e., still in review mode), send a fallback ExitedReviewMode(None). + if turn_context.is_review_mode { + let should_emit_fallback = { + let mut st = sess.state.lock_unchecked(); + if st.is_review_mode { + st.is_review_mode = false; + true + } else { + false + } + }; + if should_emit_fallback { + trace!("emitting ExitedReviewMode(None)"); + let _ = sess + .tx_event + .send(Event { + id: sub_id.clone(), + msg: EventMsg::ExitedReviewMode(None), + }) + .await; + } + } let event = Event { id: sub_id, msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }), @@ -1612,6 +1804,34 @@ async fn run_task( sess.send_event(event).await; } +/// Parse the review output; when not valid JSON, build a structured +/// fallback that carries the plain text as the overall explanation. +/// +/// Returns: a ReviewOutputEvent parsed from JSON or a fallback populated from text. +fn parse_review_output_event(text: &str) -> ReviewOutputEvent { + // Try direct parse first + if let Ok(ev) = serde_json::from_str::(text) { + return ev; + } + // If wrapped in markdown fences or extra prose, attempt to extract the first JSON object + if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}')) + && start < end + { + let slice = &text[start..=end]; + if let Ok(ev) = serde_json::from_str::(slice) { + return ev; + } + } + // Not JSON – return a structured ReviewOutputEvent that carries + // the plain text as the overall explanation. + ReviewOutputEvent { + findings: Vec::new(), + overall_correctness: String::new(), + overall_explanation: text.to_string(), + overall_confidence_score: 1.0, + } +} + async fn run_turn( sess: &Session, turn_context: &TurnContext, @@ -1827,11 +2047,17 @@ async fn try_run_turn( return Ok(output); } ResponseEvent::OutputTextDelta(delta) => { - let event = Event { - id: sub_id.to_string(), - msg: EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }), - }; - sess.send_event(event).await; + // In review child threads, suppress assistant text deltas; the + // UI will show a selection popup from the final ReviewOutput. + if !turn_context.is_review_mode { + let event = Event { + id: sub_id.to_string(), + msg: EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }), + }; + sess.send_event(event).await; + } else { + trace!("suppressing OutputTextDelta in review mode"); + } } ResponseEvent::ReasoningSummaryDelta(delta) => { let event = Event { @@ -2048,7 +2274,15 @@ async fn handle_response_item( ResponseItem::Message { .. } | ResponseItem::Reasoning { .. } | ResponseItem::WebSearchCall { .. } => { - let msgs = map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning); + // In review child threads, suppress assistant message events but + // keep reasoning/web search. + let msgs = match &item { + ResponseItem::Message { .. } if turn_context.is_review_mode => { + trace!("suppressing assistant Message in review mode"); + Vec::new() + } + _ => map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning), + }; for msg in msgs { let event = Event { id: sub_id.to_string(), diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index 338dd4a2dd..f13e9225ad 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -32,6 +32,7 @@ use toml::Value as TomlValue; use toml_edit::DocumentMut; const OPENAI_DEFAULT_MODEL: &str = "gpt-5"; +const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5"; /// Maximum number of bytes of the documentation that will be embedded. Larger /// files are *silently truncated* to this size so we do not take up too much of @@ -46,6 +47,9 @@ pub struct Config { /// Optional override of model selection. pub model: String, + /// Model used specifically for review sessions. Defaults to "gpt-5". + pub review_model: String, + pub model_family: ModelFamily, /// Size of the context window for the model, in tokens. @@ -385,6 +389,8 @@ fn apply_toml_override(root: &mut TomlValue, path: &str, value: TomlValue) { pub struct ConfigToml { /// Optional override of model selection. pub model: Option, + /// Review model override used by the `/review` feature. + pub review_model: Option, /// Provider to use from the model_providers map. pub model_provider: Option, @@ -612,6 +618,7 @@ impl ConfigToml { #[derive(Default, Debug, Clone)] pub struct ConfigOverrides { pub model: Option, + pub review_model: Option, pub cwd: Option, pub approval_policy: Option, pub sandbox_mode: Option, @@ -639,6 +646,7 @@ impl Config { // Destructure ConfigOverrides fully to ensure all overrides are applied. let ConfigOverrides { model, + review_model: override_review_model, cwd, approval_policy, sandbox_mode, @@ -765,8 +773,14 @@ impl Config { Self::get_base_instructions(experimental_instructions_path, &resolved_cwd)?; let base_instructions = base_instructions.or(file_base_instructions); + // Default review model when not set in config; allow CLI override to take precedence. + let review_model = override_review_model + .or(cfg.review_model) + .unwrap_or_else(default_review_model); + let config = Self { model, + review_model, model_family, model_context_window, model_max_output_tokens, @@ -888,6 +902,10 @@ fn default_model() -> String { OPENAI_DEFAULT_MODEL.to_string() } +fn default_review_model() -> String { + OPENAI_DEFAULT_REVIEW_MODEL.to_string() +} + /// Returns the path to the Codex configuration directory, which can be /// specified by the `CODEX_HOME` environment variable. If not set, defaults to /// `~/.codex`. @@ -1160,6 +1178,7 @@ model_verbosity = "high" assert_eq!( Config { model: "o3".to_string(), + review_model: "gpt-5".to_string(), model_family: find_family_for_model("o3").expect("known model slug"), model_context_window: Some(200_000), model_max_output_tokens: Some(100_000), @@ -1216,6 +1235,7 @@ model_verbosity = "high" )?; let expected_gpt3_profile_config = Config { model: "gpt-3.5-turbo".to_string(), + review_model: "gpt-5".to_string(), model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"), model_context_window: Some(16_385), model_max_output_tokens: Some(4_096), @@ -1287,6 +1307,7 @@ model_verbosity = "high" )?; let expected_zdr_profile_config = Config { model: "o3".to_string(), + review_model: "gpt-5".to_string(), model_family: find_family_for_model("o3").expect("known model slug"), model_context_window: Some(200_000), model_max_output_tokens: Some(100_000), @@ -1344,6 +1365,7 @@ model_verbosity = "high" )?; let expected_gpt5_profile_config = Config { model: "gpt-5".to_string(), + review_model: "gpt-5".to_string(), model_family: find_family_for_model("gpt-5").expect("known model slug"), model_context_window: Some(272_000), model_max_output_tokens: Some(128_000), diff --git a/codex-rs/core/src/review_prompt.md b/codex-rs/core/src/review_prompt.md new file mode 100644 index 0000000000..d60e8c28a1 --- /dev/null +++ b/codex-rs/core/src/review_prompt.md @@ -0,0 +1,3 @@ +# Review guidelines: + +(Will fill out later, pending approval.) diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 676f8e908d..db295d4431 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -36,7 +36,9 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::AgentMessage(_) | EventMsg::AgentReasoning(_) | EventMsg::AgentReasoningRawContent(_) - | EventMsg::TokenCount(_) => true, + | EventMsg::TokenCount(_) + | EventMsg::EnteredReviewMode + | EventMsg::ExitedReviewMode(_) => true, EventMsg::Error(_) | EventMsg::TaskStarted(_) | EventMsg::TaskComplete(_) diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index aabf83bbe3..8f3cd7771d 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -8,6 +8,7 @@ mod exec_stream_events; mod fork_conversation; mod live_cli; mod prompt_caching; +mod review; mod seatbelt; mod stream_error_allows_next_turn; mod stream_no_completed; diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs new file mode 100644 index 0000000000..d1825b6bde --- /dev/null +++ b/codex-rs/core/tests/suite/review.rs @@ -0,0 +1,531 @@ +use codex_core::CodexAuth; +use codex_core::CodexConversation; +use codex_core::ConversationManager; +use codex_core::ModelProviderInfo; +use codex_core::built_in_model_providers; +use codex_core::config::Config; +use codex_core::protocol::EventMsg; +use codex_core::protocol::InputItem; +use codex_core::protocol::Op; +use codex_core::protocol::ReviewCodeLocation; +use codex_core::protocol::ReviewFinding; +use codex_core::protocol::ReviewLineRange; +use codex_core::protocol::ReviewOutputEvent; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use core_test_support::load_default_config_for_test; +use core_test_support::load_sse_fixture_with_id_from_str; +use core_test_support::wait_for_event; +use pretty_assertions::assert_eq; +use std::path::PathBuf; +use std::sync::Arc; +use tempfile::TempDir; +use tokio::io::AsyncWriteExt as _; +use uuid::Uuid; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +/// Verify that submitting `Op::Review` spawns a child task and emits +/// EnteredReviewMode -> ExitedReviewMode(None) -> TaskComplete +/// in that order when the model returns a structured review JSON payload. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_op_emits_lifecycle_and_review_output() { + // Skip under Codex sandbox network restrictions. + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + // Start mock Responses API server. Return a single assistant message whose + // text is a JSON-encoded ReviewOutputEvent. + let review_json = serde_json::json!({ + "findings": [ + { + "title": "Prefer Stylize helpers", + "body": "Use .dim()/.bold() chaining instead of manual Style where possible.", + "confidence_score": 0.9, + "priority": 1, + "code_location": { + "absolute_file_path": "/tmp/file.rs", + "line_range": {"start": 10, "end": 20} + } + } + ], + "overall_correctness": "good", + "overall_explanation": "All good with some improvements suggested.", + "overall_confidence_score": 0.8 + }) + .to_string(); + let sse_template = r#"[ + {"type":"response.output_item.done", "item":{ + "type":"message", "role":"assistant", + "content":[{"type":"output_text","text":__REVIEW__}] + }}, + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let review_json_escaped = serde_json::to_string(&review_json).unwrap(); + let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped); + let server = start_responses_server_with_sse(&sse_raw, 1).await; + let codex_home = TempDir::new().unwrap(); + let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await; + + // Submit review request. + codex + .submit(Op::Review { + prompt: "Please review my changes".to_string(), + }) + .await + .unwrap(); + + // Verify lifecycle: Entered -> Exited(Some(review)) -> TaskComplete. + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await; + let review = match closed { + EventMsg::ExitedReviewMode(Some(r)) => r, + other => panic!("expected ExitedReviewMode(Some(..)), got {other:?}"), + }; + + // Deep compare full structure using PartialEq (floats are f32 on both sides). + let expected = ReviewOutputEvent { + findings: vec![ReviewFinding { + title: "Prefer Stylize helpers".to_string(), + body: "Use .dim()/.bold() chaining instead of manual Style where possible.".to_string(), + confidence_score: 0.9, + priority: 1, + code_location: ReviewCodeLocation { + absolute_file_path: PathBuf::from("/tmp/file.rs"), + line_range: ReviewLineRange { start: 10, end: 20 }, + }, + }], + overall_correctness: "good".to_string(), + overall_explanation: "All good with some improvements suggested.".to_string(), + overall_confidence_score: 0.8, + }; + assert_eq!(expected, review); + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + server.verify().await; +} + +/// When the model returns plain text that is not JSON, ensure the child +/// lifecycle still occurs and the plain text is surfaced via +/// ExitedReviewMode(Some(..)) as the overall_explanation. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_op_with_plain_text_emits_review_fallback() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + let sse_raw = r#"[ + {"type":"response.output_item.done", "item":{ + "type":"message", "role":"assistant", + "content":[{"type":"output_text","text":"just plain text"}] + }}, + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let server = start_responses_server_with_sse(sse_raw, 1).await; + let codex_home = TempDir::new().unwrap(); + let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await; + + codex + .submit(Op::Review { + prompt: "Plain text review".to_string(), + }) + .await + .unwrap(); + + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await; + let review = match closed { + EventMsg::ExitedReviewMode(Some(r)) => r, + other => panic!("expected ExitedReviewMode(Some(..)), got {other:?}"), + }; + + // Expect a structured fallback carrying the plain text. + let expected = ReviewOutputEvent { + findings: vec![], + overall_correctness: String::new(), + overall_explanation: "just plain text".to_string(), + overall_confidence_score: 1.0, + }; + assert_eq!(expected, review); + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + server.verify().await; +} + +/// When the model returns structured JSON in a review, ensure no AgentMessage +/// is emitted; the UI consumes the structured result via ExitedReviewMode. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_does_not_emit_agent_message_on_structured_output() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + let review_json = serde_json::json!({ + "findings": [ + { + "title": "Example", + "body": "Structured review output.", + "confidence_score": 0.5, + "priority": 1, + "code_location": { + "absolute_file_path": "/tmp/file.rs", + "line_range": {"start": 1, "end": 2} + } + } + ], + "overall_correctness": "ok", + "overall_explanation": "ok", + "overall_confidence_score": 0.5 + }) + .to_string(); + let sse_template = r#"[ + {"type":"response.output_item.done", "item":{ + "type":"message", "role":"assistant", + "content":[{"type":"output_text","text":__REVIEW__}] + }}, + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let review_json_escaped = serde_json::to_string(&review_json).unwrap(); + let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped); + let server = start_responses_server_with_sse(&sse_raw, 1).await; + let codex_home = TempDir::new().unwrap(); + let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await; + + codex + .submit(Op::Review { + prompt: "check structured".to_string(), + }) + .await + .unwrap(); + + // Drain events until TaskComplete; ensure none are AgentMessage. + use tokio::time::Duration; + use tokio::time::timeout; + let mut saw_entered = false; + let mut saw_exited = false; + loop { + let ev = timeout(Duration::from_secs(5), codex.next_event()) + .await + .expect("timeout waiting for event") + .expect("stream ended unexpectedly"); + match ev.msg { + EventMsg::TaskComplete(_) => break, + EventMsg::AgentMessage(_) => { + panic!("unexpected AgentMessage during review with structured output") + } + EventMsg::EnteredReviewMode => saw_entered = true, + EventMsg::ExitedReviewMode(_) => saw_exited = true, + _ => {} + } + } + assert!(saw_entered && saw_exited, "missing review lifecycle events"); + + server.verify().await; +} + +/// Ensure that when a custom `review_model` is set in the config, the review +/// request uses that model (and not the main chat model). +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_uses_custom_review_model_from_config() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + // Minimal stream: just a completed event + let sse_raw = r#"[ + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let server = start_responses_server_with_sse(sse_raw, 1).await; + let codex_home = TempDir::new().unwrap(); + // Choose a review model different from the main model; ensure it is used. + let codex = new_conversation_for_server(&server, &codex_home, |cfg| { + cfg.model = "gpt-4.1".to_string(); + cfg.review_model = "gpt-5".to_string(); + }) + .await; + + codex + .submit(Op::Review { + prompt: "use custom model".to_string(), + }) + .await + .unwrap(); + + // Wait for completion + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + // Assert the request body model equals the configured review model + let request = &server.received_requests().await.unwrap()[0]; + let body = request.body_json::().unwrap(); + assert_eq!(body["model"].as_str().unwrap(), "gpt-5"); + + server.verify().await; +} + +/// When a review session begins, it must not prepend prior chat history from +/// the parent session. The request `input` should contain only the review +/// prompt from the user. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_input_isolated_from_parent_history() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + // Mock server for the single review request + let sse_raw = r#"[ + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let server = start_responses_server_with_sse(sse_raw, 1).await; + + // Seed a parent session history via resume file with both user + assistant items. + let codex_home = TempDir::new().unwrap(); + let mut config = load_default_config_for_test(&codex_home); + config.model_provider = ModelProviderInfo { + base_url: Some(format!("{}/v1", server.uri())), + ..built_in_model_providers()["openai"].clone() + }; + + let session_file = codex_home.path().join("resume.jsonl"); + { + let mut f = tokio::fs::File::create(&session_file).await.unwrap(); + let convo_id = Uuid::new_v4(); + // Proper session_meta line (enveloped) with a conversation id + let meta_line = serde_json::json!({ + "timestamp": "2024-01-01T00:00:00.000Z", + "type": "session_meta", + "payload": { + "id": convo_id, + "timestamp": "2024-01-01T00:00:00Z", + "instructions": null, + "cwd": ".", + "originator": "test_originator", + "cli_version": "test_version" + } + }); + f.write_all(format!("{meta_line}\n").as_bytes()) + .await + .unwrap(); + + // Prior user message (enveloped response_item) + let user = codex_protocol::models::ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![codex_protocol::models::ContentItem::InputText { + text: "parent: earlier user message".to_string(), + }], + }; + let user_json = serde_json::to_value(&user).unwrap(); + let user_line = serde_json::json!({ + "timestamp": "2024-01-01T00:00:01.000Z", + "type": "response_item", + "payload": user_json + }); + f.write_all(format!("{user_line}\n").as_bytes()) + .await + .unwrap(); + + // Prior assistant message (enveloped response_item) + let assistant = codex_protocol::models::ResponseItem::Message { + id: None, + role: "assistant".to_string(), + content: vec![codex_protocol::models::ContentItem::OutputText { + text: "parent: assistant reply".to_string(), + }], + }; + let assistant_json = serde_json::to_value(&assistant).unwrap(); + let assistant_line = serde_json::json!({ + "timestamp": "2024-01-01T00:00:02.000Z", + "type": "response_item", + "payload": assistant_json + }); + f.write_all(format!("{assistant_line}\n").as_bytes()) + .await + .unwrap(); + } + config.experimental_resume = Some(session_file); + + let codex = new_conversation_for_server(&server, &codex_home, |cfg| { + // apply resume file + cfg.experimental_resume = config.experimental_resume.clone(); + }) + .await; + + // Submit review request; it must start fresh (no parent history in `input`). + let review_prompt = "Please review only this".to_string(); + codex + .submit(Op::Review { + prompt: review_prompt.clone(), + }) + .await + .unwrap(); + + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + // Assert the request `input` contains only the single review user message. + let request = &server.received_requests().await.unwrap()[0]; + let body = request.body_json::().unwrap(); + let expected_input = serde_json::json!([ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": review_prompt}] + } + ]); + assert_eq!(body["input"], expected_input); + + server.verify().await; +} + +/// After a review thread finishes, its conversation should not leak into the +/// parent session. A subsequent parent turn must not include any review +/// messages in its request `input`. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn review_history_does_not_leak_into_parent_session() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + // Respond to both the review request and the subsequent parent request. + let sse_raw = r#"[ + {"type":"response.output_item.done", "item":{ + "type":"message", "role":"assistant", + "content":[{"type":"output_text","text":"review assistant output"}] + }}, + {"type":"response.completed", "response": {"id": "__ID__"}} + ]"#; + let server = start_responses_server_with_sse(sse_raw, 2).await; + let codex_home = TempDir::new().unwrap(); + let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await; + + // 1) Run a review turn that produces an assistant message (isolated in child). + codex + .submit(Op::Review { + prompt: "Start a review".to_string(), + }) + .await + .unwrap(); + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _closed = wait_for_event(&codex, |ev| { + matches!(ev, EventMsg::ExitedReviewMode(Some(_))) + }) + .await; + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + // 2) Continue in the parent session; request input must not include any review items. + let followup = "back to parent".to_string(); + codex + .submit(Op::UserInput { + items: vec![InputItem::Text { + text: followup.clone(), + }], + }) + .await + .unwrap(); + let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + // Inspect the second request (parent turn) input contents. + // Parent turns include session initial messages (user_instructions, environment_context). + // Critically, no messages from the review thread should appear. + let requests = server.received_requests().await.unwrap(); + assert_eq!(requests.len(), 2); + let body = requests[1].body_json::().unwrap(); + let input = body["input"].as_array().expect("input array"); + + // Must include the followup as the last item for this turn + let last = input.last().expect("at least one item in input"); + assert_eq!(last["role"].as_str().unwrap(), "user"); + let last_text = last["content"][0]["text"].as_str().unwrap(); + assert_eq!(last_text, followup); + + // Ensure no review-thread content leaked into the parent request + let contains_review_prompt = input + .iter() + .any(|msg| msg["content"][0]["text"].as_str().unwrap_or_default() == "Start a review"); + let contains_review_assistant = input.iter().any(|msg| { + msg["content"][0]["text"].as_str().unwrap_or_default() == "review assistant output" + }); + assert!( + !contains_review_prompt, + "review prompt leaked into parent turn input" + ); + assert!( + !contains_review_assistant, + "review assistant output leaked into parent turn input" + ); + + server.verify().await; +} + +/// Start a mock Responses API server and mount the given SSE stream body. +async fn start_responses_server_with_sse(sse_raw: &str, expected_requests: usize) -> MockServer { + let server = MockServer::start().await; + let sse = load_sse_fixture_with_id_from_str(sse_raw, &Uuid::new_v4().to_string()); + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with( + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(sse.clone(), "text/event-stream"), + ) + .expect(expected_requests as u64) + .mount(&server) + .await; + server +} + +/// Create a conversation configured to talk to the provided mock server. +#[expect(clippy::expect_used)] +async fn new_conversation_for_server( + server: &MockServer, + codex_home: &TempDir, + mutator: F, +) -> Arc +where + F: FnOnce(&mut Config), +{ + // Disable client retries for deterministic tests. + unsafe { + std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0"); + std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0"); + } + + let model_provider = ModelProviderInfo { + base_url: Some(format!("{}/v1", server.uri())), + ..built_in_model_providers()["openai"].clone() + }; + let mut config = load_default_config_for_test(codex_home); + config.model_provider = model_provider; + mutator(&mut config); + let conversation_manager = + ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key")); + conversation_manager + .new_conversation(config) + .await + .expect("create conversation") + .conversation +} diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index ae4708976f..4cfdd1a616 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -561,6 +561,8 @@ impl EventProcessor for EventProcessorWithHumanOutput { EventMsg::ShutdownComplete => return CodexStatus::Shutdown, EventMsg::ConversationHistory(_) => {} EventMsg::UserMessage(_) => {} + EventMsg::EnteredReviewMode => {} + EventMsg::ExitedReviewMode(_) => {} } CodexStatus::Running } diff --git a/codex-rs/exec/src/lib.rs b/codex-rs/exec/src/lib.rs index 66d0c09065..e4ee07b3ab 100644 --- a/codex-rs/exec/src/lib.rs +++ b/codex-rs/exec/src/lib.rs @@ -137,6 +137,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option) -> any // Load configuration and determine approval policy let overrides = ConfigOverrides { model, + review_model: None, config_profile, // This CLI is intended to be headless and has no affordances for asking // the user for approval. diff --git a/codex-rs/mcp-server/src/codex_message_processor.rs b/codex-rs/mcp-server/src/codex_message_processor.rs index 53e0b9d5a6..65529f1465 100644 --- a/codex-rs/mcp-server/src/codex_message_processor.rs +++ b/codex-rs/mcp-server/src/codex_message_processor.rs @@ -1145,6 +1145,7 @@ fn derive_config_from_params( } = params; let overrides = ConfigOverrides { model, + review_model: None, config_profile: profile, cwd: cwd.map(PathBuf::from), approval_policy, diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index 7b635d2dc3..d90924aa94 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -152,6 +152,7 @@ impl CodexToolCallParam { // Build the `ConfigOverrides` recognized by codex-core. let overrides = codex_core::config::ConfigOverrides { model, + review_model: None, config_profile: profile, cwd: cwd.map(PathBuf::from), approval_policy: approval_policy.map(Into::into), diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index feeb7f018a..6b5327f4c3 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -279,7 +279,9 @@ async fn run_codex_tool_session_inner( | EventMsg::TurnAborted(_) | EventMsg::ConversationHistory(_) | EventMsg::UserMessage(_) - | EventMsg::ShutdownComplete => { + | EventMsg::ShutdownComplete + | EventMsg::EnteredReviewMode + | EventMsg::ExitedReviewMode(_) => { // For now, we do not do anything extra for these // events. Note that // send(codex_event_to_notification(&event)) above has diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 2f1c46364a..23e5e134da 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -162,6 +162,10 @@ pub enum Op { /// The agent will use its existing context (either conversation history or previous response id) /// to generate a summary which will be returned as an AgentMessage event. Compact, + + /// Request a code review from the agent. + Review { prompt: String }, + /// Request to shut down codex instance. Shutdown, } @@ -500,6 +504,12 @@ pub enum EventMsg { ShutdownComplete, ConversationHistory(ConversationHistoryResponseEvent), + + /// Entered review mode. + EnteredReviewMode, + + /// Exited review mode with an optional final result to apply. + ExitedReviewMode(Option), } // Individual event payload types matching each `EventMsg` variant. @@ -804,6 +814,39 @@ pub struct ConversationHistoryResponseEvent { pub entries: Vec, } +/// Structured review result produced by a child review session. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] +pub struct ReviewOutputEvent { + pub findings: Vec, + pub overall_correctness: String, + pub overall_explanation: String, + pub overall_confidence_score: f32, +} + +/// A single review finding describing an observed issue or recommendation. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] +pub struct ReviewFinding { + pub title: String, + pub body: String, + pub confidence_score: f32, + pub priority: i32, + pub code_location: ReviewCodeLocation, +} + +/// Location of the code related to a review finding. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] +pub struct ReviewCodeLocation { + pub absolute_file_path: PathBuf, + pub line_range: ReviewLineRange, +} + +/// Inclusive line range in a file associated with the finding. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] +pub struct ReviewLineRange { + pub start: u32, + pub end: u32, +} + #[derive(Debug, Clone, Deserialize, Serialize, TS)] pub struct ExecCommandBeginEvent { /// Identifier so this can be paired with the ExecCommandEnd event. diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index dca821f255..e667d60117 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -382,9 +382,9 @@ impl ChatWidget { fn on_web_search_end(&mut self, ev: WebSearchEndEvent) { self.flush_answer_stream_with_separator(); + let WebSearchEndEvent { query, .. } = ev; self.add_to_history(history_cell::new_web_search_call(format!( - "Searched: {}", - ev.query + "Searched: {query}" ))); } @@ -810,10 +810,8 @@ impl ChatWidget { fn dispatch_command(&mut self, cmd: SlashCommand) { if !cmd.available_during_task() && self.bottom_pane.is_task_running() { - let message = format!( - "'/{}' is disabled while a task is in progress.", - cmd.command() - ); + let cmd_name = cmd.command(); + let message = format!("'/{cmd_name}' is disabled while a task is in progress."); self.add_to_history(history_cell::new_error_event(message)); self.request_redraw(); return; @@ -1087,6 +1085,8 @@ impl ChatWidget { self.app_event_tx .send(crate::app_event::AppEvent::ConversationHistory(ev)); } + EventMsg::EnteredReviewMode => {} + EventMsg::ExitedReviewMode(_) => {} } } diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 549037462c..c9077ff0c6 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -121,6 +121,7 @@ pub async fn run_main( let overrides = ConfigOverrides { model, + review_model: None, approval_policy, sandbox_mode, cwd, From a02326735e11b133a221e3e0d049af57dbbfecfc Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Wed, 10 Sep 2025 11:24:17 -0700 Subject: [PATCH 02/12] jif review fixes --- codex-rs/core/{src => }/review_prompt.md | 0 codex-rs/core/src/client_common.rs | 3 + codex-rs/core/src/codex.rs | 124 ++++++++++------------- codex-rs/core/tests/suite/review.rs | 6 -- codex-rs/tui/src/chatwidget.rs | 10 +- 5 files changed, 61 insertions(+), 82 deletions(-) rename codex-rs/core/{src => }/review_prompt.md (100%) diff --git a/codex-rs/core/src/review_prompt.md b/codex-rs/core/review_prompt.md similarity index 100% rename from codex-rs/core/src/review_prompt.md rename to codex-rs/core/review_prompt.md diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 2f849239ab..940eadb8bb 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -19,6 +19,9 @@ use tokio::sync::mpsc; /// with this content. const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md"); +/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize. +pub const REVIEW_PROMPT: &str = include_str!("../review_prompt.md"); + /// API request payload for a single model turn #[derive(Default, Debug, Clone)] pub struct Prompt { diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index bf6ac11359..3b4d3c876b 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::collections::HashMap; use std::collections::HashSet; +use std::mem; use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex; @@ -9,6 +10,7 @@ use std::sync::atomic::AtomicU64; use std::time::Duration; use crate::AuthManager; +use crate::client_common::REVIEW_PROMPT; use crate::event_mapping::map_response_item_to_event_messages; use crate::rollout::recorder::RolloutItem; use async_channel::Receiver; @@ -33,9 +35,6 @@ use tracing::info; use tracing::trace; use tracing::warn; -/// Review thread system prompt. Edit `core/src/review_prompt.md` to customize. -const REVIEW_PROMPT: &str = include_str!("review_prompt.md"); - use crate::ModelProviderInfo; use crate::apply_patch; use crate::apply_patch::ApplyPatchExec; @@ -958,13 +957,11 @@ impl Session { state.pending_approvals.clear(); state.pending_input.clear(); if let Some(task) = state.current_task.take() { - if state.is_review_mode { - state.is_review_mode = false; - let _ = self.tx_event.try_send(Event { - id: task.sub_id.clone(), - msg: EventMsg::ExitedReviewMode(None), - }); - } + let sub_id = task.sub_id.clone(); + let sess = task.sess.clone(); + tokio::spawn(async move { + try_exit_review_mode(sess, sub_id, None).await; + }); task.abort(TurnAbortReason::Interrupted); } } @@ -1445,8 +1442,8 @@ async fn spawn_review_thread( use_streamable_shell_tool: false, include_view_image_tool: false, }); - let base_instructions = Some(REVIEW_PROMPT.to_string()); + let base_instructions = Some(REVIEW_PROMPT.to_string()); let provider = parent_turn_context.client.get_provider(); let auth_manager = parent_turn_context.client.get_auth_manager(); let model_family = review_model_family.clone(); @@ -1481,9 +1478,7 @@ async fn spawn_review_thread( }; // Seed the child task with the review prompt as the initial user message. - let input: Vec = vec![InputItem::Text { - text: prompt.clone(), - }]; + let input: Vec = vec![InputItem::Text { text: prompt }]; let tc = Arc::new(review_turn_context); // Clone sub_id for the upcoming announcement before moving it into the task. @@ -1549,7 +1544,7 @@ async fn run_task( let is_review_mode = turn_context.is_review_mode; let mut review_thread_history: Vec = Vec::new(); if is_review_mode { - review_thread_history.push(initial_input_for_turn.clone().into()); + review_thread_history.push(initial_input_for_turn.into()); } else { sess.record_input_and_rollout_usermsg(&initial_input_for_turn) .await; @@ -1569,13 +1564,6 @@ async fn run_task( .into_iter() .map(ResponseItem::from) .collect::>(); - if is_review_mode { - if !pending_input.is_empty() { - review_thread_history.extend(pending_input.clone()); - } - } else { - sess.record_conversation_items(&pending_input).await; - } // Construct the input that we will send to the model. // @@ -1583,13 +1571,17 @@ async fn run_task( // model sees a fresh conversation (no parent history/user_instructions). // // - For normal turns, use the session's full history. When using the - // chat completions API (or ZDR clients), the model needs the full - // conversation history on each turn. The rollout file, however, should - // only record the new items that originated in this turn so that it - // represents an append-only log without duplicates. + // chat completions API (or ZDR clients), the model needs the full + // conversation history on each turn. The rollout file, however, should + // only record the new items that originated in this turn so that it + // represents an append-only log without duplicates. let turn_input: Vec = if is_review_mode { + if !pending_input.is_empty() { + review_thread_history.extend(pending_input); + } review_thread_history.clone() } else { + sess.record_conversation_items(&pending_input).await; sess.turn_input_with_history(pending_input) }; @@ -1752,51 +1744,19 @@ async fn run_task( // try to parse it as a ReviewOutput. // // If parsing fails, construct a minimal ReviewOutputEvent using the plain - // text as the overall explanation. - if turn_context.is_review_mode - && let Some(text) = last_agent_message.clone() - { - { - let mut st = sess.state.lock_unchecked(); - st.is_review_mode = false; - } - - // Emit ExitedReviewMode(Some) to signal completion with a result. - let review = parse_review_output_event(&text); - trace!("emitting ExitedReviewMode(Some(..))"); - let _ = sess - .tx_event - .send(Event { - id: sub_id.clone(), - msg: EventMsg::ExitedReviewMode(Some(review)), - }) - .await; + // text as the overall explanation. Else, just exit review mode with None. + // + // Also sets Session State's is_review_mode => false. + if turn_context.is_review_mode { + try_exit_review_mode( + sess.clone(), + sub_id.clone(), + last_agent_message.as_deref().map(parse_review_output_event), + ) + .await; } sess.remove_task(&sub_id); - // If this was a review thread, and we haven't already emitted an exit event - // (i.e., still in review mode), send a fallback ExitedReviewMode(None). - if turn_context.is_review_mode { - let should_emit_fallback = { - let mut st = sess.state.lock_unchecked(); - if st.is_review_mode { - st.is_review_mode = false; - true - } else { - false - } - }; - if should_emit_fallback { - trace!("emitting ExitedReviewMode(None)"); - let _ = sess - .tx_event - .send(Event { - id: sub_id.clone(), - msg: EventMsg::ExitedReviewMode(None), - }) - .await; - } - } let event = Event { id: sub_id, msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }), @@ -1816,11 +1776,10 @@ fn parse_review_output_event(text: &str) -> ReviewOutputEvent { // If wrapped in markdown fences or extra prose, attempt to extract the first JSON object if let (Some(start), Some(end)) = (text.find('{'), text.rfind('}')) && start < end + && let Some(slice) = text.get(start..=end) + && let Ok(ev) = serde_json::from_str::(slice) { - let slice = &text[start..=end]; - if let Ok(ev) = serde_json::from_str::(slice) { - return ev; - } + return ev; } // Not JSON – return a structured ReviewOutputEvent that carries // the plain text as the overall explanation. @@ -3184,6 +3143,27 @@ fn convert_call_tool_result_to_function_call_output_payload( } } +/// Sets session state's is_review_mode = false, and +/// emits an ExitedReviewMode Event with optional ReviewOutput. +async fn try_exit_review_mode( + session: Arc, + task_sub_id: String, + res: Option, +) { + let is_review_mode = { + let mut st = session.state.lock_unchecked(); + mem::take(&mut st.is_review_mode) + }; + + if is_review_mode { + let event = Event { + id: task_sub_id, + msg: EventMsg::ExitedReviewMode(res), + }; + session.send_event(event).await; + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index d1825b6bde..8359784398 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -508,12 +508,6 @@ async fn new_conversation_for_server( where F: FnOnce(&mut Config), { - // Disable client retries for deterministic tests. - unsafe { - std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0"); - std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0"); - } - let model_provider = ModelProviderInfo { base_url: Some(format!("{}/v1", server.uri())), ..built_in_model_providers()["openai"].clone() diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index e667d60117..da947ddb91 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -382,9 +382,9 @@ impl ChatWidget { fn on_web_search_end(&mut self, ev: WebSearchEndEvent) { self.flush_answer_stream_with_separator(); - let WebSearchEndEvent { query, .. } = ev; self.add_to_history(history_cell::new_web_search_call(format!( - "Searched: {query}" + "Searched: {}", + ev.query ))); } @@ -810,8 +810,10 @@ impl ChatWidget { fn dispatch_command(&mut self, cmd: SlashCommand) { if !cmd.available_during_task() && self.bottom_pane.is_task_running() { - let cmd_name = cmd.command(); - let message = format!("'/{cmd_name}' is disabled while a task is in progress."); + let message = format!( + "'/{}' is disabled while a task is in progress.", + cmd.command() + ); self.add_to_history(history_cell::new_error_event(message)); self.request_redraw(); return; From a9c727b9b6b7972c0bde495eae30503d868e1bff Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Thu, 11 Sep 2025 19:43:49 -0700 Subject: [PATCH 03/12] rm is_review_mode from State --- codex-rs/core/src/codex.rs | 86 ++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3b4d3c876b..b25eafcdd7 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use std::collections::HashMap; use std::collections::HashSet; -use std::mem; use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex; @@ -268,7 +267,6 @@ struct State { pending_input: Vec, history: ConversationHistory, token_info: Option, - is_review_mode: bool, } /// Context for an initialized model agent @@ -957,11 +955,6 @@ impl Session { state.pending_approvals.clear(); state.pending_input.clear(); if let Some(task) = state.current_task.take() { - let sub_id = task.sub_id.clone(); - let sess = task.sess.clone(); - tokio::spawn(async move { - try_exit_review_mode(sess, sub_id, None).await; - }); task.abort(TurnAbortReason::Interrupted); } } @@ -1017,11 +1010,19 @@ pub(crate) struct ApplyPatchCommandContext { pub(crate) changes: HashMap, } +#[derive(Clone, Debug, Eq, PartialEq)] +enum AgentTaskKind { + Regular, + Review, + Compact, +} + /// A series of Turns in response to user input. pub(crate) struct AgentTask { sess: Arc, sub_id: String, handle: AbortHandle, + kind: AgentTaskKind, } impl AgentTask { @@ -1042,6 +1043,28 @@ impl AgentTask { sess, sub_id, handle, + kind: AgentTaskKind::Regular, + } + } + + fn review( + sess: Arc, + turn_context: Arc, + sub_id: String, + input: Vec, + ) -> Self { + let handle = { + let sess = sess.clone(); + let sub_id = sub_id.clone(); + let tc = Arc::clone(&turn_context); + tokio::spawn(async move { run_task(sess, tc.as_ref(), sub_id, input).await }) + .abort_handle() + }; + Self { + sess, + sub_id, + handle, + kind: AgentTaskKind::Review, } } @@ -1065,12 +1088,20 @@ impl AgentTask { sess, sub_id, handle, + kind: AgentTaskKind::Compact, } } fn abort(self, reason: TurnAbortReason) { // TOCTOU? if !self.handle.is_finished() { + if self.kind == AgentTaskKind::Review { + let sess = self.sess.clone(); + let sub_id = self.sub_id.clone(); + tokio::spawn(async move { + try_exit_review_mode(sess, sub_id, None).await; + }); + } self.handle.abort(); let event = Event { id: self.sub_id, @@ -1483,24 +1514,15 @@ async fn spawn_review_thread( // Clone sub_id for the upcoming announcement before moving it into the task. let sub_id_for_event = sub_id.clone(); - let task = AgentTask::spawn(sess.clone(), tc.clone(), sub_id, input); + let task = AgentTask::review(sess.clone(), tc.clone(), sub_id, input); sess.set_task(task); - { - // Mark session as being in review mode before notifying UIs. - let mut st = sess.state.lock_unchecked(); - st.is_review_mode = true; - } - // Announce entering review mode so UIs can switch modes. - trace!("emitting EnteredReviewMode"); - let _ = sess - .tx_event - .send(Event { - id: sub_id_for_event, - msg: EventMsg::EnteredReviewMode, - }) - .await; + sess.send_event(Event { + id: sub_id_for_event, + msg: EventMsg::EnteredReviewMode, + }) + .await; } /// Takes a user message as input and runs a loop where, at each turn, the model @@ -1746,7 +1768,7 @@ async fn run_task( // If parsing fails, construct a minimal ReviewOutputEvent using the plain // text as the overall explanation. Else, just exit review mode with None. // - // Also sets Session State's is_review_mode => false. + // Emits an ExitedReviewMode event with the parsed review output. if turn_context.is_review_mode { try_exit_review_mode( sess.clone(), @@ -3143,25 +3165,17 @@ fn convert_call_tool_result_to_function_call_output_payload( } } -/// Sets session state's is_review_mode = false, and -/// emits an ExitedReviewMode Event with optional ReviewOutput. +/// Emits an ExitedReviewMode Event with optional ReviewOutput. async fn try_exit_review_mode( session: Arc, task_sub_id: String, res: Option, ) { - let is_review_mode = { - let mut st = session.state.lock_unchecked(); - mem::take(&mut st.is_review_mode) + let event = Event { + id: task_sub_id, + msg: EventMsg::ExitedReviewMode(res), }; - - if is_review_mode { - let event = Event { - id: task_sub_id, - msg: EventMsg::ExitedReviewMode(res), - }; - session.send_event(event).await; - } + session.send_event(event).await; } #[cfg(test)] From 8b7093e125b7c7d43f7b69e0bfa4ef9df4067ab7 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Thu, 11 Sep 2025 20:10:30 -0700 Subject: [PATCH 04/12] Add review prompt --- codex-rs/core/review_prompt.md | 86 +++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/codex-rs/core/review_prompt.md b/codex-rs/core/review_prompt.md index d60e8c28a1..01d93598a7 100644 --- a/codex-rs/core/review_prompt.md +++ b/codex-rs/core/review_prompt.md @@ -1,3 +1,87 @@ # Review guidelines: -(Will fill out later, pending approval.) +You are acting as a reviewer for a proposed code change made by another engineer. + +Below are some default guidelines for determining whether the original author would appreciate the issue being flagged. + +These are not the final word in determining whether an issue is a bug. In many cases, you will encounter other, more specific guidelines. These may be present elsewhere in a developer message, a user message, a file, or even elsewhere in this system message. +Those guidelines should be considered to override these general instructions. + +Here are the general guidelines for determining whether something is a bug and should be flagged. + +1. It meaningfully impacts the accuracy, performance, security, or maintainability of the code. +2. The bug is discrete and actionable (i.e. not a general issue with the codebase or a combination of multiple issues). +3. Fixing the bug does not demand a level of rigor that is not present in the rest of the codebase (e.g. one doesn't need very detailed comments and input validation in a repository of one-off scripts in personal projects) +4. The bug was introduced in the commit (pre-existing bugs should not be flagged). +5. The author of the original PR would likely fix the issue if they were made aware of it. +6. The bug does not rely on unstated assumptions about the codebase or author's intent. +7. It is not enough to speculate that a change may disrupt another part of the codebase, to be considered a bug, one must identify the other parts of the code that are provably affected. +8. The bug is clearly not just an intentional change by the original author. + +When flagging a bug, you will also provide an accompanying comment. Once again, these guidelines are not the final word on how to construct a comment -- defer to any subsequent guidelines that you encounter. + +1. The comment should be clear about why the issue is a bug. +2. The comment should appropriately communicate the severity of the issue. It should not claim that an issue is more severe than it actually is. +3. The comment should be brief. The body should be at most 1 paragraph. It should not introduce line breaks within the natural language flow unless it is necessary for the code fragment. +4. The comment should not include any chunks of code longer than 3 lines. Any code chunks should be wrapped in markdown inline code tags or a code block. +5. The comment should clearly and explicitly communicate the scenarios, environments, or inputs that are necessary for the bug to arise. The comment should immediately indicate that the issue's severity depends on these factors. +6. The comment's tone should be matter-of-fact and not accusatory or overly positive. It should read as a helpful AI assistant suggestion without sounding too much like a human reviewer. +7. The comment should be written such that the original author can immediately grasp the idea without close reading. +8. The comment should avoid excessive flattery and comments that are not helpful to the original author. The comment should avoid phrasing like "Great job ...", "Thanks for ...". + +Below are some more detailed guidelines that you should apply to this specific review. + +HOW MANY FINDINGS TO RETURN: + +Output all findings that the original author would fix if they knew about it. If there is no finding that a person would definitely love to see and fix, prefer outputting no findings. Do not stop at the first qualifying finding. Continue until you've listed every qualifying finding. + +GUIDELINES: + +- Ignore trivial style unless it obscures meaning or violates documented standards. +- Use one comment per distinct issue (or a multi-line range if necessary). +- Use ```suggestion blocks ONLY for concrete replacement code (minimal lines; no commentary inside the block). +- In every ```suggestion block, preserve the exact leading whitespace of the replaced lines (spaces vs tabs, number of spaces). +- Do NOT introduce or remove outer indentation levels unless that is the actual fix. + +The comments will be presented in the code review as inline comments. You should avoid providing unnecessary location details in the comment body. Always keep the line range as short as possible for interpreting the issue. Avoid ranges longer than 5–10 lines; instead, choose the most suitable subrange that pinpoints the problem. + +At the beginning of the finding title, tag the bug with priority level. For example "[P1] Un-padding slices along wrong tensor dimensions". [P0] – Drop everything to fix. Blocking release, operations, or major usage. Only use for universal issues that do not depend on any assumptions about the inputs. · [P1] – Urgent. Should be addressed in the next cycle · [P2] – Normal. To be fixed eventually · [P3] – Low. Nice to have. + +Additionally, include a numeric priority field in the JSON output for each finding: set "priority" to 0 for P0, 1 for P1, 2 for P2, or 3 for P3. If a priority cannot be determined, omit the field or use null. + +At the end of your findings, output an "overall correctness" verdict of whether or not the patch should be considered "correct". +Correct implies that existing code and tests will not break, and the patch is free of bugs and other blocking issues. +Ignore non-blocking issues such as style, formatting, typos, documentation, and other nits. + +FORMATTING GUIDELINES: +The finding description should be one paragraph. + +OUTPUT FORMAT: + +## Output schema — MUST MATCH *exactly* + +```json +{ + "findings": [ + { + "title": "<≤ 80 chars, imperative>", + "body": "", + "confidence_score": , + "priority": , + "code_location": { + "absolute_file_path": "", + "line_range": {"start": , "end": } + } + } + ], + "overall_correctness": "patch is correct" | "patch is incorrect", + "overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>", + "overall_confidence_score": +} +``` + +* **Do not** wrap the JSON in markdown fences or extra prose. +* The code_location field is required and must include absolute_file_path and line_range. +*Line ranges must be as short as possible for interpreting the issue (avoid ranges over 5–10 lines; pick the most suitable subrange). +* The code_location should overlap with the diff. +* Do not generate a PR fix. \ No newline at end of file From e01ea039ef34c204e95bb663e90ca3eb2c6b0dc7 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Thu, 11 Sep 2025 20:14:28 -0700 Subject: [PATCH 05/12] rnm --- codex-rs/core/src/codex.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index b25eafcdd7..f2412a5044 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1099,7 +1099,7 @@ impl AgentTask { let sess = self.sess.clone(); let sub_id = self.sub_id.clone(); tokio::spawn(async move { - try_exit_review_mode(sess, sub_id, None).await; + exit_review_mode(sess, sub_id, None).await; }); } self.handle.abort(); @@ -1770,7 +1770,7 @@ async fn run_task( // // Emits an ExitedReviewMode event with the parsed review output. if turn_context.is_review_mode { - try_exit_review_mode( + exit_review_mode( sess.clone(), sub_id.clone(), last_agent_message.as_deref().map(parse_review_output_event), @@ -3166,7 +3166,7 @@ fn convert_call_tool_result_to_function_call_output_payload( } /// Emits an ExitedReviewMode Event with optional ReviewOutput. -async fn try_exit_review_mode( +async fn exit_review_mode( session: Arc, task_sub_id: String, res: Option, From 2ec526eed1ba7d9d5e60a5c100f7961e17290660 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Thu, 11 Sep 2025 20:38:51 -0700 Subject: [PATCH 06/12] Fix compile --- codex-rs/core/src/codex.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3685e5af5c..67451e58c9 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1511,6 +1511,7 @@ async fn spawn_review_thread( include_web_search_request: false, use_streamable_shell_tool: false, include_view_image_tool: false, + experimental_unified_exec_tool: config.use_experimental_unified_exec_tool, }); let base_instructions = Some(REVIEW_PROMPT.to_string()); From daa0b6896a4d22683cea47db2814a7a9f166fd16 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 12:37:04 -0700 Subject: [PATCH 07/12] Add default --- codex-rs/core/tests/suite/review.rs | 4 +--- codex-rs/protocol/src/protocol.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index 8359784398..5be4fd8c2a 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -150,10 +150,8 @@ async fn review_op_with_plain_text_emits_review_fallback() { // Expect a structured fallback carrying the plain text. let expected = ReviewOutputEvent { - findings: vec![], - overall_correctness: String::new(), overall_explanation: "just plain text".to_string(), - overall_confidence_score: 1.0, + ..Default::default() }; assert_eq!(expected, review); let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 0f9195ed78..ff19df6f45 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -956,6 +956,17 @@ pub struct ReviewOutputEvent { pub overall_confidence_score: f32, } +impl Default for ReviewOutputEvent { + fn default() -> Self { + Self { + findings: Vec::new(), + overall_correctness: String::default(), + overall_explanation: String::default(), + overall_confidence_score: 0.0, + } + } +} + /// A single review finding describing an observed issue or recommendation. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] pub struct ReviewFinding { From acf35d4654717a4a25a4bb5287416681309f7d10 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 12:38:30 -0700 Subject: [PATCH 08/12] fmt --- codex-rs/core/src/codex.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 77d48c1963..609307196d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -10,9 +10,6 @@ use std::time::Duration; use crate::AuthManager; use crate::client_common::REVIEW_PROMPT; -use crate::config_edit::CONFIG_KEY_EFFORT; -use crate::config_edit::CONFIG_KEY_MODEL; -use crate::config_edit::persist_non_null_overrides; use crate::event_mapping::map_response_item_to_event_messages; use async_channel::Receiver; use async_channel::Sender; From 69ffd2a49beecec247c64ce8339caffc8ce806e9 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 13:31:39 -0700 Subject: [PATCH 09/12] change to .clone() --- codex-rs/core/src/codex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 609307196d..afe9043881 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1460,7 +1460,7 @@ async fn submission_loop( spawn_review_thread( sess.clone(), config.clone(), - Arc::clone(&turn_context), + turn_context.clone(), sub.id, prompt, ) From ec0f859dfd1ac87ec4f64fb72dd9b8679c46936a Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 13:32:49 -0700 Subject: [PATCH 10/12] ..default --- codex-rs/core/src/codex.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index afe9043881..68f1511d9a 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1829,10 +1829,8 @@ fn parse_review_output_event(text: &str) -> ReviewOutputEvent { // Not JSON – return a structured ReviewOutputEvent that carries // the plain text as the overall explanation. ReviewOutputEvent { - findings: Vec::new(), - overall_correctness: String::new(), overall_explanation: text.to_string(), - overall_confidence_score: 1.0, + ..Default::default() } } From 2b31e9c2ac282620b80b621de60f16c617a6c5e3 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 16:10:12 -0700 Subject: [PATCH 11/12] Add review_request arg to Op::Review and to EnteredReviewMode --- codex-rs/core/src/codex.rs | 13 +++--- codex-rs/core/src/rollout/policy.rs | 2 +- codex-rs/core/tests/suite/review.rs | 43 +++++++++++++------ .../src/event_processor_with_human_output.rs | 2 +- codex-rs/mcp-server/src/codex_tool_runner.rs | 2 +- codex-rs/protocol/src/protocol.rs | 11 ++++- 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index edeed508e1..a8b58f4510 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -18,6 +18,7 @@ use codex_apply_patch::MaybeApplyPatchVerified; use codex_apply_patch::maybe_parse_apply_patch_verified; use codex_protocol::mcp_protocol::ConversationId; use codex_protocol::protocol::ConversationPathResponseEvent; +use codex_protocol::protocol::ReviewRequest; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::TaskStartedEvent; use codex_protocol::protocol::TurnAbortReason; @@ -1474,13 +1475,13 @@ async fn submission_loop( }; sess.send_event(event).await; } - Op::Review { prompt } => { + Op::Review { review_request } => { spawn_review_thread( sess.clone(), config.clone(), turn_context.clone(), sub.id, - prompt, + review_request, ) .await; } @@ -1498,7 +1499,7 @@ async fn spawn_review_thread( config: Arc, parent_turn_context: Arc, sub_id: String, - prompt: String, + review_request: ReviewRequest, ) { let model = config.review_model.clone(); let review_model_family = find_family_for_model(&model) @@ -1550,7 +1551,9 @@ async fn spawn_review_thread( }; // Seed the child task with the review prompt as the initial user message. - let input: Vec = vec![InputItem::Text { text: prompt }]; + let input: Vec = vec![InputItem::Text { + text: review_request.prompt.clone(), + }]; let tc = Arc::new(review_turn_context); // Clone sub_id for the upcoming announcement before moving it into the task. @@ -1561,7 +1564,7 @@ async fn spawn_review_thread( // Announce entering review mode so UIs can switch modes. sess.send_event(Event { id: sub_id_for_event, - msg: EventMsg::EnteredReviewMode, + msg: EventMsg::EnteredReviewMode(review_request), }) .await; } diff --git a/codex-rs/core/src/rollout/policy.rs b/codex-rs/core/src/rollout/policy.rs index 284bda3a71..f46fa7cab5 100644 --- a/codex-rs/core/src/rollout/policy.rs +++ b/codex-rs/core/src/rollout/policy.rs @@ -39,7 +39,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool { | EventMsg::AgentReasoning(_) | EventMsg::AgentReasoningRawContent(_) | EventMsg::TokenCount(_) - | EventMsg::EnteredReviewMode + | EventMsg::EnteredReviewMode(_) | EventMsg::ExitedReviewMode(_) => true, EventMsg::Error(_) | EventMsg::TaskStarted(_) diff --git a/codex-rs/core/tests/suite/review.rs b/codex-rs/core/tests/suite/review.rs index 5be4fd8c2a..21d447e25d 100644 --- a/codex-rs/core/tests/suite/review.rs +++ b/codex-rs/core/tests/suite/review.rs @@ -11,6 +11,7 @@ use codex_core::protocol::ReviewCodeLocation; use codex_core::protocol::ReviewFinding; use codex_core::protocol::ReviewLineRange; use codex_core::protocol::ReviewOutputEvent; +use codex_core::protocol::ReviewRequest; use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use core_test_support::load_default_config_for_test; use core_test_support::load_sse_fixture_with_id_from_str; @@ -76,13 +77,16 @@ async fn review_op_emits_lifecycle_and_review_output() { // Submit review request. codex .submit(Op::Review { - prompt: "Please review my changes".to_string(), + review_request: ReviewRequest { + prompt: "Please review my changes".to_string(), + user_facing_hint: "my changes".to_string(), + }, }) .await .unwrap(); // Verify lifecycle: Entered -> Exited(Some(review)) -> TaskComplete. - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await; let review = match closed { EventMsg::ExitedReviewMode(Some(r)) => r, @@ -136,12 +140,15 @@ async fn review_op_with_plain_text_emits_review_fallback() { codex .submit(Op::Review { - prompt: "Plain text review".to_string(), + review_request: ReviewRequest { + prompt: "Plain text review".to_string(), + user_facing_hint: "plain text review".to_string(), + }, }) .await .unwrap(); - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; let closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(_))).await; let review = match closed { EventMsg::ExitedReviewMode(Some(r)) => r, @@ -203,7 +210,10 @@ async fn review_does_not_emit_agent_message_on_structured_output() { codex .submit(Op::Review { - prompt: "check structured".to_string(), + review_request: ReviewRequest { + prompt: "check structured".to_string(), + user_facing_hint: "check structured".to_string(), + }, }) .await .unwrap(); @@ -223,7 +233,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() { EventMsg::AgentMessage(_) => { panic!("unexpected AgentMessage during review with structured output") } - EventMsg::EnteredReviewMode => saw_entered = true, + EventMsg::EnteredReviewMode(_) => saw_entered = true, EventMsg::ExitedReviewMode(_) => saw_exited = true, _ => {} } @@ -259,13 +269,16 @@ async fn review_uses_custom_review_model_from_config() { codex .submit(Op::Review { - prompt: "use custom model".to_string(), + review_request: ReviewRequest { + prompt: "use custom model".to_string(), + user_facing_hint: "use custom model".to_string(), + }, }) .await .unwrap(); // Wait for completion - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -372,12 +385,15 @@ async fn review_input_isolated_from_parent_history() { let review_prompt = "Please review only this".to_string(); codex .submit(Op::Review { - prompt: review_prompt.clone(), + review_request: ReviewRequest { + prompt: review_prompt.clone(), + user_facing_hint: review_prompt.clone(), + }, }) .await .unwrap(); - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; let _closed = wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExitedReviewMode(None))).await; let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; @@ -423,11 +439,14 @@ async fn review_history_does_not_leak_into_parent_session() { // 1) Run a review turn that produces an assistant message (isolated in child). codex .submit(Op::Review { - prompt: "Start a review".to_string(), + review_request: ReviewRequest { + prompt: "Start a review".to_string(), + user_facing_hint: "Start a review".to_string(), + }, }) .await .unwrap(); - let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode)).await; + let _entered = wait_for_event(&codex, |ev| matches!(ev, EventMsg::EnteredReviewMode(_))).await; let _closed = wait_for_event(&codex, |ev| { matches!(ev, EventMsg::ExitedReviewMode(Some(_))) }) diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 7c7beea918..c126208fad 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -562,7 +562,7 @@ impl EventProcessor for EventProcessorWithHumanOutput { EventMsg::ShutdownComplete => return CodexStatus::Shutdown, EventMsg::ConversationPath(_) => {} EventMsg::UserMessage(_) => {} - EventMsg::EnteredReviewMode => {} + EventMsg::EnteredReviewMode(_) => {} EventMsg::ExitedReviewMode(_) => {} } CodexStatus::Running diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index f8beedf3b9..db48da28e2 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -280,7 +280,7 @@ async fn run_codex_tool_session_inner( | EventMsg::ConversationPath(_) | EventMsg::UserMessage(_) | EventMsg::ShutdownComplete - | EventMsg::EnteredReviewMode + | EventMsg::EnteredReviewMode(_) | EventMsg::ExitedReviewMode(_) => { // For now, we do not do anything extra for these // events. Note that diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 6e1487bbb5..6f102cdeb1 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -168,7 +168,7 @@ pub enum Op { Compact, /// Request a code review from the agent. - Review { prompt: String }, + Review { review_request: ReviewRequest }, /// Request to shut down codex instance. Shutdown, @@ -510,7 +510,7 @@ pub enum EventMsg { ConversationPath(ConversationPathResponseEvent), /// Entered review mode. - EnteredReviewMode, + EnteredReviewMode(ReviewRequest), /// Exited review mode with an optional final result to apply. ExitedReviewMode(Option), @@ -972,6 +972,13 @@ pub struct GitInfo { pub repository_url: Option, } +/// Review request sent to the review session. +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] +pub struct ReviewRequest { + pub prompt: String, + pub user_facing_hint: String, +} + /// Structured review result produced by a child review session. #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)] pub struct ReviewOutputEvent { From 28343548816838e2a2e9c45f608cf6e4d36d436b Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Fri, 12 Sep 2025 16:13:38 -0700 Subject: [PATCH 12/12] Fix --- codex-rs/core/src/codex.rs | 3 +-- codex-rs/tui/src/chatwidget.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index a8b58f4510..9aa8486a0d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1083,8 +1083,7 @@ impl AgentTask { let sess = sess.clone(); let sub_id = sub_id.clone(); let tc = Arc::clone(&turn_context); - tokio::spawn(async move { run_task(sess, tc.as_ref(), sub_id, input).await }) - .abort_handle() + tokio::spawn(async move { run_task(sess, tc, sub_id, input).await }).abort_handle() }; Self { sess, diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 8a83c289e2..aabf3defeb 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1089,7 +1089,7 @@ impl ChatWidget { self.app_event_tx .send(crate::app_event::AppEvent::ConversationHistory(ev)); } - EventMsg::EnteredReviewMode => {} + EventMsg::EnteredReviewMode(_) => {} EventMsg::ExitedReviewMode(_) => {} } }