Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ impl Session {
aggregated_output,
duration,
exit_code,
timed_out: _,
} = output;
// Send full stdout/stderr to clients; do not truncate.
let stdout = stdout.text.clone();
Expand Down Expand Up @@ -890,13 +891,15 @@ impl Session {
let output_stderr;
let borrowed: &ExecToolCallOutput = match &result {
Ok(output) => output,
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
Err(e) => {
output_stderr = ExecToolCallOutput {
exit_code: -1,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(get_error_message_ui(e)),
aggregated_output: StreamOutput::new(get_error_message_ui(e)),
duration: Duration::default(),
timed_out: false,
};
&output_stderr
}
Expand Down Expand Up @@ -2887,15 +2890,12 @@ async fn handle_sandbox_error(
let sub_id = exec_command_context.sub_id.clone();
let cwd = exec_command_context.cwd.clone();

// if the command timed out, we can simply return this failure to the model
if matches!(error, SandboxErr::Timeout) {
if let SandboxErr::Timeout { output } = &error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the case where the command times out, but is not run in a sandbox (i.e., --dangerously-bypass-approvals-and-sandbox) also covered by this case even though it isn't run in a "sandbox"?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it all goes through the run_exec_with_events who's result we're processing here.

let content = format_exec_output(output);
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!(
"command timed out after {} milliseconds",
params.timeout_duration().as_millis()
),
content,
success: Some(false),
},
};
Expand Down Expand Up @@ -3020,7 +3020,17 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.

let s = aggregated_output.text.as_str();
let mut s = &aggregated_output.text;
let prefixed_str: String;

if exec_output.timed_out {
prefixed_str = format!(
"command timed out after {} milliseconds\n",
exec_output.duration.as_millis()
) + s;
s = &prefixed_str;
}

let total_lines = s.lines().count();
if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return s.to_string();
Expand Down Expand Up @@ -3063,6 +3073,7 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
// Build final string respecting byte budgets
let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));

result.push_str(head_part);
result.push_str(&marker);

Expand Down Expand Up @@ -3271,6 +3282,7 @@ mod tests {
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full),
duration: StdDuration::from_secs(1),
timed_out: false,
};

let out = format_exec_output_str(&exec);
Expand Down Expand Up @@ -3313,6 +3325,7 @@ mod tests {
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full.clone()),
duration: StdDuration::from_secs(1),
timed_out: false,
};

let out = format_exec_output_str(&exec);
Expand All @@ -3335,6 +3348,25 @@ mod tests {
);
}

#[test]
fn includes_timed_out_message() {
let exec = ExecToolCallOutput {
exit_code: 0,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new("Command output".to_string()),
duration: StdDuration::from_secs(1),
timed_out: true,
};

let out = format_exec_output_str(&exec);

assert_eq!(
out,
"command timed out after 1000 milliseconds\nCommand output"
);
}

#[test]
fn falls_back_to_content_when_structured_is_null() {
let ctr = CallToolResult {
Expand Down
17 changes: 12 additions & 5 deletions codex-rs/core/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::exec::ExecToolCallOutput;
use crate::token_data::KnownPlan;
use crate::token_data::PlanType;
use codex_protocol::mcp_protocol::ConversationId;
Expand All @@ -13,8 +14,11 @@ pub type Result<T> = std::result::Result<T, CodexErr>;
#[derive(Error, Debug)]
pub enum SandboxErr {
/// Error from sandbox execution
#[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")]
Denied(i32, String, String),
#[error(
"sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}",
.output.exit_code, .output.stdout.text, .output.stderr.text
)]
Denied { output: Box<ExecToolCallOutput> },

/// Error from linux seccomp filter setup
#[cfg(target_os = "linux")]
Expand All @@ -28,7 +32,7 @@ pub enum SandboxErr {

/// Command timed out
#[error("command timed out")]
Timeout,
Timeout { output: Box<ExecToolCallOutput> },
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the error() include fields from output like Denied?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have an opinion. Don't even know when it's used.


/// Command was killed by a signal
#[error("command was killed by a signal")]
Expand Down Expand Up @@ -245,9 +249,12 @@ impl CodexErr {

pub fn get_error_message_ui(e: &CodexErr) -> String {
match e {
CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
// Timeouts are not sandbox errors from a UX perspective; present them plainly
CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(),
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
"error: command timed out after {} ms",
output.duration.as_millis()
),
_ => e.to_string(),
}
}
Expand Down
75 changes: 48 additions & 27 deletions codex-rs/core/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
const SIGKILL_CODE: i32 = 9;
const TIMEOUT_CODE: i32 = 64;
const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal
const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code

// I/O buffer sizing
const READ_CHUNK_SIZE: usize = 8192; // bytes per read
Expand Down Expand Up @@ -86,11 +87,12 @@ pub async fn process_exec_tool_call(
) -> Result<ExecToolCallOutput> {
let start = Instant::now();

let timeout_duration = params.timeout_duration();

let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
{
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
SandboxType::MacosSeatbelt => {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
} = params;
Expand All @@ -102,10 +104,9 @@ pub async fn process_exec_tool_call(
env,
)
.await?;
consume_truncated_output(child, timeout, stdout_stream.clone()).await
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
}
SandboxType::LinuxSeccomp => {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
} = params;
Expand All @@ -123,41 +124,56 @@ pub async fn process_exec_tool_call(
)
.await?;

consume_truncated_output(child, timeout, stdout_stream).await
consume_truncated_output(child, timeout_duration, stdout_stream).await
}
};
let duration = start.elapsed();
match raw_output_result {
Ok(raw_output) => {
let stdout = raw_output.stdout.from_utf8_lossy();
let stderr = raw_output.stderr.from_utf8_lossy();
#[allow(unused_mut)]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this because of the #[cfg(target_family = "unix")] below? If so, alternatively:

  #[cfg(unix)]
  let mut timed_out = raw_output.timed_out;
  #[cfg(not(unix))]
  let timed_out = raw_output.timed_out;

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is because of the cfg, I think #[allow(unused_mut)] is a bit cleaner.

let mut timed_out = raw_output.timed_out;

#[cfg(target_family = "unix")]
match raw_output.exit_status.signal() {
Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
Some(signal) => {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
{
if let Some(signal) = raw_output.exit_status.signal() {
if signal == TIMEOUT_CODE {
timed_out = true;
} else {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
}
}
None => {}
}

let exit_code = raw_output.exit_status.code().unwrap_or(-1);

if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
return Err(CodexErr::Sandbox(SandboxErr::Denied(
exit_code,
stdout.text,
stderr.text,
)));
let mut exit_code = raw_output.exit_status.code().unwrap_or(-1);
if timed_out {
exit_code = EXEC_TIMEOUT_EXIT_CODE;
}

Ok(ExecToolCallOutput {
let stdout = raw_output.stdout.from_utf8_lossy();
let stderr = raw_output.stderr.from_utf8_lossy();
let aggregated_output = raw_output.aggregated_output.from_utf8_lossy();
let exec_output = ExecToolCallOutput {
exit_code,
stdout,
stderr,
aggregated_output: raw_output.aggregated_output.from_utf8_lossy(),
aggregated_output,
duration,
})
timed_out,
};

if timed_out {
return Err(CodexErr::Sandbox(SandboxErr::Timeout {
output: Box::new(exec_output),
}));
}

if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
return Err(CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(exec_output),
}));
}

Ok(exec_output)
}
Err(err) => {
tracing::error!("exec error: {err}");
Expand Down Expand Up @@ -197,6 +213,7 @@ struct RawExecToolCallOutput {
pub stdout: StreamOutput<Vec<u8>>,
pub stderr: StreamOutput<Vec<u8>>,
pub aggregated_output: StreamOutput<Vec<u8>>,
pub timed_out: bool,
}

impl StreamOutput<String> {
Expand Down Expand Up @@ -229,6 +246,7 @@ pub struct ExecToolCallOutput {
pub stderr: StreamOutput<String>,
pub aggregated_output: StreamOutput<String>,
pub duration: Duration,
pub timed_out: bool,
}

async fn exec(
Expand Down Expand Up @@ -298,22 +316,24 @@ async fn consume_truncated_output(
Some(agg_tx.clone()),
));

let exit_status = tokio::select! {
let (exit_status, timed_out) = tokio::select! {
result = tokio::time::timeout(timeout, child.wait()) => {
match result {
Ok(Ok(exit_status)) => exit_status,
Ok(e) => e?,
Ok(status_result) => {
let exit_status = status_result?;
(exit_status, false)
}
Err(_) => {
// timeout
child.start_kill()?;
// Debatable whether `child.wait().await` should be called here.
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE)
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
}
}
}
_ = tokio::signal::ctrl_c() => {
child.start_kill()?;
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE)
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false)
}
};

Expand All @@ -336,6 +356,7 @@ async fn consume_truncated_output(
stdout,
stderr,
aggregated_output,
timed_out,
})
}

Expand Down
36 changes: 36 additions & 0 deletions codex-rs/core/tests/suite/exec_stream_events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;

use async_channel::Receiver;
use codex_core::error::CodexErr;
use codex_core::error::SandboxErr;
use codex_core::exec::ExecParams;
use codex_core::exec::SandboxType;
use codex_core::exec::StdoutStream;
Expand Down Expand Up @@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() {
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
assert_eq!(result.aggregated_output.truncated_after_lines, None);
}

#[tokio::test]
async fn test_exec_timeout_returns_partial_output() {
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
];

let params = ExecParams {
command: cmd,
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
timeout_ms: Some(200),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};

let policy = SandboxPolicy::new_read_only_policy();

let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await;

let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
panic!("expected timeout error");
};

assert_eq!(output.exit_code, 124);
assert_eq!(output.stdout.text, "before\n");
assert!(output.stderr.text.is_empty());
assert_eq!(output.aggregated_output.text, "before\n");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is command timed out after {} milliseconds\n in the putput?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not in the output. It gets formatted in right before the result is sent back to model in codex.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe integration test would be helpful? to see what got sent to the model. just to make sure that the model knows it timed out.

assert!(output.duration >= Duration::from_millis(200));
assert!(output.timed_out);
}
Loading
Loading