-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Include command output when sending timeout to model #3576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c012b6e
a1c8fa2
b0a5fb8
29ff462
ad76f12
2d208b1
aa30ed4
6061af3
a19d7ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
use crate::exec::ExecToolCallOutput; | ||
use crate::token_data::KnownPlan; | ||
use crate::token_data::PlanType; | ||
use codex_protocol::mcp_protocol::ConversationId; | ||
|
@@ -13,8 +14,11 @@ pub type Result<T> = std::result::Result<T, CodexErr>; | |
#[derive(Error, Debug)] | ||
pub enum SandboxErr { | ||
/// Error from sandbox execution | ||
#[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")] | ||
Denied(i32, String, String), | ||
#[error( | ||
"sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}", | ||
.output.exit_code, .output.stdout.text, .output.stderr.text | ||
)] | ||
Denied { output: Box<ExecToolCallOutput> }, | ||
pakrym-oai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/// Error from linux seccomp filter setup | ||
#[cfg(target_os = "linux")] | ||
|
@@ -28,7 +32,7 @@ pub enum SandboxErr { | |
|
||
/// Command timed out | ||
#[error("command timed out")] | ||
Timeout, | ||
Timeout { output: Box<ExecToolCallOutput> }, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have an opinion. Don't even know when it's used. |
||
|
||
/// Command was killed by a signal | ||
#[error("command was killed by a signal")] | ||
|
@@ -245,9 +249,12 @@ impl CodexErr { | |
|
||
pub fn get_error_message_ui(e: &CodexErr) -> String { | ||
match e { | ||
CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(), | ||
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(), | ||
// Timeouts are not sandbox errors from a UX perspective; present them plainly | ||
CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(), | ||
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!( | ||
"error: command timed out after {} ms", | ||
output.duration.as_millis() | ||
), | ||
_ => e.to_string(), | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000; | |
const SIGKILL_CODE: i32 = 9; | ||
const TIMEOUT_CODE: i32 = 64; | ||
const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal | ||
const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code | ||
|
||
// I/O buffer sizing | ||
const READ_CHUNK_SIZE: usize = 8192; // bytes per read | ||
|
@@ -86,11 +87,12 @@ pub async fn process_exec_tool_call( | |
) -> Result<ExecToolCallOutput> { | ||
let start = Instant::now(); | ||
|
||
let timeout_duration = params.timeout_duration(); | ||
|
||
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type | ||
{ | ||
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await, | ||
SandboxType::MacosSeatbelt => { | ||
let timeout = params.timeout_duration(); | ||
let ExecParams { | ||
command, cwd, env, .. | ||
} = params; | ||
|
@@ -102,10 +104,9 @@ pub async fn process_exec_tool_call( | |
env, | ||
) | ||
.await?; | ||
consume_truncated_output(child, timeout, stdout_stream.clone()).await | ||
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await | ||
} | ||
SandboxType::LinuxSeccomp => { | ||
let timeout = params.timeout_duration(); | ||
let ExecParams { | ||
command, cwd, env, .. | ||
} = params; | ||
|
@@ -123,41 +124,56 @@ pub async fn process_exec_tool_call( | |
) | ||
.await?; | ||
|
||
consume_truncated_output(child, timeout, stdout_stream).await | ||
consume_truncated_output(child, timeout_duration, stdout_stream).await | ||
} | ||
}; | ||
let duration = start.elapsed(); | ||
match raw_output_result { | ||
Ok(raw_output) => { | ||
let stdout = raw_output.stdout.from_utf8_lossy(); | ||
let stderr = raw_output.stderr.from_utf8_lossy(); | ||
#[allow(unused_mut)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this because of the #[cfg(unix)]
let mut timed_out = raw_output.timed_out;
#[cfg(not(unix))]
let timed_out = raw_output.timed_out; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is because of the cfg, I think #[allow(unused_mut)] is a bit cleaner. |
||
let mut timed_out = raw_output.timed_out; | ||
|
||
#[cfg(target_family = "unix")] | ||
match raw_output.exit_status.signal() { | ||
Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)), | ||
Some(signal) => { | ||
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal))); | ||
{ | ||
if let Some(signal) = raw_output.exit_status.signal() { | ||
if signal == TIMEOUT_CODE { | ||
timed_out = true; | ||
} else { | ||
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal))); | ||
} | ||
} | ||
None => {} | ||
} | ||
|
||
let exit_code = raw_output.exit_status.code().unwrap_or(-1); | ||
|
||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { | ||
return Err(CodexErr::Sandbox(SandboxErr::Denied( | ||
exit_code, | ||
stdout.text, | ||
stderr.text, | ||
))); | ||
let mut exit_code = raw_output.exit_status.code().unwrap_or(-1); | ||
if timed_out { | ||
exit_code = EXEC_TIMEOUT_EXIT_CODE; | ||
} | ||
|
||
Ok(ExecToolCallOutput { | ||
let stdout = raw_output.stdout.from_utf8_lossy(); | ||
let stderr = raw_output.stderr.from_utf8_lossy(); | ||
let aggregated_output = raw_output.aggregated_output.from_utf8_lossy(); | ||
let exec_output = ExecToolCallOutput { | ||
exit_code, | ||
stdout, | ||
stderr, | ||
aggregated_output: raw_output.aggregated_output.from_utf8_lossy(), | ||
aggregated_output, | ||
duration, | ||
}) | ||
timed_out, | ||
}; | ||
|
||
if timed_out { | ||
return Err(CodexErr::Sandbox(SandboxErr::Timeout { | ||
output: Box::new(exec_output), | ||
})); | ||
} | ||
|
||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { | ||
return Err(CodexErr::Sandbox(SandboxErr::Denied { | ||
output: Box::new(exec_output), | ||
})); | ||
} | ||
|
||
Ok(exec_output) | ||
} | ||
Err(err) => { | ||
tracing::error!("exec error: {err}"); | ||
|
@@ -197,6 +213,7 @@ struct RawExecToolCallOutput { | |
pub stdout: StreamOutput<Vec<u8>>, | ||
pub stderr: StreamOutput<Vec<u8>>, | ||
pub aggregated_output: StreamOutput<Vec<u8>>, | ||
pub timed_out: bool, | ||
} | ||
|
||
impl StreamOutput<String> { | ||
|
@@ -229,6 +246,7 @@ pub struct ExecToolCallOutput { | |
pub stderr: StreamOutput<String>, | ||
pub aggregated_output: StreamOutput<String>, | ||
pub duration: Duration, | ||
pub timed_out: bool, | ||
} | ||
|
||
async fn exec( | ||
|
@@ -298,22 +316,24 @@ async fn consume_truncated_output( | |
Some(agg_tx.clone()), | ||
)); | ||
|
||
let exit_status = tokio::select! { | ||
let (exit_status, timed_out) = tokio::select! { | ||
result = tokio::time::timeout(timeout, child.wait()) => { | ||
match result { | ||
Ok(Ok(exit_status)) => exit_status, | ||
Ok(e) => e?, | ||
Ok(status_result) => { | ||
let exit_status = status_result?; | ||
(exit_status, false) | ||
} | ||
Err(_) => { | ||
// timeout | ||
child.start_kill()?; | ||
// Debatable whether `child.wait().await` should be called here. | ||
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE) | ||
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true) | ||
} | ||
} | ||
} | ||
_ = tokio::signal::ctrl_c() => { | ||
child.start_kill()?; | ||
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE) | ||
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false) | ||
} | ||
}; | ||
|
||
|
@@ -336,6 +356,7 @@ async fn consume_truncated_output( | |
stdout, | ||
stderr, | ||
aggregated_output, | ||
timed_out, | ||
}) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,11 @@ | |
|
||
use std::collections::HashMap; | ||
use std::path::PathBuf; | ||
use std::time::Duration; | ||
|
||
use async_channel::Receiver; | ||
use codex_core::error::CodexErr; | ||
use codex_core::error::SandboxErr; | ||
use codex_core::exec::ExecParams; | ||
use codex_core::exec::SandboxType; | ||
use codex_core::exec::StdoutStream; | ||
|
@@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() { | |
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n"); | ||
assert_eq!(result.aggregated_output.truncated_after_lines, None); | ||
} | ||
|
||
#[tokio::test] | ||
async fn test_exec_timeout_returns_partial_output() { | ||
let cmd = vec![ | ||
"/bin/sh".to_string(), | ||
"-c".to_string(), | ||
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(), | ||
]; | ||
|
||
let params = ExecParams { | ||
command: cmd, | ||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), | ||
timeout_ms: Some(200), | ||
env: HashMap::new(), | ||
with_escalated_permissions: None, | ||
justification: None, | ||
}; | ||
|
||
let policy = SandboxPolicy::new_read_only_policy(); | ||
|
||
let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await; | ||
|
||
let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else { | ||
panic!("expected timeout error"); | ||
}; | ||
|
||
assert_eq!(output.exit_code, 124); | ||
assert_eq!(output.stdout.text, "before\n"); | ||
assert!(output.stderr.text.is_empty()); | ||
assert_eq!(output.aggregated_output.text, "before\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not in the output. It gets formatted in right before the result is sent back to model in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe integration test would be helpful? to see what got sent to the model. just to make sure that the model knows it timed out. |
||
assert!(output.duration >= Duration::from_millis(200)); | ||
assert!(output.timed_out); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the case where the command times out, but is not run in a sandbox (i.e.,
--dangerously-bypass-approvals-and-sandbox
) also covered by this case even though it isn't run in a "sandbox"?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it all goes through the
run_exec_with_events
who's result we're processing here.