diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index ad3947aa94..40ccbf6769 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -43,7 +43,7 @@ mod models; mod openai_model_info; mod openai_tools; pub mod plan_tool; -mod project_doc; +pub mod project_doc; mod rollout; pub(crate) mod safety; pub mod seatbelt; diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 3591b5b1d8..a869b40cb3 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -1,18 +1,19 @@ //! Project-level documentation discovery. //! -//! Project-level documentation can be stored in a file named `AGENTS.md`. -//! Currently, we include only the contents of the first file found as follows: +//! Project-level documentation can be stored in files named `AGENTS.md`. +//! We include the concatenation of all files found along the path from the +//! repository root to the current working directory as follows: //! -//! 1. Look for the doc file in the current working directory (as determined -//! by the `Config`). -//! 2. If not found, walk *upwards* until the Git repository root is reached -//! (detected by the presence of a `.git` directory/file), or failing that, -//! the filesystem root. -//! 3. If the Git root is encountered, look for the doc file there. If it -//! exists, the search stops – we do **not** walk past the Git root. +//! 1. Determine the Git repository root by walking upwards from the current +//! working directory until a `.git` directory or file is found. If no Git +//! root is found, only the current working directory is considered. +//! 2. Collect every `AGENTS.md` found from the repository root down to the +//! current working directory (inclusive) and concatenate their contents in +//! that order. +//! 3. We do **not** walk past the Git root. 
use crate::config::Config; -use std::path::Path; +use std::path::PathBuf; use tokio::io::AsyncReadExt; use tracing::error; @@ -26,7 +27,7 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n"; /// Combines `Config::instructions` and `AGENTS.md` (if present) into a single /// string of instructions. pub(crate) async fn get_user_instructions(config: &Config) -> Option { - match find_project_doc(config).await { + match read_project_docs(config).await { Ok(Some(project_doc)) => match &config.user_instructions { Some(original_instructions) => Some(format!( "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}" @@ -41,95 +42,135 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option { } } -/// Attempt to locate and load the project documentation. Currently, the search -/// starts from `Config::cwd`, but if we may want to consider other directories -/// in the future, e.g., additional writable directories in the `SandboxPolicy`. +/// Attempt to locate and load the project documentation. /// -/// On success returns `Ok(Some(contents))`. If no documentation file is found -/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as -/// `Err` so callers can decide how to handle them. -async fn find_project_doc(config: &Config) -> std::io::Result> { - let max_bytes = config.project_doc_max_bytes; - - // Attempt to load from the working directory first. - if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? { - return Ok(Some(doc)); +/// On success returns `Ok(Some(contents))` where `contents` is the +/// concatenation of all discovered docs. If no documentation file is found the +/// function returns `Ok(None)`. Unexpected I/O failures bubble up as `Err` so +/// callers can decide how to handle them. 
+pub async fn read_project_docs(config: &Config) -> std::io::Result<Option<String>> {
+    let max_total = config.project_doc_max_bytes;
+
+    if max_total == 0 {
+        return Ok(None);
     }
 
-    // Walk up towards the filesystem root, stopping once we encounter the Git
-    // repository root. The presence of **either** a `.git` *file* or
-    // *directory* counts.
-    let mut dir = config.cwd.clone();
-
-    // Canonicalize the path so that we do not end up in an infinite loop when
-    // `cwd` contains `..` components.
-    if let Ok(canon) = dir.canonicalize() {
-        dir = canon;
+    let paths = discover_project_doc_paths(config)?;
+    if paths.is_empty() {
+        return Ok(None);
     }
 
-    while let Some(parent) = dir.parent() {
-        // `.git` can be a *file* (for worktrees or submodules) or a *dir*.
-        let git_marker = dir.join(".git");
-        let git_exists = match tokio::fs::metadata(&git_marker).await {
-            Ok(_) => true,
-            Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
-            Err(e) => return Err(e),
-        };
+    let mut remaining: u64 = max_total as u64; // byte budget shared by all docs
+    let mut parts: Vec<String> = Vec::new();
 
-        if git_exists {
-            // We are at the repo root – attempt one final load.
-            if let Some(doc) = load_first_candidate(&dir, CANDIDATE_FILENAMES, max_bytes).await? {
-                return Ok(Some(doc));
-            }
+    for p in paths {
+        if remaining == 0 {
             break;
         }
 
-        dir = parent.to_path_buf();
-    }
-
-    Ok(None)
-}
-
-/// Attempt to load the first candidate file found in `dir`. Returns the file
-/// contents (truncated if it exceeds `max_bytes`) when successful.
-async fn load_first_candidate(
-    dir: &Path,
-    names: &[&str],
-    max_bytes: usize,
-) -> std::io::Result<Option<String>> {
-    for name in names {
-        let candidate = dir.join(name);
-
-        let file = match tokio::fs::File::open(&candidate).await {
+        let file = match tokio::fs::File::open(&p).await {
+            Ok(f) => f,
             Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
             Err(e) => return Err(e),
-            Ok(f) => f,
         };
 
         let size = file.metadata().await?.len();
+        let mut reader = tokio::io::BufReader::new(file).take(remaining); // cap read at budget
+        let mut data: Vec<u8> = Vec::new();
+        reader.read_to_end(&mut data).await?;
 
-        let reader = tokio::io::BufReader::new(file);
-        let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
-        let mut limited = reader.take(max_bytes as u64);
-        limited.read_to_end(&mut data).await?;
-
-        if size as usize > max_bytes {
+        if size > remaining {
             tracing::warn!(
-                "Project doc `{}` exceeds {max_bytes} bytes - truncating.",
-                candidate.display(),
+                "Project doc `{}` exceeds remaining budget ({} bytes) - truncating.",
+                p.display(),
+                remaining,
             );
         }
 
-        let contents = String::from_utf8_lossy(&data).to_string();
-        if contents.trim().is_empty() {
-            // Empty file – treat as not found.
-            continue;
+        let text = String::from_utf8_lossy(&data).to_string();
+        if !text.trim().is_empty() {
+            parts.push(text);
+            remaining = remaining.saturating_sub(data.len() as u64); // only kept docs use budget
+        }
+    }
+
+    if parts.is_empty() {
+        Ok(None)
+    } else {
+        Ok(Some(parts.join("\n\n")))
+    }
+}
+
+/// Discover the list of AGENTS.md files using the same search rules as
+/// `read_project_docs`, but return the file paths instead of concatenated
+/// contents. The list is ordered from repository root to the current working
+/// directory (inclusive). Symlinks are allowed. When `project_doc_max_bytes`
+/// is zero, returns an empty list.
+pub fn discover_project_doc_paths(config: &Config) -> std::io::Result<Vec<PathBuf>> {
+    // A zero byte budget disables project docs, so report no files at all.
+    if config.project_doc_max_bytes == 0 {
+        return Ok(Vec::new());
+    }
+
+    // Build chain from cwd upwards and detect git root.
+    let mut cursor = config.cwd.canonicalize().unwrap_or_else(|_| config.cwd.clone());
+    let mut chain: Vec<PathBuf> = vec![cursor.clone()];
+    let mut git_root: Option<PathBuf> = None;
+    while let Some(parent) = cursor.parent() {
+        let git_marker = cursor.join(".git");
+        let git_exists = match std::fs::metadata(&git_marker) {
+            Ok(_) => true,
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
+            Err(e) => return Err(e),
+        };
+
+        if git_exists {
+            git_root = Some(cursor.clone());
+            break;
         }
 
-        return Ok(Some(contents));
+        chain.push(parent.to_path_buf());
+        cursor = parent.to_path_buf();
     }
 
-    Ok(None)
+    let search_dirs: Vec<PathBuf> = if let Some(root) = git_root {
+        let mut dirs: Vec<PathBuf> = Vec::new();
+        let mut saw_root = false;
+        for p in chain.iter().rev() {
+            if !saw_root {
+                if p == &root {
+                    saw_root = true;
+                } else {
+                    continue;
+                }
+            }
+            dirs.push(p.clone());
+        }
+        dirs
+    } else {
+        vec![config.cwd.clone()]
+    };
+
+    let mut found: Vec<PathBuf> = Vec::new();
+    for d in search_dirs {
+        for name in CANDIDATE_FILENAMES {
+            let candidate = d.join(name);
+            match std::fs::symlink_metadata(&candidate) {
+                Ok(md) => {
+                    let ft = md.file_type();
+                    // Allow regular files and symlinks; opening will later fail for dangling links.
+                    if ft.is_file() || ft.is_symlink() {
+                        found.push(candidate);
+                        break; // first matching candidate name wins for this directory
+                    }
+                }
+                Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
+                Err(e) => return Err(e),
+            }
+        }
+    }
+
+    Ok(found)
 }
 
 #[cfg(test)]
@@ -278,4 +319,32 @@ mod tests {
 
         assert_eq!(res, Some(INSTRUCTIONS.to_string()));
     }
+
+    /// When both the repository root and the working directory contain
+    /// AGENTS.md files, their contents are concatenated from root to cwd.
+    #[tokio::test]
+    async fn concatenates_root_and_cwd_docs() {
+        let repo = tempfile::tempdir().expect("tempdir");
+
+        // Simulate a git repository.
+        std::fs::write(
+            repo.path().join(".git"),
+            "gitdir: /path/to/actual/git/dir\n",
+        )
+        .unwrap();
+
+        // Repo root doc.
+        fs::write(repo.path().join("AGENTS.md"), "root doc").unwrap();
+
+        // Nested working directory with its own doc.
+        let nested = repo.path().join("workspace/crate_a");
+        std::fs::create_dir_all(&nested).unwrap();
+        fs::write(nested.join("AGENTS.md"), "crate doc").unwrap();
+
+        let mut cfg = make_config(&repo, 4096, None);
+        cfg.cwd = nested;
+
+        let res = get_user_instructions(&cfg).await.expect("doc expected");
+        assert_eq!(res, "root doc\n\ncrate doc");
+    }
 }
diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs
index 3583aaa397..0e48204be0 100644
--- a/codex-rs/tui/src/history_cell.rs
+++ b/codex-rs/tui/src/history_cell.rs
@@ -11,6 +11,7 @@ use codex_core::config::Config;
 use codex_core::plan_tool::PlanItemArg;
 use codex_core::plan_tool::StepStatus;
 use codex_core::plan_tool::UpdatePlanArgs;
+use codex_core::project_doc::discover_project_doc_paths;
 use codex_core::protocol::FileChange;
 use codex_core::protocol::McpInvocation;
 use codex_core::protocol::SandboxPolicy;
@@ -538,6 +539,54 @@ pub(crate) fn new_status_output(
         sandbox_name.into(),
     ]));
 
+    // AGENTS.md files discovered via core's project_doc logic
+    let agents_list = {
+        match discover_project_doc_paths(config) {
+            Ok(paths) => {
+                let mut rels: Vec<String> = Vec::new();
+                for p in paths {
+                    let display = if let Some(parent) = p.parent() {
+                        if parent == config.cwd {
+                            "AGENTS.md".to_string()
+                        } else {
+                            let mut cur = config.cwd.as_path();
+                            let mut ups = 0usize;
+                            let mut reached = false;
+                            while let Some(c) = cur.parent() {
+                                if cur == parent {
+                                    reached = true;
+                                    break;
+                                }
+                                cur = c;
+                                ups += 1;
+                            }
+                            if reached {
+                                format!("{}AGENTS.md", "../".repeat(ups))
+                            } else if let Ok(stripped) = p.strip_prefix(&config.cwd) {
+                                stripped.display().to_string()
+                            } else {
+                                p.display().to_string()
+                            }
+                        }
+                    } else {
+                        p.display().to_string()
+                    };
+                    rels.push(display);
+                }
+                rels
+            }
+            Err(_) => Vec::new(),
+        }
+    };
+    if agents_list.is_empty() {
+        lines.push(Line::from(" • AGENTS files: (none)"));
+    } else {
+        lines.push(Line::from(vec![
+            " • AGENTS files: ".into(),
+            agents_list.join(", ").into(),
+        ]));
+    }
+
     lines.push(Line::from(""));
 
     // 👤 Account (only if ChatGPT tokens exist), shown under the first block