Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codex-rs/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ mod models;
mod openai_model_info;
mod openai_tools;
pub mod plan_tool;
mod project_doc;
pub mod project_doc;
mod rollout;
pub(crate) mod safety;
pub mod seatbelt;
Expand Down
223 changes: 146 additions & 77 deletions codex-rs/core/src/project_doc.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
//! Project-level documentation discovery.
//!
//! Project-level documentation can be stored in a file named `AGENTS.md`.
//! Currently, we include only the contents of the first file found as follows:
//! Project-level documentation can be stored in files named `AGENTS.md`.
//! We include the concatenation of all files found along the path from the
//! repository root to the current working directory as follows:
//!
//! 1. Look for the doc file in the current working directory (as determined
//! by the `Config`).
//! 2. If not found, walk *upwards* until the Git repository root is reached
//! (detected by the presence of a `.git` directory/file), or failing that,
//! the filesystem root.
//! 3. If the Git root is encountered, look for the doc file there. If it
//! exists, the search stops – we do **not** walk past the Git root.
//! 1. Determine the Git repository root by walking upwards from the current
//! working directory until a `.git` directory or file is found. If no Git
//! root is found, only the current working directory is considered.
//! 2. Collect every `AGENTS.md` found from the repository root down to the
//! current working directory (inclusive) and concatenate their contents in
//! that order.
//! 3. We do **not** walk past the Git root.

use crate::config::Config;
use std::path::Path;
use std::path::PathBuf;
use tokio::io::AsyncReadExt;
use tracing::error;

Expand All @@ -26,7 +27,7 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
/// string of instructions.
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
match find_project_doc(config).await {
match read_project_docs(config).await {
Ok(Some(project_doc)) => match &config.user_instructions {
Some(original_instructions) => Some(format!(
"{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
Expand All @@ -41,95 +42,135 @@ pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
}
}

/// Attempt to locate and load the project documentation. Currently, the search
/// starts from `Config::cwd`, but if we may want to consider other directories
/// in the future, e.g., additional writable directories in the `SandboxPolicy`.
/// Attempt to locate and load the project documentation.
///
/// On success returns `Ok(Some(contents))`. If no documentation file is found
/// the function returns `Ok(None)`. Unexpected I/O failures bubble up as
/// `Err` so callers can decide how to handle them.
async fn find_project_doc(config: &Config) -> std::io::Result<Option<String>> {
let max_bytes = config.project_doc_max_bytes;

// Attempt to load from the working directory first.
if let Some(doc) = load_first_candidate(&config.cwd, CANDIDATE_FILENAMES, max_bytes).await? {
return Ok(Some(doc));
/// On success returns `Ok(Some(contents))` where `contents` is the
/// concatenation of all discovered docs. If no documentation file is found the
/// function returns `Ok(None)`. Unexpected I/O failures bubble up as `Err` so
/// callers can decide how to handle them.
pub async fn read_project_docs(config: &Config) -> std::io::Result<Option<String>> {
let max_total = config.project_doc_max_bytes;

if max_total == 0 {
return Ok(None);
}

// Walk up towards the filesystem root, stopping once we encounter the Git
// repository root. The presence of **either** a `.git` *file* or
// *directory* counts.
let mut dir = config.cwd.clone();

// Canonicalize the path so that we do not end up in an infinite loop when
// `cwd` contains `..` components.
if let Ok(canon) = dir.canonicalize() {
dir = canon;
let paths = discover_project_doc_paths(config)?;
if paths.is_empty() {
return Ok(None);
}

while let Some(parent) = dir.parent() {
// `.git` can be a *file* (for worktrees or submodules) or a *dir*.
let git_marker = dir.join(".git");
let git_exists = match tokio::fs::metadata(&git_marker).await {
Ok(_) => true,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
Err(e) => return Err(e),
};
let mut remaining: u64 = max_total as u64;
let mut parts: Vec<String> = Vec::new();

if git_exists {
// We are at the repo root – attempt one final load.
if let Some(doc) = load_first_candidate(&dir, CANDIDATE_FILENAMES, max_bytes).await? {
return Ok(Some(doc));
}
for p in paths {
if remaining == 0 {
break;
}

dir = parent.to_path_buf();
}

Ok(None)
}

/// Attempt to load the first candidate file found in `dir`. Returns the file
/// contents (truncated if it exceeds `max_bytes`) when successful.
async fn load_first_candidate(
dir: &Path,
names: &[&str],
max_bytes: usize,
) -> std::io::Result<Option<String>> {
for name in names {
let candidate = dir.join(name);

let file = match tokio::fs::File::open(&candidate).await {
let file = match tokio::fs::File::open(&p).await {
Ok(f) => f,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
Err(e) => return Err(e),
Ok(f) => f,
};

let size = file.metadata().await?.len();
let mut reader = tokio::io::BufReader::new(file).take(remaining);
let mut data: Vec<u8> = Vec::new();
reader.read_to_end(&mut data).await?;

let reader = tokio::io::BufReader::new(file);
let mut data = Vec::with_capacity(std::cmp::min(size as usize, max_bytes));
let mut limited = reader.take(max_bytes as u64);
limited.read_to_end(&mut data).await?;

if size as usize > max_bytes {
if size > remaining {
tracing::warn!(
"Project doc `{}` exceeds {max_bytes} bytes - truncating.",
candidate.display(),
"Project doc `{}` exceeds remaining budget ({} bytes) - truncating.",
p.display(),
remaining,
);
}

let contents = String::from_utf8_lossy(&data).to_string();
if contents.trim().is_empty() {
// Empty file – treat as not found.
continue;
let text = String::from_utf8_lossy(&data).to_string();
if !text.trim().is_empty() {
parts.push(text);
remaining = remaining.saturating_sub(data.len() as u64);
}
}

if parts.is_empty() {
Ok(None)
} else {
Ok(Some(parts.join("\n\n")))
}
}

/// Discover the list of AGENTS.md files using the same search rules as
/// `read_project_docs`, but return the file paths instead of concatenated
/// contents. The list is ordered from repository root to the current working
/// directory (inclusive). Symlinks are allowed. When `project_doc_max_bytes`
/// is zero, returns an empty list.
pub fn discover_project_doc_paths(config: &Config) -> std::io::Result<Vec<PathBuf>> {
let mut dir = config.cwd.clone();
if let Ok(canon) = dir.canonicalize() {
dir = canon;
}

// Build chain from cwd upwards and detect git root.
let mut chain: Vec<PathBuf> = vec![dir.clone()];
let mut git_root: Option<PathBuf> = None;
let mut cursor = dir.clone();
while let Some(parent) = cursor.parent() {
let git_marker = cursor.join(".git");
let git_exists = match std::fs::metadata(&git_marker) {
Ok(_) => true,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
Err(e) => return Err(e),
};

if git_exists {
git_root = Some(cursor.clone());
break;
}

return Ok(Some(contents));
chain.push(parent.to_path_buf());
cursor = parent.to_path_buf();
}

Ok(None)
let search_dirs: Vec<PathBuf> = if let Some(root) = git_root {
let mut dirs: Vec<PathBuf> = Vec::new();
let mut saw_root = false;
for p in chain.iter().rev() {
if !saw_root {
if p == &root {
saw_root = true;
} else {
continue;
}
}
dirs.push(p.clone());
}
dirs
} else {
vec![config.cwd.clone()]
};

let mut found: Vec<PathBuf> = Vec::new();
for d in search_dirs {
for name in CANDIDATE_FILENAMES {
let candidate = d.join(name);
match std::fs::symlink_metadata(&candidate) {
Ok(md) => {
let ft = md.file_type();
// Allow regular files and symlinks; opening will later fail for dangling links.
if ft.is_file() || ft.is_symlink() {
found.push(candidate);
break;
}
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => continue,
Err(e) => return Err(e),
}
}
}

Ok(found)
}

#[cfg(test)]
Expand Down Expand Up @@ -278,4 +319,32 @@ mod tests {

assert_eq!(res, Some(INSTRUCTIONS.to_string()));
}

/// When both the repository root and the working directory contain
/// AGENTS.md files, their contents are concatenated from root to cwd.
#[tokio::test]
async fn concatenates_root_and_cwd_docs() {
let repo = tempfile::tempdir().expect("tempdir");

// Simulate a git repository.
std::fs::write(
repo.path().join(".git"),
"gitdir: /path/to/actual/git/dir\n",
)
.unwrap();

// Repo root doc.
fs::write(repo.path().join("AGENTS.md"), "root doc").unwrap();

// Nested working directory with its own doc.
let nested = repo.path().join("workspace/crate_a");
std::fs::create_dir_all(&nested).unwrap();
fs::write(nested.join("AGENTS.md"), "crate doc").unwrap();

let mut cfg = make_config(&repo, 4096, None);
cfg.cwd = nested;

let res = get_user_instructions(&cfg).await.expect("doc expected");
assert_eq!(res, "root doc\n\ncrate doc");
}
}
49 changes: 49 additions & 0 deletions codex-rs/tui/src/history_cell.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use codex_core::config::Config;
use codex_core::plan_tool::PlanItemArg;
use codex_core::plan_tool::StepStatus;
use codex_core::plan_tool::UpdatePlanArgs;
use codex_core::project_doc::discover_project_doc_paths;
use codex_core::protocol::FileChange;
use codex_core::protocol::McpInvocation;
use codex_core::protocol::SandboxPolicy;
Expand Down Expand Up @@ -538,6 +539,54 @@ pub(crate) fn new_status_output(
sandbox_name.into(),
]));

// AGENTS.md files discovered via core's project_doc logic
let agents_list = {
match discover_project_doc_paths(config) {
Ok(paths) => {
let mut rels: Vec<String> = Vec::new();
for p in paths {
let display = if let Some(parent) = p.parent() {
if parent == config.cwd {
"AGENTS.md".to_string()
} else {
let mut cur = config.cwd.as_path();
let mut ups = 0usize;
let mut reached = false;
while let Some(c) = cur.parent() {
if cur == parent {
reached = true;
break;
}
cur = c;
ups += 1;
}
if reached {
format!("{}AGENTS.md", "../".repeat(ups))
} else if let Ok(stripped) = p.strip_prefix(&config.cwd) {
stripped.display().to_string()
} else {
p.display().to_string()
}
}
} else {
p.display().to_string()
};
rels.push(display);
}
rels
}
Err(_) => Vec::new(),
}
};
if agents_list.is_empty() {
lines.push(Line::from(" • AGENTS files: (none)"));
} else {
lines.push(Line::from(vec![
" • AGENTS files: ".into(),
agents_list.join(", ").into(),
]));
}

lines.push(Line::from(""));

// 👤 Account (only if ChatGPT tokens exist), shown under the first block
Expand Down
Loading