From ea4224b95c7cf68715333b5ec78b87082ae63515 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Wed, 30 Jul 2025 11:47:05 -0700 Subject: [PATCH 01/20] First shot --- codex-rs/Cargo.lock | 1 + codex-rs/core/Cargo.toml | 2 + codex-rs/core/src/apply_patch.rs | 1 + codex-rs/core/src/lib.rs | 1 + codex-rs/core/src/patch_accumulator.rs | 378 ++++++++++++++++++ codex-rs/core/src/protocol.rs | 2 + .../src/event_processor_with_human_output.rs | 1 + 7 files changed, 386 insertions(+) create mode 100644 codex-rs/core/src/patch_accumulator.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 120050c227..88d464100d 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -698,6 +698,7 @@ dependencies = [ "serde_json", "sha1", "shlex", + "similar", "strum_macros 0.27.2", "tempfile", "thiserror 2.0.12", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index ecc904cd5e..7f57746348 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -51,6 +51,8 @@ tree-sitter-bash = "0.25.0" uuid = { version = "1", features = ["serde", "v4"] } whoami = "1.6.0" wildmatch = "2.4.0" +tempfile = "3" +similar = "2" [target.'cfg(target_os = "linux")'.dependencies] diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index f116c790ab..6aa12a0a3f 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -237,6 +237,7 @@ pub(crate) async fn apply_patch( stdout: String::from_utf8_lossy(&stdout).to_string(), stderr: String::from_utf8_lossy(&stderr).to_string(), success: success_flag, + changes: convert_apply_patch_to_protocol(&action), }), }) .await; diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 054abd742a..d761afd478 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -34,6 +34,7 @@ pub use model_provider_info::built_in_model_providers; mod models; mod openai_model_info; mod openai_tools; +pub mod patch_accumulator; pub mod plan_tool; mod project_doc; pub mod protocol; diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs new file mode 100644 index 0000000000..08da6b5940 --- /dev/null +++ b/codex-rs/core/src/patch_accumulator.rs @@ -0,0 +1,378 @@ +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use std::io::Read; +use std::io::Seek; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context; +use anyhow::Result; +use similar::TextDiff; +use tempfile::NamedTempFile; + +use crate::protocol::FileChange; + +/// Accumulates multiple change sets and exposes the overall unified diff. +#[derive(Default)] +pub struct PatchAccumulator { + /// Snapshot of original file contents at first sighting. + original_file_copies: HashMap, + /// All change sets in the order they occurred. + changes: Vec>, + /// Aggregated unified diff for all accumulated changes across files. + pub unified_diff: Option, +} + +impl PatchAccumulator { + pub fn new() -> Self { + Self::default() + } + + /// Ensure we have an original snapshot for each file in `changes`. + /// For files that don't exist yet (e.g., Add), we snapshot as empty. 
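+    /// For example (illustrative path): adding a brand-new `notes.txt` keeps its
+    /// baseline snapshot empty, so the aggregated diff later renders the whole
+    /// file as an addition rather than a modification.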
+ pub fn on_patch_begin(&mut self, changes: &HashMap) -> Result<()> { + for path in changes.keys() { + if !self.original_file_copies.contains_key(path) { + let mut tmp = NamedTempFile::new().context("create temp file for snapshot")?; + if path.exists() { + let content = fs::read(path) + .with_context(|| format!("failed to read original {}", path.display()))?; + tmp.write_all(&content).with_context(|| { + format!("failed to write snapshot for {}", path.display()) + })?; + } else { + // Represent missing file as empty baseline. + // Leaving the temp file empty is sufficient. + } + // Ensure file cursor at start for future reads. + tmp.as_file_mut() + .rewind() + .context("rewind snapshot temp file")?; + self.original_file_copies.insert(path.clone(), tmp); + } + } + Ok(()) + } + + /// Record this change set and recompute the aggregated unified diff by + /// applying all change sets to the snapshots in memory. + pub fn on_patch_end(&mut self, changes: HashMap) -> Result<()> { + self.changes.push(changes); + + // Build initial working set from original snapshots. + let mut current: HashMap = HashMap::new(); + for (origin, tmp) in &mut self.original_file_copies { + let mut buf = String::new(); + tmp.as_file_mut() + .rewind() + .with_context(|| format!("rewind snapshot {}", origin.display()))?; + tmp.as_file_mut() + .read_to_string(&mut buf) + .with_context(|| format!("read snapshot {}", origin.display()))?; + current.insert(origin.clone(), buf); + } + + // Track current path per origin to support moves. + let mut current_path_by_origin: HashMap = self + .original_file_copies + .keys() + .map(|p| (p.clone(), p.clone())) + .collect(); + // Reverse mapping for efficient lookup of origin by current path. + let mut origin_by_current_path: HashMap = current_path_by_origin + .iter() + .map(|(o, p)| (p.clone(), o.clone())) + .collect(); + + // Apply each change set in order. + for change_set in &self.changes { + for (path, change) in change_set { + match change { + FileChange::Add { content } => { + // Ensure snapshot exists for added files as empty baseline. + if !self.original_file_copies.contains_key(path) { + let mut tmp = NamedTempFile::new() + .context("create temp file for added file baseline")?; + tmp.as_file_mut().rewind().ok(); + self.original_file_copies.insert(path.clone(), tmp); + current_path_by_origin.insert(path.clone(), path.clone()); + origin_by_current_path.insert(path.clone(), path.clone()); + } + current.insert(path.clone(), content.clone()); + } + FileChange::Delete => { + current.remove(path); + // mapping remains so we can diff baseline -> empty. + } + FileChange::Update { + unified_diff, + move_path, + } => { + // Determine source content. + let src_path = path; + let src_content = current + .get(src_path) + .cloned() + .or_else(|| self.read_snapshot_str(src_path).ok()) + .unwrap_or_default(); + + let new_content = apply_unified_diff(&src_content, unified_diff) + .with_context(|| { + format!("apply unified diff for {}", src_path.display()) + })?; + + if let Some(dest) = move_path { + current.remove(src_path); + current.insert(dest.clone(), new_content); + + // Update origin mapping for the move. + if let Some(origin) = origin_by_current_path.remove(src_path) { + current_path_by_origin.insert(origin.clone(), dest.clone()); + origin_by_current_path.insert(dest.clone(), origin); + } else { + // If we did not know this path yet, seed origin as src. 
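+                                // (illustrative: moving an untracked `a.txt` to
+                                // `b.txt` records origin `a.txt` -> current `b.txt`,
+                                // so the final diff still pairs the paths correctly)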
+ current_path_by_origin.insert(src_path.clone(), dest.clone()); + origin_by_current_path.insert(dest.clone(), src_path.clone()); + } + } else { + current.insert(src_path.clone(), new_content); + } + } + } + } + } + + // Compute aggregated unified diff across all origins we've seen. + let mut all_origins: HashSet = self.original_file_copies.keys().cloned().collect(); + // Include any paths that were added but had no original snapshot (should already be present). + for p in current.keys() { + all_origins.insert(p.clone()); + } + + let mut combined = String::new(); + for origin in all_origins { + let old = self.read_snapshot_str(&origin).unwrap_or_default(); + let new_path = current_path_by_origin + .get(&origin) + .cloned() + .unwrap_or(origin.clone()); + let new = current.get(&new_path).cloned().unwrap_or_default(); + + if old == new { + continue; + } + + let diff = TextDiff::from_lines(&old, &new).unified_diff().to_string(); + let diff = with_paths_in_headers(&diff, &origin, &new_path); + if !combined.is_empty() { + combined.push('\n'); + } + combined.push_str(&diff); + } + + self.unified_diff = if combined.is_empty() { + None + } else { + Some(combined) + }; + Ok(()) + } + + fn read_snapshot_str(&self, path: &Path) -> Result { + if let Some(tmp) = self.original_file_copies.get(path).map(|t| t.reopen()) { + let mut file = tmp.context("reopen temp file")?; + file.rewind().ok(); + let mut s = String::new(); + file.read_to_string(&mut s).context("read temp file")?; + Ok(s) + } else { + Ok(String::new()) + } + } +} + +fn with_paths_in_headers(diff: &str, old_path: &Path, new_path: &Path) -> String { + let mut out = String::new(); + let mut replaced = 0usize; + for line in diff.lines() { + if replaced < 1 && line.starts_with("---") { + out.push_str(&format!("--- {}\n", old_path.display())); + replaced += 1; + continue; + } + if replaced < 2 && line.starts_with("+++") { + out.push_str(&format!("+++ {}\n", new_path.display())); + replaced += 1; + continue; + } + out.push_str(line); + out.push('\n'); + } + out +} + +fn apply_unified_diff(base: &str, unified_diff: &str) -> Result { + let base_lines: Vec<&str> = if base.is_empty() { + Vec::new() + } else { + base.split_inclusive('\n').collect() + }; + + let mut result: Vec = Vec::new(); + let mut pos: usize = 0; // index in base_lines + + let mut it = unified_diff.lines().peekable(); + while let Some(line) = it.next() { + if line.starts_with("---") || line.starts_with("+++") { + continue; + } + if line.starts_with("@@") { + // Parse old start index from header: "@@ -a,b +c,d @@" + let middle = if let (Some(s), Some(e)) = (line.find("@@ "), line.rfind(" @@")) { + &line[s + 3..e] + } else { + "" + }; + let old_range = middle.split_whitespace().next().unwrap_or(""); // "-a,b" + let old_start_str = old_range + .strip_prefix('-') + .unwrap_or(old_range) + .split(',') + .next() + .unwrap_or("1"); + let old_start: usize = old_start_str.parse().unwrap_or(1); + + // Append unchanged lines up to this hunk + let target = old_start.saturating_sub(1); + while pos < target && pos < base_lines.len() { + result.push(base_lines[pos].to_string()); + pos += 1; + } + + // Apply hunk body until next header or EOF + while let Some(peek) = it.peek() { + let body_line = *peek; + if body_line.starts_with("@@") + || body_line.starts_with("---") + || body_line.starts_with("+++") + { + break; + } + let _ = it.next(); + if body_line.starts_with(' ') { + if let Some(src) = base_lines.get(pos) { + result.push((*src).to_string()); + } + pos += 1; + } else if 
body_line.starts_with('-') { + pos += 1; + } else if body_line.starts_with('+') { + result.push(format!( + "{}\n", + body_line.strip_prefix('+').unwrap_or(body_line) + )); + } else if body_line.is_empty() { + result.push("\n".to_string()); + } else { + if let Some(src) = base_lines.get(pos) { + result.push((*src).to_string()); + } + pos += 1; + } + } + } + } + + // Append remaining + while pos < base_lines.len() { + result.push(base_lines[pos].to_string()); + pos += 1; + } + + Ok(result.concat()) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + use super::*; + use tempfile::tempdir; + + #[test] + fn accumulates_add_and_update() { + let dir = tempdir().unwrap(); + let file = dir.path().join("a.txt"); + + let mut acc = PatchAccumulator::new(); + + // First patch: add file + let add_changes = HashMap::from([( + file.clone(), + FileChange::Add { + content: "foo\n".to_string(), + }, + )]); + acc.on_patch_begin(&add_changes).unwrap(); + acc.on_patch_end(add_changes).unwrap(); + let first = acc.unified_diff.clone().unwrap(); + assert!(first.contains("+foo")); + + // Second patch: update + let old = "foo\n"; + let new = "foo\nbar\n"; + let diff = TextDiff::from_lines(old, new).unified_diff().to_string(); + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: diff, + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes).unwrap(); + acc.on_patch_end(update_changes).unwrap(); + let combined = acc.unified_diff.clone().unwrap(); + assert!(combined.contains("+bar")); + } + + #[test] + fn accumulates_delete() { + let dir = tempdir().unwrap(); + let file = dir.path().join("b.txt"); + fs::write(&file, "x\n").unwrap(); + + let mut acc = PatchAccumulator::new(); + let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]); + acc.on_patch_begin(&del_changes).unwrap(); + acc.on_patch_end(del_changes).unwrap(); + let diff = acc.unified_diff.clone().unwrap(); + assert!(diff.contains("-x")); + } + + #[test] + fn accumulates_move_and_update() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src.txt"); + let dest = dir.path().join("dst.txt"); + fs::write(&src, "line\n").unwrap(); + + let old = "line\n"; + let new = "line2\n"; + let diff = TextDiff::from_lines(old, new).unified_diff().to_string(); + + let mut acc = PatchAccumulator::new(); + let mv_changes = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: diff, + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv_changes).unwrap(); + acc.on_patch_end(mv_changes).unwrap(); + let out = acc.unified_diff.clone().unwrap(); + assert!(out.contains("-line")); + assert!(out.contains("+line2")); + } +} diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index bc922eb0e2..dc800b5e6b 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -523,6 +523,8 @@ pub struct PatchApplyEndEvent { pub stderr: String, /// Whether the patch was applied successfully. pub success: bool, + /// The changes that were applied. + pub changes: HashMap, } #[derive(Debug, Clone, Deserialize, Serialize)] diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 54604d538b..ade2ad4f48 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -402,6 +402,7 @@ impl EventProcessor for EventProcessorWithHumanOutput { stdout, stderr, success, + .. 
}) => { let patch_begin = self.call_id_to_patch.remove(&call_id); From 3793670da88777babc8c038501f2ab2e09129e08 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Wed, 30 Jul 2025 13:09:16 -0700 Subject: [PATCH 02/20] WIP temp repo --- codex-rs/core/src/patch_accumulator.rs | 385 ++++++++++++++++--------- 1 file changed, 245 insertions(+), 140 deletions(-) diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs index 08da6b5940..671ea57685 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/patch_accumulator.rs @@ -1,24 +1,30 @@ use std::collections::HashMap; -use std::collections::HashSet; use std::fs; use std::io::Read; -use std::io::Seek; -use std::io::Write; use std::path::Path; use std::path::PathBuf; +use std::process::Command; use anyhow::Context; use anyhow::Result; -use similar::TextDiff; -use tempfile::NamedTempFile; +use tempfile::TempDir; +use uuid::Uuid; use crate::protocol::FileChange; /// Accumulates multiple change sets and exposes the overall unified diff. #[derive(Default)] pub struct PatchAccumulator { - /// Snapshot of original file contents at first sighting. - original_file_copies: HashMap, + /// Temporary git repository for building accumulated diffs. + temp_git_repo: Option, + /// Baseline commit that includes snapshots of all files seen so far. + baseline_commit: Option, + /// Map external path -> internal filename (uuid + same extension). + file_mapping: HashMap, + /// Internal filename -> external path as of baseline commit. + internal_to_baseline_external: HashMap, + /// Internal filename -> external path as of current accumulated state (after applying all changes). + internal_to_current_external: HashMap, /// All change sets in the order they occurred. changes: Vec>, /// Aggregated unified diff for all accumulated changes across files. @@ -30,187 +36,285 @@ impl PatchAccumulator { Self::default() } - /// Ensure we have an original snapshot for each file in `changes`. - /// For files that don't exist yet (e.g., Add), we snapshot as empty. + /// Ensure we have an initialized repository and a baseline snapshot of any new files. pub fn on_patch_begin(&mut self, changes: &HashMap) -> Result<()> { + self.ensure_repo_init()?; + let repo_dir = self.repo_dir()?.to_path_buf(); + + let mut added_any = false; for path in changes.keys() { - if !self.original_file_copies.contains_key(path) { - let mut tmp = NamedTempFile::new().context("create temp file for snapshot")?; + if !self.file_mapping.contains_key(path) { + // Assign a stable internal filename for this external path. + let internal = uuid_filename_for(path); + self.file_mapping.insert(path.clone(), internal.clone()); + self.internal_to_baseline_external + .insert(internal.clone(), path.clone()); + self.internal_to_current_external + .insert(internal.clone(), path.clone()); + + // If the file exists on disk, copy its contents into the repo and stage it. 
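+                // Snapshotting under a flat uuid name (illustrative: `src/main.rs`
+                // stored as `9f3a2b.rs`) avoids recreating the external directory
+                // layout inside the temp repo.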
if path.exists() { - let content = fs::read(path) + let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; - tmp.write_all(&content).with_context(|| { - format!("failed to write snapshot for {}", path.display()) + let internal_path = repo_dir.join(&internal); + if let Some(parent) = internal_path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).ok(); + } + } + fs::write(&internal_path, contents).with_context(|| { + format!("failed to write baseline file {}", internal_path.display()) })?; - } else { - // Represent missing file as empty baseline. - // Leaving the temp file empty is sufficient. + run_git(&repo_dir, &["add", &internal])?; + added_any = true; } - // Ensure file cursor at start for future reads. - tmp.as_file_mut() - .rewind() - .context("rewind snapshot temp file")?; - self.original_file_copies.insert(path.clone(), tmp); } } + + // Commit baseline additions if any. + if added_any { + run_git(&repo_dir, &["commit", "-m", "baseline"])?; + // Update baseline commit id to HEAD. + let id = run_git_capture(&repo_dir, &["rev-parse", "HEAD"])?; + self.baseline_commit = Some(id.trim().to_string()); + } + Ok(()) } /// Record this change set and recompute the aggregated unified diff by - /// applying all change sets to the snapshots in memory. + /// applying all change sets to the repo working tree and diffing against the baseline commit. pub fn on_patch_end(&mut self, changes: HashMap) -> Result<()> { self.changes.push(changes); - // Build initial working set from original snapshots. - let mut current: HashMap = HashMap::new(); - for (origin, tmp) in &mut self.original_file_copies { - let mut buf = String::new(); - tmp.as_file_mut() - .rewind() - .with_context(|| format!("rewind snapshot {}", origin.display()))?; - tmp.as_file_mut() - .read_to_string(&mut buf) - .with_context(|| format!("read snapshot {}", origin.display()))?; - current.insert(origin.clone(), buf); + self.ensure_repo_init()?; + let repo_dir = self.repo_dir()?.to_path_buf(); + + // Reset working tree to baseline commit state. + if let Some(commit) = &self.baseline_commit { + run_git(&repo_dir, &["reset", "--hard", commit])?; + } + + // Start with current external mapping reflective of baseline. + // Rebuild current mapping from baseline mapping first. + self.internal_to_current_external = self.internal_to_baseline_external.clone(); + + // Apply all change sets in order to working tree. + let mut current_contents: HashMap = HashMap::new(); + // Preload current content for any baseline-tracked files. + for (internal, _ext_path) in self.internal_to_current_external.clone() { + let file_path = repo_dir.join(&internal); + if file_path.exists() { + let mut s = String::new(); + fs::File::open(&file_path) + .and_then(|mut f| f.read_to_string(&mut s)) + .ok(); + current_contents.insert(internal.clone(), s); + } } - // Track current path per origin to support moves. - let mut current_path_by_origin: HashMap = self - .original_file_copies - .keys() - .map(|p| (p.clone(), p.clone())) - .collect(); - // Reverse mapping for efficient lookup of origin by current path. - let mut origin_by_current_path: HashMap = current_path_by_origin - .iter() - .map(|(o, p)| (p.clone(), o.clone())) - .collect(); - - // Apply each change set in order. 
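+        // Replay every recorded change set against the freshly reset baseline so
+        // the working tree always reflects the cumulative state, including moves.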
for change_set in &self.changes { - for (path, change) in change_set { + for (ext_path, change) in change_set { + let internal = match self.file_mapping.get(ext_path) { + Some(i) => i.clone(), + None => { + // Newly referenced path; create mapping (no baseline tracked -> add shows up as new file). + let i = uuid_filename_for(ext_path); + self.file_mapping.insert(ext_path.clone(), i.clone()); + self.internal_to_baseline_external + .insert(i.clone(), ext_path.clone()); + self.internal_to_current_external + .insert(i.clone(), ext_path.clone()); + i + } + }; match change { FileChange::Add { content } => { - // Ensure snapshot exists for added files as empty baseline. - if !self.original_file_copies.contains_key(path) { - let mut tmp = NamedTempFile::new() - .context("create temp file for added file baseline")?; - tmp.as_file_mut().rewind().ok(); - self.original_file_copies.insert(path.clone(), tmp); - current_path_by_origin.insert(path.clone(), path.clone()); - origin_by_current_path.insert(path.clone(), path.clone()); + // Create/overwrite internal file with provided content. + let file_path = repo_dir.join(&internal); + if let Some(parent) = file_path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).ok(); + } } - current.insert(path.clone(), content.clone()); + fs::write(&file_path, content) + .with_context(|| format!("failed to write {}", file_path.display()))?; + current_contents.insert(internal.clone(), content.clone()); + // Update current external path mapping (no move, but ensure it's present) + self.internal_to_current_external + .insert(internal.clone(), ext_path.clone()); } FileChange::Delete => { - current.remove(path); - // mapping remains so we can diff baseline -> empty. + let file_path = repo_dir.join(&internal); + if file_path.exists() { + let _ = fs::remove_file(&file_path); + } + current_contents.remove(&internal); + // Keep current mapping entry as-is; diff will show deletion. } FileChange::Update { unified_diff, move_path, } => { - // Determine source content. - let src_path = path; - let src_content = current - .get(src_path) - .cloned() - .or_else(|| self.read_snapshot_str(src_path).ok()) - .unwrap_or_default(); - - let new_content = apply_unified_diff(&src_content, unified_diff) - .with_context(|| { - format!("apply unified diff for {}", src_path.display()) + // Apply unified diff to the current contents of internal file. + let base = current_contents.get(&internal).cloned().unwrap_or_default(); + let new_content = + apply_unified_diff(&base, unified_diff).with_context(|| { + format!("apply unified diff for {}", ext_path.display()) })?; - - if let Some(dest) = move_path { - current.remove(src_path); - current.insert(dest.clone(), new_content); - - // Update origin mapping for the move. - if let Some(origin) = origin_by_current_path.remove(src_path) { - current_path_by_origin.insert(origin.clone(), dest.clone()); - origin_by_current_path.insert(dest.clone(), origin); - } else { - // If we did not know this path yet, seed origin as src. 
- current_path_by_origin.insert(src_path.clone(), dest.clone()); - origin_by_current_path.insert(dest.clone(), src_path.clone()); + let file_path = repo_dir.join(&internal); + if let Some(parent) = file_path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).ok(); } - } else { - current.insert(src_path.clone(), new_content); + } + fs::write(&file_path, &new_content) + .with_context(|| format!("failed to write {}", file_path.display()))?; + current_contents.insert(internal.clone(), new_content); + + if let Some(dest_path) = move_path { + // Update current external mapping for this internal id to the new external path. + self.internal_to_current_external + .insert(internal.clone(), dest_path.clone()); + // Also update forward file_mapping: external current -> internal name. + self.file_mapping.remove(ext_path); + self.file_mapping + .insert(dest_path.clone(), internal.clone()); } } } } } - // Compute aggregated unified diff across all origins we've seen. - let mut all_origins: HashSet = self.original_file_copies.keys().cloned().collect(); - // Include any paths that were added but had no original snapshot (should already be present). - for p in current.keys() { - all_origins.insert(p.clone()); + // Generate unified diff with git and rewrite internal paths to external paths. + let raw = run_git_capture(&repo_dir, &["diff", "--no-color", "HEAD"])?; + let rewritten = self.rewrite_diff_paths(&raw); + self.unified_diff = if rewritten.trim().is_empty() { + None + } else { + Some(rewritten) + }; + Ok(()) + } + + fn repo_dir(&self) -> Result<&Path> { + self.temp_git_repo + .as_ref() + .map(|d| d.path()) + .ok_or_else(|| anyhow::anyhow!("temp git repo not initialized")) + } + + fn ensure_repo_init(&mut self) -> Result<()> { + if self.temp_git_repo.is_some() { + return Ok(()); } + let tmp = TempDir::new().context("create temp git dir")?; + // Initialize git repo. + run_git(tmp.path(), &["init"])?; + // Configure identity to allow commits. + let _ = run_git(tmp.path(), &["config", "user.email", "codex@example.com"]); + let _ = run_git(tmp.path(), &["config", "user.name", "Codex"]); + // Create an initial empty commit. + run_git(tmp.path(), &["commit", "--allow-empty", "-m", "init"])?; + let id = run_git_capture(tmp.path(), &["rev-parse", "HEAD"])?; + self.baseline_commit = Some(id.trim().to_string()); + self.temp_git_repo = Some(tmp); + Ok(()) + } - let mut combined = String::new(); - for origin in all_origins { - let old = self.read_snapshot_str(&origin).unwrap_or_default(); - let new_path = current_path_by_origin - .get(&origin) - .cloned() - .unwrap_or(origin.clone()); - let new = current.get(&new_path).cloned().unwrap_or_default(); - - if old == new { - continue; + /// Rewrites the internal repo filenames to external paths in diff headers. 
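+    /// For example (uuid shortened for illustration), the header pair
+    ///   `--- a/9f3a2b.rs` / `+++ b/9f3a2b.rs`
+    /// is rewritten to the external paths
+    ///   `--- /repo/src/lib.rs` / `+++ /repo/src/lib.rs`.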
+ fn rewrite_diff_paths(&self, diff: &str) -> String { + let mut out = String::new(); + for line in diff.lines() { + if let Some(rest) = line.strip_prefix("diff --git ") { + // Format: diff --git a/ b/ + let parts: Vec<&str> = rest.split_whitespace().collect(); + if parts.len() == 2 { + let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]); + let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]); + let (a_ext, b_ext) = ( + self.internal_to_baseline_external + .get(a) + .cloned() + .unwrap_or_else(|| PathBuf::from(a)), + self.internal_to_current_external + .get(b) + .cloned() + .unwrap_or_else(|| PathBuf::from(b)), + ); + out.push_str(&format!( + "diff --git a/{} b/{}\n", + a_ext.display(), + b_ext.display() + )); + continue; + } } - - let diff = TextDiff::from_lines(&old, &new).unified_diff().to_string(); - let diff = with_paths_in_headers(&diff, &origin, &new_path); - if !combined.is_empty() { - combined.push('\n'); + if let Some(rest) = line.strip_prefix("--- ") { + if let Some(path) = rest.strip_prefix("a/") { + let external = self + .internal_to_baseline_external + .get(path) + .cloned() + .unwrap_or_else(|| PathBuf::from(path)); + out.push_str(&format!("--- {}\n", external.display())); + continue; + } } - combined.push_str(&diff); + if let Some(rest) = line.strip_prefix("+++ ") { + if let Some(path) = rest.strip_prefix("b/") { + let external = self + .internal_to_current_external + .get(path) + .cloned() + .unwrap_or_else(|| PathBuf::from(path)); + out.push_str(&format!("+++ {}\n", external.display())); + continue; + } + } + out.push_str(line); + out.push('\n'); } + out + } +} - self.unified_diff = if combined.is_empty() { - None - } else { - Some(combined) - }; - Ok(()) +fn uuid_filename_for(path: &Path) -> String { + let id = Uuid::new_v4().to_string(); + match path.extension().and_then(|e| e.to_str()) { + Some(ext) if !ext.is_empty() => format!("{}.{}", id, ext), + _ => id, } +} - fn read_snapshot_str(&self, path: &Path) -> Result { - if let Some(tmp) = self.original_file_copies.get(path).map(|t| t.reopen()) { - let mut file = tmp.context("reopen temp file")?; - file.rewind().ok(); - let mut s = String::new(); - file.read_to_string(&mut s).context("read temp file")?; - Ok(s) - } else { - Ok(String::new()) - } +fn run_git(repo: &Path, args: &[&str]) -> Result<()> { + let status = Command::new("git") + .arg("-C") + .arg(repo) + .args(args) + .status() + .with_context(|| format!("failed to run git {args:?} in {}", repo.display()))?; + if !status.success() { + anyhow::bail!("git {args:?} failed with status {:?}", status); } + Ok(()) } -fn with_paths_in_headers(diff: &str, old_path: &Path, new_path: &Path) -> String { - let mut out = String::new(); - let mut replaced = 0usize; - for line in diff.lines() { - if replaced < 1 && line.starts_with("---") { - out.push_str(&format!("--- {}\n", old_path.display())); - replaced += 1; - continue; - } - if replaced < 2 && line.starts_with("+++") { - out.push_str(&format!("+++ {}\n", new_path.display())); - replaced += 1; - continue; - } - out.push_str(line); - out.push('\n'); +fn run_git_capture(repo: &Path, args: &[&str]) -> Result { + let output = Command::new("git") + .arg("-C") + .arg(repo) + .args(args) + .output() + .with_context(|| format!("failed to run git {args:?} in {}", repo.display()))?; + if !output.status.success() { + anyhow::bail!( + "git {args:?} failed with status {:?}: {}", + output.status, + String::from_utf8_lossy(&output.stderr) + ); } - out + Ok(String::from_utf8_lossy(&output.stdout).into_owned()) } fn 
apply_unified_diff(base: &str, unified_diff: &str) -> Result { @@ -298,6 +402,7 @@ fn apply_unified_diff(base: &str, unified_diff: &str) -> Result { mod tests { #![allow(clippy::unwrap_used)] use super::*; + use similar::TextDiff; use tempfile::tempdir; #[test] From 38e0461af5eaa6523b0033a4cec2822547733234 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Wed, 30 Jul 2025 14:20:33 -0700 Subject: [PATCH 03/20] Cleanup --- codex-rs/core/src/patch_accumulator.rs | 215 +++++++++++-------------- 1 file changed, 90 insertions(+), 125 deletions(-) diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs index 671ea57685..46699f4d88 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/patch_accumulator.rs @@ -1,12 +1,12 @@ use std::collections::HashMap; use std::fs; -use std::io::Read; use std::path::Path; use std::path::PathBuf; use std::process::Command; use anyhow::Context; use anyhow::Result; +use anyhow::bail; use tempfile::TempDir; use uuid::Uuid; @@ -41,7 +41,7 @@ impl PatchAccumulator { self.ensure_repo_init()?; let repo_dir = self.repo_dir()?.to_path_buf(); - let mut added_any = false; + let mut staged_new_baseline = false; for path in changes.keys() { if !self.file_mapping.contains_key(path) { // Assign a stable internal filename for this external path. @@ -57,25 +57,19 @@ impl PatchAccumulator { let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; let internal_path = repo_dir.join(&internal); - if let Some(parent) = internal_path.parent() { - if !parent.as_os_str().is_empty() { - fs::create_dir_all(parent).ok(); - } - } fs::write(&internal_path, contents).with_context(|| { format!("failed to write baseline file {}", internal_path.display()) })?; run_git(&repo_dir, &["add", &internal])?; - added_any = true; + staged_new_baseline = true; } } } - // Commit baseline additions if any. - if added_any { - run_git(&repo_dir, &["commit", "-m", "baseline"])?; - // Update baseline commit id to HEAD. - let id = run_git_capture(&repo_dir, &["rev-parse", "HEAD"])?; + // If new baseline files were staged, commit them and update the baseline commit id. + if staged_new_baseline { + run_git(&repo_dir, &["commit", "-m", "Baseline snapshot"])?; + let id = run_git(&repo_dir, &["rev-parse", "HEAD"])?; self.baseline_commit = Some(id.trim().to_string()); } @@ -85,115 +79,97 @@ impl PatchAccumulator { /// Record this change set and recompute the aggregated unified diff by /// applying all change sets to the repo working tree and diffing against the baseline commit. pub fn on_patch_end(&mut self, changes: HashMap) -> Result<()> { - self.changes.push(changes); - - self.ensure_repo_init()?; let repo_dir = self.repo_dir()?.to_path_buf(); - - // Reset working tree to baseline commit state. - if let Some(commit) = &self.baseline_commit { - run_git(&repo_dir, &["reset", "--hard", commit])?; - } - - // Start with current external mapping reflective of baseline. - // Rebuild current mapping from baseline mapping first. - self.internal_to_current_external = self.internal_to_baseline_external.clone(); - - // Apply all change sets in order to working tree. - let mut current_contents: HashMap = HashMap::new(); - // Preload current content for any baseline-tracked files. 
- for (internal, _ext_path) in self.internal_to_current_external.clone() { - let file_path = repo_dir.join(&internal); - if file_path.exists() { - let mut s = String::new(); - fs::File::open(&file_path) - .and_then(|mut f| f.read_to_string(&mut s)) - .ok(); - current_contents.insert(internal.clone(), s); - } - } - - for change_set in &self.changes { - for (ext_path, change) in change_set { - let internal = match self.file_mapping.get(ext_path) { - Some(i) => i.clone(), - None => { - // Newly referenced path; create mapping (no baseline tracked -> add shows up as new file). - let i = uuid_filename_for(ext_path); - self.file_mapping.insert(ext_path.clone(), i.clone()); - self.internal_to_baseline_external - .insert(i.clone(), ext_path.clone()); - self.internal_to_current_external - .insert(i.clone(), ext_path.clone()); - i - } - }; - match change { - FileChange::Add { content } => { - // Create/overwrite internal file with provided content. - let file_path = repo_dir.join(&internal); - if let Some(parent) = file_path.parent() { - if !parent.as_os_str().is_empty() { - fs::create_dir_all(parent).ok(); - } + let baseline_commit = self + .baseline_commit + .as_ref() + .ok_or_else(|| anyhow::anyhow!("baseline commit missing"))?; + + // Apply only the incoming change set to the already-updated working tree. + for (ext_path, change) in &changes { + let internal = match self.file_mapping.get(ext_path) { + Some(i) => i.clone(), + None => { + // Newly referenced path; create mapping (no baseline tracked -> add shows up as new file). + let i = uuid_filename_for(ext_path); + self.file_mapping.insert(ext_path.clone(), i.clone()); + self.internal_to_baseline_external + .insert(i.clone(), ext_path.clone()); + self.internal_to_current_external + .insert(i.clone(), ext_path.clone()); + i + } + }; + match change { + FileChange::Add { content } => { + // Create/overwrite internal file with provided content. + let file_path = repo_dir.join(&internal); + if let Some(parent) = file_path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).ok(); } - fs::write(&file_path, content) - .with_context(|| format!("failed to write {}", file_path.display()))?; - current_contents.insert(internal.clone(), content.clone()); - // Update current external path mapping (no move, but ensure it's present) - self.internal_to_current_external - .insert(internal.clone(), ext_path.clone()); } - FileChange::Delete => { - let file_path = repo_dir.join(&internal); - if file_path.exists() { - let _ = fs::remove_file(&file_path); - } - current_contents.remove(&internal); - // Keep current mapping entry as-is; diff will show deletion. + fs::write(&file_path, content) + .with_context(|| format!("failed to write {}", file_path.display()))?; + // Ensure current external path mapping is present + self.internal_to_current_external + .insert(internal.clone(), ext_path.clone()); + // Stage the new/modified file so it shows up in the diff against HEAD. + run_git(&repo_dir, &["add", &internal])?; + } + FileChange::Delete => { + let file_path = repo_dir.join(&internal); + if file_path.exists() { + let _ = fs::remove_file(&file_path); } - FileChange::Update { - unified_diff, - move_path, - } => { - // Apply unified diff to the current contents of internal file. 
- let base = current_contents.get(&internal).cloned().unwrap_or_default(); - let new_content = - apply_unified_diff(&base, unified_diff).with_context(|| { - format!("apply unified diff for {}", ext_path.display()) - })?; - let file_path = repo_dir.join(&internal); - if let Some(parent) = file_path.parent() { - if !parent.as_os_str().is_empty() { - fs::create_dir_all(parent).ok(); - } - } - fs::write(&file_path, &new_content) - .with_context(|| format!("failed to write {}", file_path.display()))?; - current_contents.insert(internal.clone(), new_content); - - if let Some(dest_path) = move_path { - // Update current external mapping for this internal id to the new external path. - self.internal_to_current_external - .insert(internal.clone(), dest_path.clone()); - // Also update forward file_mapping: external current -> internal name. - self.file_mapping.remove(ext_path); - self.file_mapping - .insert(dest_path.clone(), internal.clone()); + // Keep current mapping entry as-is; diff will show deletion. + } + FileChange::Update { + unified_diff, + move_path, + } => { + // Apply unified diff to the current contents of internal file. + let file_path = repo_dir.join(&internal); + let base = fs::read_to_string(&file_path).unwrap_or_default(); + let new_content = + apply_unified_diff(&base, unified_diff).with_context(|| { + format!("apply unified diff for {}", ext_path.display()) + })?; + if let Some(parent) = file_path.parent() { + if !parent.as_os_str().is_empty() { + fs::create_dir_all(parent).ok(); } } + fs::write(&file_path, &new_content) + .with_context(|| format!("failed to write {}", file_path.display()))?; + // Stage the updated file so it shows up in the diff against HEAD. + run_git(&repo_dir, &["add", &internal])?; + + if let Some(dest_path) = move_path { + // Update current external mapping for this internal id to the new external path. + self.internal_to_current_external + .insert(internal.clone(), dest_path.clone()); + // Also update forward file_mapping: external current -> internal name. + self.file_mapping.remove(ext_path); + self.file_mapping + .insert(dest_path.clone(), internal.clone()); + } } } } // Generate unified diff with git and rewrite internal paths to external paths. - let raw = run_git_capture(&repo_dir, &["diff", "--no-color", "HEAD"])?; + let raw = run_git(&repo_dir, &["diff", "--no-color", baseline_commit])?; let rewritten = self.rewrite_diff_paths(&raw); self.unified_diff = if rewritten.trim().is_empty() { None } else { Some(rewritten) }; + + // Record this change set for history after applying. + self.changes.push(changes); + Ok(()) } @@ -212,11 +188,14 @@ impl PatchAccumulator { // Initialize git repo. run_git(tmp.path(), &["init"])?; // Configure identity to allow commits. - let _ = run_git(tmp.path(), &["config", "user.email", "codex@example.com"]); + let _ = run_git(tmp.path(), &["config", "user.email", "codex@openai.com"]); let _ = run_git(tmp.path(), &["config", "user.name", "Codex"]); // Create an initial empty commit. 
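+        // (guarantees `baseline_commit` always resolves to a real sha, even
+        // before any file snapshot has been committed)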
- run_git(tmp.path(), &["commit", "--allow-empty", "-m", "init"])?; - let id = run_git_capture(tmp.path(), &["rev-parse", "HEAD"])?; + run_git( + tmp.path(), + &["commit", "--allow-empty", "-m", "Initial commit"], + )?; + let id = run_git(tmp.path(), &["rev-parse", "HEAD"])?; self.baseline_commit = Some(id.trim().to_string()); self.temp_git_repo = Some(tmp); Ok(()) @@ -282,28 +261,14 @@ impl PatchAccumulator { fn uuid_filename_for(path: &Path) -> String { let id = Uuid::new_v4().to_string(); match path.extension().and_then(|e| e.to_str()) { - Some(ext) if !ext.is_empty() => format!("{}.{}", id, ext), + Some(ext) if !ext.is_empty() => format!("{id}.{ext}"), _ => id, } } -fn run_git(repo: &Path, args: &[&str]) -> Result<()> { - let status = Command::new("git") - .arg("-C") - .arg(repo) - .args(args) - .status() - .with_context(|| format!("failed to run git {args:?} in {}", repo.display()))?; - if !status.success() { - anyhow::bail!("git {args:?} failed with status {:?}", status); - } - Ok(()) -} - -fn run_git_capture(repo: &Path, args: &[&str]) -> Result { +fn run_git(repo: &Path, args: &[&str]) -> Result { let output = Command::new("git") - .arg("-C") - .arg(repo) + .current_dir(repo) .args(args) .output() .with_context(|| format!("failed to run git {args:?} in {}", repo.display()))?; From 756708a671192948738e7de51e52e64e204f065d Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Wed, 30 Jul 2025 21:31:11 -0700 Subject: [PATCH 04/20] Emit TurnDiff --- codex-rs/core/src/apply_patch.rs | 19 ++++++++++++ codex-rs/core/src/codex.rs | 31 ++++++++++++++++--- codex-rs/core/src/patch_accumulator.rs | 7 +++-- codex-rs/core/src/protocol.rs | 7 +++++ .../src/event_processor_with_human_output.rs | 5 +++ codex-rs/mcp-server/src/codex_tool_runner.rs | 1 + codex-rs/mcp-server/src/mcp_protocol.rs | 2 ++ 7 files changed, 64 insertions(+), 8 deletions(-) diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 6aa12a0a3f..a4daf5ff5a 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -1,12 +1,14 @@ use crate::codex::Session; use crate::models::FunctionCallOutputPayload; use crate::models::ResponseInputItem; +use crate::patch_accumulator::PatchAccumulator; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::FileChange; use crate::protocol::PatchApplyBeginEvent; use crate::protocol::PatchApplyEndEvent; use crate::protocol::ReviewDecision; +use crate::protocol::TurnDiffEvent; use crate::safety::SafetyCheck; use crate::safety::assess_patch_safety; use anyhow::Context; @@ -42,6 +44,7 @@ impl From for InternalApplyPatchInvocation { pub(crate) async fn apply_patch( sess: &Session, + patch_accumulator: &mut PatchAccumulator, sub_id: &str, call_id: &str, action: ApplyPatchAction, @@ -148,6 +151,8 @@ pub(crate) async fn apply_patch( }) .await; + let _ = patch_accumulator.on_patch_begin(&convert_apply_patch_to_protocol(&action)); + let mut stdout = Vec::new(); let mut stderr = Vec::new(); // Enforce writable roots. 
If a write is blocked, collect offending root @@ -242,6 +247,20 @@ pub(crate) async fn apply_patch( }) .await; + let _ = patch_accumulator.on_patch_end(convert_apply_patch_to_protocol(&action)); + + if let Some(unified_diff) = &patch_accumulator.unified_diff { + let _ = sess + .tx_event + .send(Event { + id: sub_id.to_owned(), + msg: EventMsg::TurnDiff(TurnDiffEvent { + unified_diff: unified_diff.clone(), + }), + }) + .await; + } + let item = match result { Ok(_) => ResponseInputItem::FunctionCallOutput { call_id: call_id.to_owned(), diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index cd92739c72..687b900337 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -59,6 +59,7 @@ use crate::models::ReasoningItemReasoningSummary; use crate::models::ResponseInputItem; use crate::models::ResponseItem; use crate::models::ShellToolCallParams; +use crate::patch_accumulator::PatchAccumulator; use crate::plan_tool::handle_update_plan; use crate::project_doc::get_user_instructions; use crate::protocol::AgentMessageDeltaEvent; @@ -1074,10 +1075,11 @@ async fn run_turn( extra_tools, base_instructions_override: sess.base_instructions.clone(), }; + let mut patch_accumulator = PatchAccumulator::new(); let mut retries = 0; loop { - match try_run_turn(sess, &sub_id, &prompt).await { + match try_run_turn(sess, &sub_id, &prompt, &mut patch_accumulator).await { Ok(output) => return Ok(output), Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted), Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)), @@ -1125,6 +1127,7 @@ async fn try_run_turn( sess: &Session, sub_id: &str, prompt: &Prompt, + patch_accumulator: &mut PatchAccumulator, ) -> CodexResult> { // call_ids that are part of this response. let completed_call_ids = prompt @@ -1210,7 +1213,8 @@ async fn try_run_turn( match event { ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { - let response = handle_response_item(sess, sub_id, item.clone()).await?; + let response = + handle_response_item(sess, patch_accumulator, sub_id, item.clone()).await?; output.push(ProcessedResponseItem { item, response }); } @@ -1250,6 +1254,7 @@ async fn try_run_turn( async fn handle_response_item( sess: &Session, + patch_accumulator: &mut PatchAccumulator, sub_id: &str, item: ResponseItem, ) -> CodexResult> { @@ -1287,7 +1292,17 @@ async fn handle_response_item( .. 
} => { info!("FunctionCall: {arguments}"); - Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await) + Some( + handle_function_call( + sess, + patch_accumulator, + sub_id.to_string(), + name, + arguments, + call_id, + ) + .await, + ) } ResponseItem::LocalShellCall { id, @@ -1322,6 +1337,7 @@ async fn handle_response_item( handle_container_exec_with_params( exec_params, sess, + patch_accumulator, sub_id.to_string(), effective_call_id, ) @@ -1339,6 +1355,7 @@ async fn handle_response_item( async fn handle_function_call( sess: &Session, + patch_accumulator: &mut PatchAccumulator, sub_id: String, name: String, arguments: String, @@ -1352,7 +1369,8 @@ async fn handle_function_call( return *output; } }; - handle_container_exec_with_params(params, sess, sub_id, call_id).await + handle_container_exec_with_params(params, sess, patch_accumulator, sub_id, call_id) + .await } "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, _ => { @@ -1426,6 +1444,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams async fn handle_container_exec_with_params( params: ExecParams, sess: &Session, + patch_accumulator: &mut PatchAccumulator, sub_id: String, call_id: String, ) -> ResponseInputItem { @@ -1433,7 +1452,9 @@ async fn handle_container_exec_with_params( let apply_patch_action_for_exec = match maybe_parse_apply_patch_verified(¶ms.command, ¶ms.cwd) { MaybeApplyPatchVerified::Body(changes) => { - match apply_patch::apply_patch(sess, &sub_id, &call_id, changes).await { + match apply_patch::apply_patch(sess, patch_accumulator, &sub_id, &call_id, changes) + .await + { InternalApplyPatchInvocation::Output(item) => return item, InternalApplyPatchInvocation::DelegateToExec(action) => Some(action), } diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs index 46699f4d88..475ebc3eb9 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/patch_accumulator.rs @@ -6,7 +6,6 @@ use std::process::Command; use anyhow::Context; use anyhow::Result; -use anyhow::bail; use tempfile::TempDir; use uuid::Uuid; @@ -24,8 +23,8 @@ pub struct PatchAccumulator { /// Internal filename -> external path as of baseline commit. internal_to_baseline_external: HashMap, /// Internal filename -> external path as of current accumulated state (after applying all changes). + /// This is where renames are tracked. internal_to_current_external: HashMap, - /// All change sets in the order they occurred. changes: Vec>, /// Aggregated unified diff for all accumulated changes across files. pub unified_diff: Option, @@ -36,7 +35,7 @@ impl PatchAccumulator { Self::default() } - /// Ensure we have an initialized repository and a baseline snapshot of any new files. + /// Front-run apply patch calls to track the starting contents of any modified files. pub fn on_patch_begin(&mut self, changes: &HashMap) -> Result<()> { self.ensure_repo_init()?; let repo_dir = self.repo_dir()?.to_path_buf(); @@ -60,6 +59,7 @@ impl PatchAccumulator { fs::write(&internal_path, contents).with_context(|| { format!("failed to write baseline file {}", internal_path.display()) })?; + // Stage the starting contents of the file to be included in the baseline snapshot. run_git(&repo_dir, &["add", &internal])?; staged_new_baseline = true; } @@ -67,6 +67,7 @@ impl PatchAccumulator { } // If new baseline files were staged, commit them and update the baseline commit id. 
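+        // NOTE: a bare `git commit` records the entire index, so this relies on
+        // nothing else having been staged since the last baseline commit.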
+ // Only the original file contents are staged so the baseline will always contain only the starting contents of each file. if staged_new_baseline { run_git(&repo_dir, &["commit", "-m", "Baseline snapshot"])?; let id = run_git(&repo_dir, &["rev-parse", "HEAD"])?; diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index dc800b5e6b..4440202c0d 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -334,6 +334,8 @@ pub enum EventMsg { /// Notification that a patch application has finished. PatchApplyEnd(PatchApplyEndEvent), + TurnDiff(TurnDiffEvent), + /// Response to GetHistoryEntryRequest. GetHistoryEntryResponse(GetHistoryEntryResponseEvent), @@ -527,6 +529,11 @@ pub struct PatchApplyEndEvent { pub changes: HashMap, } +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct TurnDiffEvent { + pub unified_diff: String, +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct GetHistoryEntryResponseEvent { pub offset: usize, diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index ade2ad4f48..cb4e8d9f66 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent; use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TokenUsage; +use codex_core::protocol::TurnDiffEvent; use owo_colors::OwoColorize; use owo_colors::Style; use shlex::try_join; @@ -432,6 +433,10 @@ impl EventProcessor for EventProcessorWithHumanOutput { println!("{}", line.style(self.dimmed)); } } + EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => { + ts_println!(self, "{}", "turn diff:".style(self.magenta)); + println!("{unified_diff}"); + } EventMsg::ExecApprovalRequest(_) => { // Should we exit? 
} diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index f25659b284..4943b53ee3 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -262,6 +262,7 @@ async fn run_codex_tool_session_inner( | EventMsg::BackgroundEvent(_) | EventMsg::PatchApplyBegin(_) | EventMsg::PatchApplyEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::GetHistoryEntryResponse(_) | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete => { diff --git a/codex-rs/mcp-server/src/mcp_protocol.rs b/codex-rs/mcp-server/src/mcp_protocol.rs index e507376c16..b26787c2bc 100644 --- a/codex-rs/mcp-server/src/mcp_protocol.rs +++ b/codex-rs/mcp-server/src/mcp_protocol.rs @@ -247,6 +247,7 @@ pub enum ClientNotification { #[allow(clippy::expect_used)] #[allow(clippy::unwrap_used)] mod tests { + use std::collections::HashMap; use std::path::PathBuf; use super::*; @@ -903,6 +904,7 @@ mod tests { stdout: "ok".into(), stderr: "".into(), success: true, + changes: HashMap::new(), }), }; From b249e5098f82d3525c3655a8171025d4062e6160 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Wed, 30 Jul 2025 22:52:31 -0700 Subject: [PATCH 05/20] Tests pass --- codex-rs/Cargo.lock | 1 - codex-rs/core/Cargo.toml | 1 - codex-rs/core/src/patch_accumulator.rs | 110 +++++++++++++++++++------ 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 88d464100d..120050c227 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -698,7 +698,6 @@ dependencies = [ "serde_json", "sha1", "shlex", - "similar", "strum_macros 0.27.2", "tempfile", "thiserror 2.0.12", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 7f57746348..67d338c6c2 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -52,7 +52,6 @@ uuid = { version = "1", features = ["serde", "v4"] } whoami = "1.6.0" wildmatch = "2.4.0" tempfile = "3" -similar = "2" [target.'cfg(target_os = "linux")'.dependencies] diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs index 475ebc3eb9..8d8eb3dd17 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/patch_accumulator.rs @@ -11,7 +11,12 @@ use uuid::Uuid; use crate::protocol::FileChange; -/// Accumulates multiple change sets and exposes the overall unified diff. +/// Tracks sets of changes to files and exposes the overall unified diff. +/// Internally, the way this works is: +/// 1. Start by creating an empty git repo in a temp directory. +/// 2. Front-run apply patch calls to add the initial state of any changed/created/deleted files to the temp repo. +/// 3. Files are added to the temp repo in a flat structure named by a uuid and the same extension and a mapping is created to go back and forth. +/// 4. A baseline snapshot sha is created and updated any time a new file is added to the repo to ensure that there is a starting state for the diff. #[derive(Default)] pub struct PatchAccumulator { /// Temporary git repository for building accumulated diffs. @@ -40,7 +45,7 @@ impl PatchAccumulator { self.ensure_repo_init()?; let repo_dir = self.repo_dir()?.to_path_buf(); - let mut staged_new_baseline = false; + let mut baseline_paths: Vec = Vec::new(); for path in changes.keys() { if !self.file_mapping.contains_key(path) { // Assign a stable internal filename for this external path. 
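+                // e.g. (illustrative) `src/main.rs` is tracked internally as
+                // `550e8400-e29b-41d4-a716-446655440000.rs`, keeping the extension.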
@@ -52,6 +57,7 @@ impl PatchAccumulator { .insert(internal.clone(), path.clone()); // If the file exists on disk, copy its contents into the repo and stage it. + // If it doesn't exist, it's a new file and it will get added in on_patch_end. if path.exists() { let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; @@ -61,15 +67,21 @@ impl PatchAccumulator { })?; // Stage the starting contents of the file to be included in the baseline snapshot. run_git(&repo_dir, &["add", &internal])?; - staged_new_baseline = true; + baseline_paths.push(internal.clone()); } } } // If new baseline files were staged, commit them and update the baseline commit id. // Only the original file contents are staged so the baseline will always contain only the starting contents of each file. - if staged_new_baseline { - run_git(&repo_dir, &["commit", "-m", "Baseline snapshot"])?; + if !baseline_paths.is_empty() { + // Commit only the newly staged baseline files to avoid sweeping other staged changes into the baseline. + let mut args: Vec<&str> = vec!["commit", "-m", "Baseline snapshot", "--"]; + let baseline_refs: Vec = baseline_paths; + for p in &baseline_refs { + args.push(p.as_str()); + } + run_git(&repo_dir, &args)?; let id = run_git(&repo_dir, &["rev-parse", "HEAD"])?; self.baseline_commit = Some(id.trim().to_string()); } @@ -88,7 +100,7 @@ impl PatchAccumulator { // Apply only the incoming change set to the already-updated working tree. for (ext_path, change) in &changes { - let internal = match self.file_mapping.get(ext_path) { + let uuid_filename = match self.file_mapping.get(ext_path) { Some(i) => i.clone(), None => { // Newly referenced path; create mapping (no baseline tracked -> add shows up as new file). @@ -104,7 +116,7 @@ impl PatchAccumulator { match change { FileChange::Add { content } => { // Create/overwrite internal file with provided content. - let file_path = repo_dir.join(&internal); + let file_path = repo_dir.join(&uuid_filename); if let Some(parent) = file_path.parent() { if !parent.as_os_str().is_empty() { fs::create_dir_all(parent).ok(); @@ -114,12 +126,10 @@ impl PatchAccumulator { .with_context(|| format!("failed to write {}", file_path.display()))?; // Ensure current external path mapping is present self.internal_to_current_external - .insert(internal.clone(), ext_path.clone()); - // Stage the new/modified file so it shows up in the diff against HEAD. - run_git(&repo_dir, &["add", &internal])?; + .insert(uuid_filename.clone(), ext_path.clone()); } FileChange::Delete => { - let file_path = repo_dir.join(&internal); + let file_path = repo_dir.join(&uuid_filename); if file_path.exists() { let _ = fs::remove_file(&file_path); } @@ -130,7 +140,7 @@ impl PatchAccumulator { move_path, } => { // Apply unified diff to the current contents of internal file. - let file_path = repo_dir.join(&internal); + let file_path = repo_dir.join(&uuid_filename); let base = fs::read_to_string(&file_path).unwrap_or_default(); let new_content = apply_unified_diff(&base, unified_diff).with_context(|| { @@ -143,24 +153,28 @@ impl PatchAccumulator { } fs::write(&file_path, &new_content) .with_context(|| format!("failed to write {}", file_path.display()))?; - // Stage the updated file so it shows up in the diff against HEAD. - run_git(&repo_dir, &["add", &internal])?; if let Some(dest_path) = move_path { // Update current external mapping for this internal id to the new external path. 
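+                        // (the uuid file itself is never renamed; a move only
+                        // rewrites the external-path bookkeeping)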
self.internal_to_current_external - .insert(internal.clone(), dest_path.clone()); + .insert(uuid_filename.clone(), dest_path.clone()); // Also update forward file_mapping: external current -> internal name. self.file_mapping.remove(ext_path); self.file_mapping - .insert(dest_path.clone(), internal.clone()); + .insert(dest_path.clone(), uuid_filename.clone()); } } } } // Generate unified diff with git and rewrite internal paths to external paths. - let raw = run_git(&repo_dir, &["diff", "--no-color", baseline_commit])?; + // Stage all changes so new files and deletions are included in the diff against the baseline. + let _ = run_git(&repo_dir, &["add", "-A"])?; + // Diff baseline commit against the index to capture all staged changes. + let raw = run_git( + &repo_dir, + &["diff", "--no-color", "--cached", baseline_commit], + )?; let rewritten = self.rewrite_diff_paths(&raw); self.unified_diff = if rewritten.trim().is_empty() { None @@ -368,7 +382,6 @@ fn apply_unified_diff(base: &str, unified_diff: &str) -> Result { mod tests { #![allow(clippy::unwrap_used)] use super::*; - use similar::TextDiff; use tempfile::tempdir; #[test] @@ -391,13 +404,15 @@ mod tests { assert!(first.contains("+foo")); // Second patch: update - let old = "foo\n"; - let new = "foo\nbar\n"; - let diff = TextDiff::from_lines(old, new).unified_diff().to_string(); let update_changes = HashMap::from([( file.clone(), FileChange::Update { - unified_diff: diff, + unified_diff: r#"--- ++++ +@@ -1,1 +1,2 @@ + foo ++bar"# + .to_owned(), move_path: None, }, )]); @@ -428,15 +443,16 @@ mod tests { let dest = dir.path().join("dst.txt"); fs::write(&src, "line\n").unwrap(); - let old = "line\n"; - let new = "line2\n"; - let diff = TextDiff::from_lines(old, new).unified_diff().to_string(); - let mut acc = PatchAccumulator::new(); let mv_changes = HashMap::from([( src.clone(), FileChange::Update { - unified_diff: diff, + unified_diff: r#"--- +++++ +@@ -1,1 +1,2 @@ +-line ++line2"# + .to_owned(), move_path: Some(dest.clone()), }, )]); @@ -446,4 +462,44 @@ mod tests { assert!(out.contains("-line")); assert!(out.contains("+line2")); } + + #[test] + fn update_persists_across_new_baseline_for_new_file() { + let dir = tempdir().unwrap(); + let a = dir.path().join("a.txt"); + let b = dir.path().join("b.txt"); + fs::write(&a, "foo\n").unwrap(); + fs::write(&b, "z\n").unwrap(); + + let mut acc = PatchAccumulator::new(); + + // First: update existing a.txt (stages the update in index) + let update_a = HashMap::from([( + a.clone(), + FileChange::Update { + unified_diff: r#"--- ++++ +@@ -1,1 +1,2 @@ + foo ++bar"# + .to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_a).unwrap(); + acc.on_patch_end(update_a).unwrap(); + let first = acc.unified_diff.clone().unwrap(); + assert!(first.contains("+bar")); + + // Next: introduce a brand-new path b.txt that exists on disk to trigger a new baseline commit. + let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); + acc.on_patch_begin(&del_b).unwrap(); + acc.on_patch_end(del_b).unwrap(); + + let combined = acc.unified_diff.clone().unwrap(); + // The combined diff must still include the update to a.txt, even after committing a new baseline for b.txt. + assert!(combined.contains("+bar")); + // And also reflect the deletion of b.txt. 
+ assert!(combined.contains("-z")); + } } From eeaf2ca0b61d715d90201a8497453a01453fd225 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Thu, 31 Jul 2025 14:58:28 -0700 Subject: [PATCH 06/20] Updated for non-git --- codex-rs/core/src/apply_patch.rs | 2 +- codex-rs/core/src/patch_accumulator.rs | 542 ++++++++++++------------- 2 files changed, 257 insertions(+), 287 deletions(-) diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index a4daf5ff5a..218c757316 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -247,7 +247,7 @@ pub(crate) async fn apply_patch( }) .await; - let _ = patch_accumulator.on_patch_end(convert_apply_patch_to_protocol(&action)); + let _ = patch_accumulator.update_unified_diff(); if let Some(unified_diff) = &patch_accumulator.unified_diff { let _ = sess diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/patch_accumulator.rs index 8d8eb3dd17..1cbcdd4594 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/patch_accumulator.rs @@ -12,25 +12,24 @@ use uuid::Uuid; use crate::protocol::FileChange; /// Tracks sets of changes to files and exposes the overall unified diff. -/// Internally, the way this works is: -/// 1. Start by creating an empty git repo in a temp directory. -/// 2. Front-run apply patch calls to add the initial state of any changed/created/deleted files to the temp repo. -/// 3. Files are added to the temp repo in a flat structure named by a uuid and the same extension and a mapping is created to go back and forth. -/// 4. A baseline snapshot sha is created and updated any time a new file is added to the repo to ensure that there is a starting state for the diff. +/// Internally, the way this works is now: +/// 1. Create a temp directory to store baseline snapshots of files when they are first seen. +/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk. +/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null). +/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs. +/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using +/// `git diff --no-index` and rewrite paths to external paths. #[derive(Default)] pub struct PatchAccumulator { - /// Temporary git repository for building accumulated diffs. - temp_git_repo: Option, - /// Baseline commit that includes snapshots of all files seen so far. - baseline_commit: Option, + /// Temp directory holding baseline snapshots of files as first seen. + baseline_files_dir: Option, /// Map external path -> internal filename (uuid + same extension). - file_mapping: HashMap, - /// Internal filename -> external path as of baseline commit. - internal_to_baseline_external: HashMap, + external_to_temp_name: HashMap, + /// Internal filename -> external path as of baseline snapshot. + temp_name_to_baseline_external: HashMap, /// Internal filename -> external path as of current accumulated state (after applying all changes). /// This is where renames are tracked. - internal_to_current_external: HashMap, - changes: Vec>, + temp_name_to_current_external: HashMap, /// Aggregated unified diff for all accumulated changes across files. pub unified_diff: Option, } @@ -41,182 +40,189 @@ impl PatchAccumulator { } /// Front-run apply patch calls to track the starting contents of any modified files. 
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen. + /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions. + /// - Also updates internal mappings for move/rename events. pub fn on_patch_begin(&mut self, changes: &HashMap) -> Result<()> { - self.ensure_repo_init()?; - let repo_dir = self.repo_dir()?.to_path_buf(); + self.ensure_baseline_dir()?; + let baseline_dir = self.baseline_dir()?.to_path_buf(); - let mut baseline_paths: Vec = Vec::new(); - for path in changes.keys() { - if !self.file_mapping.contains_key(path) { - // Assign a stable internal filename for this external path. + for (path, change) in changes.iter() { + // Ensure a stable internal filename exists for this external path. + if !self.external_to_temp_name.contains_key(path) { let internal = uuid_filename_for(path); - self.file_mapping.insert(path.clone(), internal.clone()); - self.internal_to_baseline_external + self.external_to_temp_name + .insert(path.clone(), internal.clone()); + self.temp_name_to_baseline_external .insert(internal.clone(), path.clone()); - self.internal_to_current_external + self.temp_name_to_current_external .insert(internal.clone(), path.clone()); - // If the file exists on disk, copy its contents into the repo and stage it. - // If it doesn't exist, it's a new file and it will get added in on_patch_end. + // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. if path.exists() { let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; - let internal_path = repo_dir.join(&internal); + let internal_path = baseline_dir.join(&internal); fs::write(&internal_path, contents).with_context(|| { format!("failed to write baseline file {}", internal_path.display()) })?; - // Stage the starting contents of the file to be included in the baseline snapshot. - run_git(&repo_dir, &["add", &internal])?; - baseline_paths.push(internal.clone()); } } - } - // If new baseline files were staged, commit them and update the baseline commit id. - // Only the original file contents are staged so the baseline will always contain only the starting contents of each file. - if !baseline_paths.is_empty() { - // Commit only the newly staged baseline files to avoid sweeping other staged changes into the baseline. - let mut args: Vec<&str> = vec!["commit", "-m", "Baseline snapshot", "--"]; - let baseline_refs: Vec = baseline_paths; - for p in &baseline_refs { - args.push(p.as_str()); + // Track rename/move in current mapping if provided in an Update. + let move_path = match change { + FileChange::Update { + move_path: Some(dest), + .. + } => Some(dest), + _ => None, + }; + if let Some(dest) = move_path { + let uuid_filename = match self.external_to_temp_name.get(path) { + Some(i) => i.clone(), + None => { + // This should be rare, but if we haven't mapped the source, create it with no baseline. + let i = uuid_filename_for(path); + self.external_to_temp_name.insert(path.clone(), i.clone()); + self.temp_name_to_baseline_external + .insert(i.clone(), path.clone()); + i + } + }; + // Update current external mapping for temp file name. + self.temp_name_to_current_external + .insert(uuid_filename.clone(), dest.clone()); + // Update forward file_mapping: external current -> internal name. 
+ self.external_to_temp_name.remove(path); + self.external_to_temp_name + .insert(dest.clone(), uuid_filename); } - run_git(&repo_dir, &args)?; - let id = run_git(&repo_dir, &["rev-parse", "HEAD"])?; - self.baseline_commit = Some(id.trim().to_string()); } Ok(()) } - /// Record this change set and recompute the aggregated unified diff by - /// applying all change sets to the repo working tree and diffing against the baseline commit. - pub fn on_patch_end(&mut self, changes: HashMap) -> Result<()> { - let repo_dir = self.repo_dir()?.to_path_buf(); - let baseline_commit = self - .baseline_commit - .as_ref() - .ok_or_else(|| anyhow::anyhow!("baseline commit missing"))?; - - // Apply only the incoming change set to the already-updated working tree. - for (ext_path, change) in &changes { - let uuid_filename = match self.file_mapping.get(ext_path) { - Some(i) => i.clone(), - None => { - // Newly referenced path; create mapping (no baseline tracked -> add shows up as new file). - let i = uuid_filename_for(ext_path); - self.file_mapping.insert(ext_path.clone(), i.clone()); - self.internal_to_baseline_external - .insert(i.clone(), ext_path.clone()); - self.internal_to_current_external - .insert(i.clone(), ext_path.clone()); - i - } + /// Recompute the aggregated unified diff by comparing all baseline snapshots against + /// current files on disk using `git diff --no-index` and rewriting paths to external paths. + pub fn update_unified_diff(&mut self) -> Result<()> { + let baseline_dir = self.baseline_dir()?.to_path_buf(); + let current_dir = baseline_dir.join("current"); + if current_dir.exists() { + // Best-effort cleanup of previous run's mirror. + let _ = fs::remove_dir_all(¤t_dir); + } + fs::create_dir_all(¤t_dir).with_context(|| { + format!( + "failed to create current mirror dir {}", + current_dir.display() + ) + })?; + + let mut aggregated = String::new(); + + // Compute diffs per tracked internal file. + for (internal, baseline_external) in &self.temp_name_to_baseline_external { + let baseline_path = baseline_dir.join(internal); + let current_external = self + .temp_name_to_current_external + .get(internal) + .cloned() + .unwrap_or_else(|| baseline_external.clone()); + + let left_is_dev_null = !baseline_path.exists(); + let right_exists = current_external.exists(); + + // Prepare right side mirror file if exists; otherwise use /dev/null for deletions. + let right_arg = if right_exists { + let mirror_path = current_dir.join(internal); + let contents = fs::read(¤t_external).with_context(|| { + format!( + "failed to read current file for diff {}", + current_external.display() + ) + })?; + fs::write(&mirror_path, contents).with_context(|| { + format!( + "failed to write current mirror file {}", + mirror_path.display() + ) + })?; + // Use relative path from baseline_dir (so headers say a/ b/current/). + format!("current/{internal}") + } else { + // Deletion: right side is /dev/null to show proper deleted file diff. + "/dev/null".to_string() }; - match change { - FileChange::Add { content } => { - // Create/overwrite internal file with provided content. 
- let file_path = repo_dir.join(&uuid_filename); - if let Some(parent) = file_path.parent() { - if !parent.as_os_str().is_empty() { - fs::create_dir_all(parent).ok(); - } - } - fs::write(&file_path, content) - .with_context(|| format!("failed to write {}", file_path.display()))?; - // Ensure current external path mapping is present - self.internal_to_current_external - .insert(uuid_filename.clone(), ext_path.clone()); - } - FileChange::Delete => { - let file_path = repo_dir.join(&uuid_filename); - if file_path.exists() { - let _ = fs::remove_file(&file_path); - } - // Keep current mapping entry as-is; diff will show deletion. - } - FileChange::Update { - unified_diff, - move_path, - } => { - // Apply unified diff to the current contents of internal file. - let file_path = repo_dir.join(&uuid_filename); - let base = fs::read_to_string(&file_path).unwrap_or_default(); - let new_content = - apply_unified_diff(&base, unified_diff).with_context(|| { - format!("apply unified diff for {}", ext_path.display()) - })?; - if let Some(parent) = file_path.parent() { - if !parent.as_os_str().is_empty() { - fs::create_dir_all(parent).ok(); - } - } - fs::write(&file_path, &new_content) - .with_context(|| format!("failed to write {}", file_path.display()))?; - - if let Some(dest_path) = move_path { - // Update current external mapping for this internal id to the new external path. - self.internal_to_current_external - .insert(uuid_filename.clone(), dest_path.clone()); - // Also update forward file_mapping: external current -> internal name. - self.file_mapping.remove(ext_path); - self.file_mapping - .insert(dest_path.clone(), uuid_filename.clone()); - } + + // Prepare left arg: baseline file path or /dev/null for additions. + let left_arg = if left_is_dev_null { + "/dev/null".to_string() + } else { + internal.clone() + }; + + // Run git diff --no-index from baseline_dir to keep paths predictable. + let raw = run_git_allow_exit_codes( + &baseline_dir, + &[ + "-c", + "color.ui=false", + "diff", + "--no-color", + "--no-index", + "--", + &left_arg, + &right_arg, + ], + &[0, 1], // 0: no changes, 1: differences + )?; + + if raw.trim().is_empty() { + continue; + } + let rewritten = self.rewrite_diff_paths(&raw); + if !rewritten.trim().is_empty() { + if !aggregated.is_empty() && !aggregated.ends_with('\n') { + aggregated.push('\n'); } + aggregated.push_str(&rewritten); } } - // Generate unified diff with git and rewrite internal paths to external paths. - // Stage all changes so new files and deletions are included in the diff against the baseline. - let _ = run_git(&repo_dir, &["add", "-A"])?; - // Diff baseline commit against the index to capture all staged changes. - let raw = run_git( - &repo_dir, - &["diff", "--no-color", "--cached", baseline_commit], - )?; - let rewritten = self.rewrite_diff_paths(&raw); - self.unified_diff = if rewritten.trim().is_empty() { + self.unified_diff = if aggregated.trim().is_empty() { None } else { - Some(rewritten) + Some(aggregated) }; - // Record this change set for history after applying. - self.changes.push(changes); + // Clean up the curent dir. 
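
An aside on the exit-code handling used here: `git diff --no-index` follows `--exit-code` semantics, exiting 0 when the inputs match and 1 when they differ, so a plain status check would misreport every real diff as a failure. A minimal standalone sketch of the same pattern (the helper name is hypothetical):

    use std::path::Path;
    use std::process::Command;

    /// Runs `git diff --no-index` and treats exit code 1 (differences found)
    /// as success, returning the raw diff text.
    fn no_index_diff(dir: &Path, left: &str, right: &str) -> anyhow::Result<String> {
        let output = Command::new("git")
            .current_dir(dir)
            .args(["diff", "--no-color", "--no-index", "--", left, right])
            .output()?;
        match output.status.code() {
            Some(0) | Some(1) => Ok(String::from_utf8_lossy(&output.stdout).into_owned()),
            code => anyhow::bail!("git diff --no-index exited with {code:?}"),
        }
    }
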
+ let _ = fs::remove_dir_all(¤t_dir); Ok(()) } - fn repo_dir(&self) -> Result<&Path> { - self.temp_git_repo + fn baseline_dir(&self) -> Result<&Path> { + self.baseline_files_dir .as_ref() .map(|d| d.path()) - .ok_or_else(|| anyhow::anyhow!("temp git repo not initialized")) + .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized")) } - fn ensure_repo_init(&mut self) -> Result<()> { - if self.temp_git_repo.is_some() { + fn ensure_baseline_dir(&mut self) -> Result<()> { + if self.baseline_files_dir.is_some() { return Ok(()); } - let tmp = TempDir::new().context("create temp git dir")?; - // Initialize git repo. - run_git(tmp.path(), &["init"])?; - // Configure identity to allow commits. - let _ = run_git(tmp.path(), &["config", "user.email", "codex@openai.com"]); - let _ = run_git(tmp.path(), &["config", "user.name", "Codex"]); - // Create an initial empty commit. - run_git( - tmp.path(), - &["commit", "--allow-empty", "-m", "Initial commit"], - )?; - let id = run_git(tmp.path(), &["rev-parse", "HEAD"])?; - self.baseline_commit = Some(id.trim().to_string()); - self.temp_git_repo = Some(tmp); + let tmp = TempDir::new().context("create baseline temp dir")?; + self.baseline_files_dir = Some(tmp); Ok(()) } - /// Rewrites the internal repo filenames to external paths in diff headers. + /// Rewrites the internal filenames to external paths in diff headers. + /// Handles inputs like: + /// diff --git a/ b/current/ + /// --- a/ | /dev/null + /// +++ b/current/ | /dev/null + /// and replaces uuid with the external paths tracking baseline/current. fn rewrite_diff_paths(&self, diff: &str) -> String { let mut out = String::new(); for line in diff.lines() { @@ -226,43 +232,78 @@ impl PatchAccumulator { if parts.len() == 2 { let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]); let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]); - let (a_ext, b_ext) = ( - self.internal_to_baseline_external - .get(a) + + let a_ext_display = if a == "/dev/null" { + "/dev/null".to_string() + } else { + let a_base = Path::new(a) + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(a); + let mapped = self + .temp_name_to_baseline_external + .get(a_base) .cloned() - .unwrap_or_else(|| PathBuf::from(a)), - self.internal_to_current_external - .get(b) + .unwrap_or_else(|| PathBuf::from(a)); + mapped.display().to_string() + }; + + let b_ext_display = if b == "/dev/null" { + "/dev/null".to_string() + } else { + let b_base = Path::new(b) + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(b); + let mapped = self + .temp_name_to_current_external + .get(b_base) .cloned() - .unwrap_or_else(|| PathBuf::from(b)), - ); - out.push_str(&format!( - "diff --git a/{} b/{}\n", - a_ext.display(), - b_ext.display() - )); + .unwrap_or_else(|| PathBuf::from(b)); + mapped.display().to_string() + }; + + out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n")); continue; } } if let Some(rest) = line.strip_prefix("--- ") { if let Some(path) = rest.strip_prefix("a/") { - let external = self - .internal_to_baseline_external - .get(path) - .cloned() - .unwrap_or_else(|| PathBuf::from(path)); - out.push_str(&format!("--- {}\n", external.display())); + let external_display = if path == "/dev/null" { + "/dev/null".to_string() + } else { + let p_base = Path::new(path) + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(path); + self.temp_name_to_baseline_external + .get(p_base) + .cloned() + .unwrap_or_else(|| PathBuf::from(path)) + .display() + .to_string() + }; + out.push_str(&format!("--- 
{external_display}\n")); continue; } } if let Some(rest) = line.strip_prefix("+++ ") { if let Some(path) = rest.strip_prefix("b/") { - let external = self - .internal_to_current_external - .get(path) - .cloned() - .unwrap_or_else(|| PathBuf::from(path)); - out.push_str(&format!("+++ {}\n", external.display())); + let external_display = if path == "/dev/null" { + "/dev/null".to_string() + } else { + let p_base = Path::new(path) + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(path); + self.temp_name_to_current_external + .get(p_base) + .cloned() + .unwrap_or_else(|| PathBuf::from(path)) + .display() + .to_string() + }; + out.push_str(&format!("+++ {external_display}\n")); continue; } } @@ -281,15 +322,21 @@ fn uuid_filename_for(path: &Path) -> String { } } -fn run_git(repo: &Path, args: &[&str]) -> Result { +fn run_git_allow_exit_codes( + repo: &Path, + args: &[&str], + allowed_exit_codes: &[i32], +) -> Result { let output = Command::new("git") .current_dir(repo) .args(args) .output() - .with_context(|| format!("failed to run git {args:?} in {}", repo.display()))?; - if !output.status.success() { + .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?; + let code = output.status.code().unwrap_or(-1); + if !allowed_exit_codes.contains(&code) { anyhow::bail!( - "git {args:?} failed with status {:?}: {}", + "git {:?} failed with status {:?}: {}", + args, output.status, String::from_utf8_lossy(&output.stderr) ); @@ -297,87 +344,6 @@ fn run_git(repo: &Path, args: &[&str]) -> Result { Ok(String::from_utf8_lossy(&output.stdout).into_owned()) } -fn apply_unified_diff(base: &str, unified_diff: &str) -> Result { - let base_lines: Vec<&str> = if base.is_empty() { - Vec::new() - } else { - base.split_inclusive('\n').collect() - }; - - let mut result: Vec = Vec::new(); - let mut pos: usize = 0; // index in base_lines - - let mut it = unified_diff.lines().peekable(); - while let Some(line) = it.next() { - if line.starts_with("---") || line.starts_with("+++") { - continue; - } - if line.starts_with("@@") { - // Parse old start index from header: "@@ -a,b +c,d @@" - let middle = if let (Some(s), Some(e)) = (line.find("@@ "), line.rfind(" @@")) { - &line[s + 3..e] - } else { - "" - }; - let old_range = middle.split_whitespace().next().unwrap_or(""); // "-a,b" - let old_start_str = old_range - .strip_prefix('-') - .unwrap_or(old_range) - .split(',') - .next() - .unwrap_or("1"); - let old_start: usize = old_start_str.parse().unwrap_or(1); - - // Append unchanged lines up to this hunk - let target = old_start.saturating_sub(1); - while pos < target && pos < base_lines.len() { - result.push(base_lines[pos].to_string()); - pos += 1; - } - - // Apply hunk body until next header or EOF - while let Some(peek) = it.peek() { - let body_line = *peek; - if body_line.starts_with("@@") - || body_line.starts_with("---") - || body_line.starts_with("+++") - { - break; - } - let _ = it.next(); - if body_line.starts_with(' ') { - if let Some(src) = base_lines.get(pos) { - result.push((*src).to_string()); - } - pos += 1; - } else if body_line.starts_with('-') { - pos += 1; - } else if body_line.starts_with('+') { - result.push(format!( - "{}\n", - body_line.strip_prefix('+').unwrap_or(body_line) - )); - } else if body_line.is_empty() { - result.push("\n".to_string()); - } else { - if let Some(src) = base_lines.get(pos) { - result.push((*src).to_string()); - } - pos += 1; - } - } - } - } - - // Append remaining - while pos < base_lines.len() { - result.push(base_lines[pos].to_string()); - pos 
+= 1; - } - - Ok(result.concat()) -} - #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -391,7 +357,7 @@ mod tests { let mut acc = PatchAccumulator::new(); - // First patch: add file + // First patch: add file (baseline should be /dev/null). let add_changes = HashMap::from([( file.clone(), FileChange::Add { @@ -399,25 +365,27 @@ mod tests { }, )]); acc.on_patch_begin(&add_changes).unwrap(); - acc.on_patch_end(add_changes).unwrap(); + + // Simulate apply: create the file on disk. + fs::write(&file, "foo\n").unwrap(); + acc.update_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); assert!(first.contains("+foo")); + assert!(first.contains("/dev/null") || first.contains("new file")); - // Second patch: update + // Second patch: update the file on disk. let update_changes = HashMap::from([( file.clone(), FileChange::Update { - unified_diff: r#"--- -+++ -@@ -1,1 +1,2 @@ - foo -+bar"# - .to_owned(), + unified_diff: "".to_owned(), move_path: None, }, )]); acc.on_patch_begin(&update_changes).unwrap(); - acc.on_patch_end(update_changes).unwrap(); + + // Simulate apply: append a new line. + fs::write(&file, "foo\nbar\n").unwrap(); + acc.update_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); assert!(combined.contains("+bar")); } @@ -431,7 +399,10 @@ mod tests { let mut acc = PatchAccumulator::new(); let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_changes).unwrap(); - acc.on_patch_end(del_changes).unwrap(); + + // Simulate apply: delete the file from disk. + fs::remove_file(&file).unwrap(); + acc.update_unified_diff().unwrap(); let diff = acc.unified_diff.clone().unwrap(); assert!(diff.contains("-x")); } @@ -447,17 +418,17 @@ mod tests { let mv_changes = HashMap::from([( src.clone(), FileChange::Update { - unified_diff: r#"--- -++++ -@@ -1,1 +1,2 @@ --line -+line2"# - .to_owned(), + unified_diff: "".to_owned(), move_path: Some(dest.clone()), }, )]); acc.on_patch_begin(&mv_changes).unwrap(); - acc.on_patch_end(mv_changes).unwrap(); + + // Simulate apply: move and update content. + fs::rename(&src, &dest).unwrap(); + fs::write(&dest, "line2\n").unwrap(); + + acc.update_unified_diff().unwrap(); let out = acc.unified_diff.clone().unwrap(); assert!(out.contains("-line")); assert!(out.contains("+line2")); @@ -473,31 +444,30 @@ mod tests { let mut acc = PatchAccumulator::new(); - // First: update existing a.txt (stages the update in index) + // First: update existing a.txt (baseline snapshot is created for a). let update_a = HashMap::from([( a.clone(), FileChange::Update { - unified_diff: r#"--- -+++ -@@ -1,1 +1,2 @@ - foo -+bar"# - .to_owned(), + unified_diff: "".to_owned(), move_path: None, }, )]); acc.on_patch_begin(&update_a).unwrap(); - acc.on_patch_end(update_a).unwrap(); + // Simulate apply: modify a.txt on disk. + fs::write(&a, "foo\nbar\n").unwrap(); + acc.update_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); assert!(first.contains("+bar")); - // Next: introduce a brand-new path b.txt that exists on disk to trigger a new baseline commit. + // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_b).unwrap(); - acc.on_patch_end(del_b).unwrap(); + // Simulate apply: delete b.txt. 
+ fs::remove_file(&b).unwrap(); + acc.update_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); - // The combined diff must still include the update to a.txt, even after committing a new baseline for b.txt. + // The combined diff must still include the update to a.txt. assert!(combined.contains("+bar")); // And also reflect the deletion of b.txt. assert!(combined.contains("-z")); From 50c408b9fd85c2840ce6be5a07ac2b0b605c9609 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Thu, 31 Jul 2025 15:36:29 -0700 Subject: [PATCH 07/20] Emit events. Builds --- codex-rs/core/Cargo.toml | 1 - codex-rs/core/src/apply_patch.rs | 5 - codex-rs/core/src/codex.rs | 99 ++++++++++++++----- codex-rs/core/src/lib.rs | 2 +- codex-rs/core/src/protocol.rs | 2 - ...ch_accumulator.rs => turn_diff_tracker.rs} | 31 +++--- codex-rs/mcp-server/src/mcp_protocol.rs | 2 - 7 files changed, 92 insertions(+), 50 deletions(-) rename codex-rs/core/src/{patch_accumulator.rs => turn_diff_tracker.rs} (96%) diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 67d338c6c2..8d9d314237 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -72,7 +72,6 @@ core_test_support = { path = "tests/common" } maplit = "1.0.2" predicates = "3" pretty_assertions = "1.4.1" -tempfile = "3" tokio-test = "0.4" walkdir = "2.5.0" wiremock = "0.6" diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index 2e9ff9ebff..dc11aed023 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -1,12 +1,8 @@ use crate::codex::Session; use crate::models::FunctionCallOutputPayload; use crate::models::ResponseInputItem; -use crate::patch_accumulator::PatchAccumulator; -use crate::protocol::Event; -use crate::protocol::EventMsg; use crate::protocol::FileChange; use crate::protocol::ReviewDecision; -use crate::protocol::TurnDiffEvent; use crate::safety::SafetyCheck; use crate::safety::assess_patch_safety; use codex_apply_patch::ApplyPatchAction; @@ -45,7 +41,6 @@ impl From for InternalApplyPatchInvocation { pub(crate) async fn apply_patch( sess: &Session, - patch_accumulator: &mut PatchAccumulator, sub_id: &str, call_id: &str, action: ApplyPatchAction, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 117157788f..16a06ea9a3 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -59,7 +59,6 @@ use crate::models::ReasoningItemReasoningSummary; use crate::models::ResponseInputItem; use crate::models::ResponseItem; use crate::models::ShellToolCallParams; -use crate::patch_accumulator::PatchAccumulator; use crate::plan_tool::handle_update_plan; use crate::project_doc::get_user_instructions; use crate::protocol::AgentMessageDeltaEvent; @@ -85,11 +84,13 @@ use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::Submission; use crate::protocol::TaskCompleteEvent; +use crate::protocol::TurnDiffEvent; use crate::rollout::RolloutRecorder; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; use crate::safety::assess_safety_for_untrusted_command; use crate::shell; +use crate::turn_diff_tracker::TurnDiffTracker; use crate::user_notification::UserNotification; use crate::util::backoff; @@ -362,7 +363,11 @@ impl Session { } } - async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) { + async fn on_exec_command_begin( + &self, + turn_diff_tracker: &mut TurnDiffTracker, + exec_command_context: ExecCommandContext, + ) { let 
ExecCommandContext { sub_id, call_id, @@ -374,11 +379,15 @@ impl Session { Some(ApplyPatchCommandContext { user_explicitly_approved_this_action, changes, - }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent { - call_id, - auto_approved: !user_explicitly_approved_this_action, - changes, - }), + }) => { + let _ = turn_diff_tracker.on_patch_begin(&changes); + + EventMsg::PatchApplyBegin(PatchApplyBeginEvent { + call_id, + auto_approved: !user_explicitly_approved_this_action, + changes, + }) + } None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent { call_id, command: command_for_display.clone(), @@ -392,8 +401,10 @@ impl Session { let _ = self.tx_event.send(event).await; } - async fn notify_exec_command_end( + #[allow(clippy::too_many_arguments)] + async fn on_exec_command_end( &self, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, call_id: &str, stdout: &str, @@ -428,6 +439,20 @@ impl Session { msg, }; let _ = self.tx_event.send(event).await; + + // If this is an apply_patch, after we emit the end patch, emit a second event + // with the full turn diff if there is one. + if is_apply_patch { + let unified_diff = turn_diff_tracker.update_and_get_unified_diff(); + if let Ok(Some(unified_diff)) = unified_diff { + let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); + let event = Event { + id: sub_id.into(), + msg, + }; + let _ = self.tx_event.send(event).await; + } + } } /// Helper that emits a BackgroundEvent with the given message. This keeps @@ -951,6 +976,10 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { .await; let last_agent_message: Option; + // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains + // many turns, from the perspective of the user, it is a single turn. + let mut turn_diff_tracker = TurnDiffTracker::new(); + loop { // Note that pending_input would be something like a message the user // submitted through the UI while the model was running. Though the UI @@ -983,7 +1012,7 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { }) }) .collect(); - match run_turn(&sess, sub_id.clone(), turn_input).await { + match run_turn(&sess, &mut turn_diff_tracker, sub_id.clone(), turn_input).await { Ok(turn_output) => { let mut items_to_record_in_conversation_history = Vec::::new(); let mut responses = Vec::::new(); @@ -1109,6 +1138,7 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { async fn run_turn( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, input: Vec, ) -> CodexResult> { @@ -1120,11 +1150,10 @@ async fn run_turn( extra_tools, base_instructions_override: sess.base_instructions.clone(), }; - let mut patch_accumulator = PatchAccumulator::new(); let mut retries = 0; loop { - match try_run_turn(sess, &sub_id, &prompt, &mut patch_accumulator).await { + match try_run_turn(sess, turn_diff_tracker, &sub_id, &prompt).await { Ok(output) => return Ok(output), Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted), Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)), @@ -1170,9 +1199,9 @@ struct ProcessedResponseItem { async fn try_run_turn( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, prompt: &Prompt, - patch_accumulator: &mut PatchAccumulator, ) -> CodexResult> { // call_ids that are part of this response. 
let completed_call_ids = prompt @@ -1259,7 +1288,7 @@ async fn try_run_turn( ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { let response = - handle_response_item(sess, patch_accumulator, sub_id, item.clone()).await?; + handle_response_item(sess, turn_diff_tracker, sub_id, item.clone()).await?; output.push(ProcessedResponseItem { item, response }); } @@ -1277,6 +1306,16 @@ async fn try_run_turn( .ok(); } + let unified_diff = turn_diff_tracker.update_and_get_unified_diff(); + if let Ok(Some(unified_diff)) = unified_diff { + let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); + let event = Event { + id: sub_id.to_string(), + msg, + }; + let _ = sess.tx_event.send(event).await; + } + return Ok(output); } ResponseEvent::OutputTextDelta(delta) => { @@ -1299,7 +1338,7 @@ async fn try_run_turn( async fn handle_response_item( sess: &Session, - patch_accumulator: &mut PatchAccumulator, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, item: ResponseItem, ) -> CodexResult> { @@ -1340,7 +1379,7 @@ async fn handle_response_item( Some( handle_function_call( sess, - patch_accumulator, + turn_diff_tracker, sub_id.to_string(), name, arguments, @@ -1382,7 +1421,7 @@ async fn handle_response_item( handle_container_exec_with_params( exec_params, sess, - patch_accumulator, + turn_diff_tracker, sub_id.to_string(), effective_call_id, ) @@ -1400,7 +1439,7 @@ async fn handle_response_item( async fn handle_function_call( sess: &Session, - patch_accumulator: &mut PatchAccumulator, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, name: String, arguments: String, @@ -1414,7 +1453,7 @@ async fn handle_function_call( return *output; } }; - handle_container_exec_with_params(params, sess, patch_accumulator, sub_id, call_id) + handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id) .await } "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, @@ -1489,7 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams async fn handle_container_exec_with_params( params: ExecParams, sess: &Session, - patch_accumulator: &mut PatchAccumulator, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, call_id: String, ) -> ResponseInputItem { @@ -1637,7 +1676,7 @@ async fn handle_container_exec_with_params( }, ), }; - sess.notify_exec_command_begin(exec_command_context.clone()) + sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone()) .await; let params = maybe_run_with_user_profile(params, sess); @@ -1659,7 +1698,8 @@ async fn handle_container_exec_with_params( duration, } = output; - sess.notify_exec_command_end( + sess.on_exec_command_end( + turn_diff_tracker, &sub_id, &call_id, &stdout, @@ -1685,7 +1725,15 @@ async fn handle_container_exec_with_params( } } Err(CodexErr::Sandbox(error)) => { - handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await + handle_sandbox_error( + turn_diff_tracker, + params, + exec_command_context, + error, + sandbox_type, + sess, + ) + .await } Err(e) => { // Handle non-sandbox errors @@ -1701,6 +1749,7 @@ async fn handle_container_exec_with_params( } async fn handle_sandbox_error( + turn_diff_tracker: &mut TurnDiffTracker, params: ExecParams, exec_command_context: ExecCommandContext, error: SandboxErr, @@ -1757,7 +1806,8 @@ async fn handle_sandbox_error( sess.notify_background_event(&sub_id, "retrying command without sandbox") .await; - sess.notify_exec_command_begin(exec_command_context).await; + 
sess.on_exec_command_begin(turn_diff_tracker, exec_command_context) + .await; // This is an escalated retry; the policy will not be // examined and the sandbox has been set to `None`. @@ -1779,7 +1829,8 @@ async fn handle_sandbox_error( duration, } = retry_output; - sess.notify_exec_command_end( + sess.on_exec_command_end( + turn_diff_tracker, &sub_id, &call_id, &stdout, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 104ce497e3..c6eb1a2089 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -34,7 +34,6 @@ pub use model_provider_info::built_in_model_providers; mod models; mod openai_model_info; mod openai_tools; -pub mod patch_accumulator; pub mod plan_tool; mod project_doc; pub mod protocol; @@ -43,6 +42,7 @@ mod safety; pub mod seatbelt; pub mod shell; pub mod spawn; +pub mod turn_diff_tracker; mod user_notification; pub mod util; diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index 4440202c0d..17389c0734 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -525,8 +525,6 @@ pub struct PatchApplyEndEvent { pub stderr: String, /// Whether the patch was applied successfully. pub success: bool, - /// The changes that were applied. - pub changes: HashMap, } #[derive(Debug, Clone, Deserialize, Serialize)] diff --git a/codex-rs/core/src/patch_accumulator.rs b/codex-rs/core/src/turn_diff_tracker.rs similarity index 96% rename from codex-rs/core/src/patch_accumulator.rs rename to codex-rs/core/src/turn_diff_tracker.rs index 1cbcdd4594..e9155b332f 100644 --- a/codex-rs/core/src/patch_accumulator.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -20,7 +20,7 @@ use crate::protocol::FileChange; /// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using /// `git diff --no-index` and rewrite paths to external paths. #[derive(Default)] -pub struct PatchAccumulator { +pub struct TurnDiffTracker { /// Temp directory holding baseline snapshots of files as first seen. baseline_files_dir: Option, /// Map external path -> internal filename (uuid + same extension). @@ -34,7 +34,7 @@ pub struct PatchAccumulator { pub unified_diff: Option, } -impl PatchAccumulator { +impl TurnDiffTracker { pub fn new() -> Self { Self::default() } @@ -104,7 +104,7 @@ impl PatchAccumulator { /// Recompute the aggregated unified diff by comparing all baseline snapshots against /// current files on disk using `git diff --no-index` and rewriting paths to external paths. - pub fn update_unified_diff(&mut self) -> Result<()> { + pub fn update_and_get_unified_diff(&mut self) -> Result> { let baseline_dir = self.baseline_dir()?.to_path_buf(); let current_dir = baseline_dir.join("current"); if current_dir.exists() { @@ -198,7 +198,7 @@ impl PatchAccumulator { // Clean up the curent dir. let _ = fs::remove_dir_all(¤t_dir); - Ok(()) + Ok(self.unified_diff.clone()) } fn baseline_dir(&self) -> Result<&Path> { @@ -352,11 +352,11 @@ mod tests { #[test] fn accumulates_add_and_update() { + let mut acc = TurnDiffTracker::new(); + let dir = tempdir().unwrap(); let file = dir.path().join("a.txt"); - let mut acc = PatchAccumulator::new(); - // First patch: add file (baseline should be /dev/null). let add_changes = HashMap::from([( file.clone(), @@ -367,8 +367,9 @@ mod tests { acc.on_patch_begin(&add_changes).unwrap(); // Simulate apply: create the file on disk. + // This must happen after on_patch_begin. 
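
The ordering comment above is load-bearing: if the file were written before `on_patch_begin`, the baseline snapshot would already contain the new contents and the tracker would report no diff at all. A sketch of that failure mode, assuming the crate is consumed as `codex_core` and using a throwaway temp path:

    use std::collections::HashMap;
    use std::fs;

    use codex_core::protocol::FileChange;
    use codex_core::turn_diff_tracker::TurnDiffTracker;

    fn main() -> anyhow::Result<()> {
        let file = std::env::temp_dir().join("ordering_demo.txt");
        let add = HashMap::from([(
            file.clone(),
            FileChange::Add { content: "foo\n".to_string() },
        )]);

        let mut acc = TurnDiffTracker::new();
        fs::write(&file, "foo\n")?; // wrong order: apply before begin
        acc.on_patch_begin(&add)?;  // baseline now already contains "foo"

        // Baseline equals current contents, so no diff is reported.
        assert_eq!(acc.update_and_get_unified_diff()?, None);
        fs::remove_file(&file)?;
        Ok(())
    }
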
fs::write(&file, "foo\n").unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); assert!(first.contains("+foo")); assert!(first.contains("/dev/null") || first.contains("new file")); @@ -385,7 +386,7 @@ mod tests { // Simulate apply: append a new line. fs::write(&file, "foo\nbar\n").unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); assert!(combined.contains("+bar")); } @@ -396,13 +397,13 @@ mod tests { let file = dir.path().join("b.txt"); fs::write(&file, "x\n").unwrap(); - let mut acc = PatchAccumulator::new(); + let mut acc = TurnDiffTracker::new(); let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_changes).unwrap(); // Simulate apply: delete the file from disk. fs::remove_file(&file).unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let diff = acc.unified_diff.clone().unwrap(); assert!(diff.contains("-x")); } @@ -414,7 +415,7 @@ mod tests { let dest = dir.path().join("dst.txt"); fs::write(&src, "line\n").unwrap(); - let mut acc = PatchAccumulator::new(); + let mut acc = TurnDiffTracker::new(); let mv_changes = HashMap::from([( src.clone(), FileChange::Update { @@ -428,7 +429,7 @@ mod tests { fs::rename(&src, &dest).unwrap(); fs::write(&dest, "line2\n").unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let out = acc.unified_diff.clone().unwrap(); assert!(out.contains("-line")); assert!(out.contains("+line2")); @@ -442,7 +443,7 @@ mod tests { fs::write(&a, "foo\n").unwrap(); fs::write(&b, "z\n").unwrap(); - let mut acc = PatchAccumulator::new(); + let mut acc = TurnDiffTracker::new(); // First: update existing a.txt (baseline snapshot is created for a). let update_a = HashMap::from([( @@ -455,7 +456,7 @@ mod tests { acc.on_patch_begin(&update_a).unwrap(); // Simulate apply: modify a.txt on disk. fs::write(&a, "foo\nbar\n").unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); assert!(first.contains("+bar")); @@ -464,7 +465,7 @@ mod tests { acc.on_patch_begin(&del_b).unwrap(); // Simulate apply: delete b.txt. fs::remove_file(&b).unwrap(); - acc.update_unified_diff().unwrap(); + acc.update_and_get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); // The combined diff must still include the update to a.txt. 
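
Taken together, the wiring above gives each task a single tracker whose aggregate diff grows over the turn. A minimal end-to-end sketch of the sequence the tests exercise, again assuming the crate is consumed as `codex_core`:

    use std::collections::HashMap;
    use std::fs;

    use codex_core::protocol::FileChange;
    use codex_core::turn_diff_tracker::TurnDiffTracker;

    fn main() -> anyhow::Result<()> {
        let file = std::env::temp_dir().join("turn_demo.txt");
        let mut tracker = TurnDiffTracker::new();

        // Patch 1: add the file. The missing baseline becomes /dev/null.
        let add = HashMap::from([(
            file.clone(),
            FileChange::Add { content: "foo\n".to_string() },
        )]);
        tracker.on_patch_begin(&add)?;
        fs::write(&file, "foo\n")?; // apply_patch would write this

        // Patch 2: update the same file later in the turn.
        let update = HashMap::from([(
            file.clone(),
            FileChange::Update { unified_diff: String::new(), move_path: None },
        )]);
        tracker.on_patch_begin(&update)?; // baseline already captured; no-op
        fs::write(&file, "foo\nbar\n")?;

        // One aggregate diff against the original baseline: +foo and +bar.
        let diff = tracker.update_and_get_unified_diff()?.unwrap_or_default();
        assert!(diff.contains("+foo") && diff.contains("+bar"));
        fs::remove_file(&file)?;
        Ok(())
    }
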
diff --git a/codex-rs/mcp-server/src/mcp_protocol.rs b/codex-rs/mcp-server/src/mcp_protocol.rs index b26787c2bc..e507376c16 100644 --- a/codex-rs/mcp-server/src/mcp_protocol.rs +++ b/codex-rs/mcp-server/src/mcp_protocol.rs @@ -247,7 +247,6 @@ pub enum ClientNotification { #[allow(clippy::expect_used)] #[allow(clippy::unwrap_used)] mod tests { - use std::collections::HashMap; use std::path::PathBuf; use super::*; @@ -904,7 +903,6 @@ mod tests { stdout: "ok".into(), stderr: "".into(), success: true, - changes: HashMap::new(), }), }; From c8e37631d408c8af77fe9553f076f6602fbb9872 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Thu, 31 Jul 2025 16:46:08 -0700 Subject: [PATCH 08/20] Spelling --- codex-rs/core/src/turn_diff_tracker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index e9155b332f..10cbd67a9e 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -195,7 +195,7 @@ impl TurnDiffTracker { Some(aggregated) }; - // Clean up the curent dir. + // Clean up the current dir. let _ = fs::remove_dir_all(¤t_dir); Ok(self.unified_diff.clone()) From e3f8b121db778c5b88b8ebbc62adfb64cac965ca Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 14:35:29 -0500 Subject: [PATCH 09/20] Migrated to similar --- codex-rs/Cargo.lock | 1 + codex-rs/core/Cargo.toml | 3 +- codex-rs/core/src/turn_diff_tracker.rs | 272 ++++++------------------- 3 files changed, 60 insertions(+), 216 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 87d59b21be..fcff5f8f76 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -698,6 +698,7 @@ dependencies = [ "serde_json", "sha1", "shlex", + "similar", "strum_macros 0.27.2", "tempfile", "thiserror 2.0.12", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 8d9d314237..2b40c80e6f 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -33,6 +33,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" sha1 = "0.10.6" shlex = "1.3.0" +similar = "2" strum_macros = "0.27.2" thiserror = "2.0.12" time = { version = "0.3", features = ["formatting", "local-offset", "macros"] } @@ -51,7 +52,6 @@ tree-sitter-bash = "0.25.0" uuid = { version = "1", features = ["serde", "v4"] } whoami = "1.6.0" wildmatch = "2.4.0" -tempfile = "3" [target.'cfg(target_os = "linux")'.dependencies] @@ -72,6 +72,7 @@ core_test_support = { path = "tests/common" } maplit = "1.0.2" predicates = "3" pretty_assertions = "1.4.1" +tempfile = "3" tokio-test = "0.4" walkdir = "2.5.0" wiremock = "0.6" diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 10cbd67a9e..b077aac07e 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -2,27 +2,22 @@ use std::collections::HashMap; use std::fs; use std::path::Path; use std::path::PathBuf; -use std::process::Command; use anyhow::Context; use anyhow::Result; -use tempfile::TempDir; use uuid::Uuid; use crate::protocol::FileChange; /// Tracks sets of changes to files and exposes the overall unified diff. /// Internally, the way this works is now: -/// 1. Create a temp directory to store baseline snapshots of files when they are first seen. -/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk. 
-/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null). -/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs. -/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using -/// `git diff --no-index` and rewrite paths to external paths. +/// 1. Maintain an in-memory baseline snapshot of files when they are first seen. +/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null). +/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking. +/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory +/// using the `similar` crate and emit unified diffs with rewritten external paths. #[derive(Default)] pub struct TurnDiffTracker { - /// Temp directory holding baseline snapshots of files as first seen. - baseline_files_dir: Option, /// Map external path -> internal filename (uuid + same extension). external_to_temp_name: HashMap, /// Internal filename -> external path as of baseline snapshot. @@ -30,6 +25,8 @@ pub struct TurnDiffTracker { /// Internal filename -> external path as of current accumulated state (after applying all changes). /// This is where renames are tracked. temp_name_to_current_external: HashMap, + /// Internal filename -> baseline file contents (None means the file did not exist, i.e. /dev/null). + baseline_contents: HashMap>, /// Aggregated unified diff for all accumulated changes across files. pub unified_diff: Option, } @@ -40,13 +37,10 @@ impl TurnDiffTracker { } /// Front-run apply patch calls to track the starting contents of any modified files. - /// - Creates a baseline snapshot for files that already exist on disk when first seen. + /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen. /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions. /// - Also updates internal mappings for move/rename events. pub fn on_patch_begin(&mut self, changes: &HashMap) -> Result<()> { - self.ensure_baseline_dir()?; - let baseline_dir = self.baseline_dir()?.to_path_buf(); - for (path, change) in changes.iter() { // Ensure a stable internal filename exists for this external path. if !self.external_to_temp_name.contains_key(path) { @@ -59,14 +53,14 @@ impl TurnDiffTracker { .insert(internal.clone(), path.clone()); // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. - if path.exists() { + let baseline = if path.exists() { let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; - let internal_path = baseline_dir.join(&internal); - fs::write(&internal_path, contents).with_context(|| { - format!("failed to write baseline file {}", internal_path.display()) - })?; - } + Some(String::from_utf8_lossy(&contents).into_owned()) + } else { + None + }; + self.baseline_contents.insert(internal.clone(), baseline); } // Track rename/move in current mapping if provided in an Update. @@ -86,6 +80,8 @@ impl TurnDiffTracker { self.external_to_temp_name.insert(path.clone(), i.clone()); self.temp_name_to_baseline_external .insert(i.clone(), path.clone()); + // No on-disk file read here; treat as addition. 
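
One consequence of holding baselines as `String` via `from_utf8_lossy`: bytes that are not valid UTF-8 decode to U+FFFD, so diffs of binary files are approximations. Both the baseline and the current contents go through the same decoding, though, so the output stays self-consistent. A tiny illustration:

    fn main() {
        // 0xFF can never appear in valid UTF-8, so it decodes to U+FFFD.
        let bytes = [0x66, 0x6f, 0x6f, 0xff];
        assert_eq!(String::from_utf8_lossy(&bytes), "foo\u{FFFD}");
    }
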
+ self.baseline_contents.insert(i.clone(), None); i } }; @@ -103,89 +99,72 @@ impl TurnDiffTracker { } /// Recompute the aggregated unified diff by comparing all baseline snapshots against - /// current files on disk using `git diff --no-index` and rewriting paths to external paths. + /// current files on disk using the `similar` crate and rewriting paths to external paths. pub fn update_and_get_unified_diff(&mut self) -> Result> { - let baseline_dir = self.baseline_dir()?.to_path_buf(); - let current_dir = baseline_dir.join("current"); - if current_dir.exists() { - // Best-effort cleanup of previous run's mirror. - let _ = fs::remove_dir_all(¤t_dir); - } - fs::create_dir_all(¤t_dir).with_context(|| { - format!( - "failed to create current mirror dir {}", - current_dir.display() - ) - })?; - let mut aggregated = String::new(); // Compute diffs per tracked internal file. for (internal, baseline_external) in &self.temp_name_to_baseline_external { - let baseline_path = baseline_dir.join(internal); let current_external = self .temp_name_to_current_external .get(internal) .cloned() .unwrap_or_else(|| baseline_external.clone()); - let left_is_dev_null = !baseline_path.exists(); - let right_exists = current_external.exists(); + let left_content = self + .baseline_contents + .get(internal) + .cloned() + .unwrap_or(None); - // Prepare right side mirror file if exists; otherwise use /dev/null for deletions. - let right_arg = if right_exists { - let mirror_path = current_dir.join(internal); + let right_content = if current_external.exists() { let contents = fs::read(¤t_external).with_context(|| { format!( "failed to read current file for diff {}", current_external.display() ) })?; - fs::write(&mirror_path, contents).with_context(|| { - format!( - "failed to write current mirror file {}", - mirror_path.display() - ) - })?; - // Use relative path from baseline_dir (so headers say a/ b/current/). - format!("current/{internal}") + Some(String::from_utf8_lossy(&contents).into_owned()) } else { - // Deletion: right side is /dev/null to show proper deleted file diff. - "/dev/null".to_string() + None }; - // Prepare left arg: baseline file path or /dev/null for additions. - let left_arg = if left_is_dev_null { + let left_text = left_content.as_deref().unwrap_or(""); + let right_text = right_content.as_deref().unwrap_or(""); + + if left_text == right_text { + continue; + } + + let left_display = baseline_external.display().to_string(); + let right_display = current_external.display().to_string(); + + // Diff the contents. + let diff = similar::TextDiff::from_lines(left_text, right_text); + + // Emit a git-style header for better readability and parity with previous behavior. + aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n")); + + let old_header = if left_content.is_some() { + format!("a/{left_display}") + } else { "/dev/null".to_string() + }; + let new_header = if right_content.is_some() { + format!("b/{right_display}") } else { - internal.clone() + "/dev/null".to_string() }; - // Run git diff --no-index from baseline_dir to keep paths predictable. 
- let raw = run_git_allow_exit_codes( - &baseline_dir, - &[ - "-c", - "color.ui=false", - "diff", - "--no-color", - "--no-index", - "--", - &left_arg, - &right_arg, - ], - &[0, 1], // 0: no changes, 1: differences - )?; - - if raw.trim().is_empty() { - continue; - } - let rewritten = self.rewrite_diff_paths(&raw); - if !rewritten.trim().is_empty() { - if !aggregated.is_empty() && !aggregated.ends_with('\n') { - aggregated.push('\n'); - } - aggregated.push_str(&rewritten); + let unified = diff + .unified_diff() + .context_radius(3) + .header(&old_header, &new_header) + .to_string(); + + aggregated.push_str(&unified); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); } } @@ -195,123 +174,8 @@ impl TurnDiffTracker { Some(aggregated) }; - // Clean up the current dir. - let _ = fs::remove_dir_all(¤t_dir); - Ok(self.unified_diff.clone()) } - - fn baseline_dir(&self) -> Result<&Path> { - self.baseline_files_dir - .as_ref() - .map(|d| d.path()) - .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized")) - } - - fn ensure_baseline_dir(&mut self) -> Result<()> { - if self.baseline_files_dir.is_some() { - return Ok(()); - } - let tmp = TempDir::new().context("create baseline temp dir")?; - self.baseline_files_dir = Some(tmp); - Ok(()) - } - - /// Rewrites the internal filenames to external paths in diff headers. - /// Handles inputs like: - /// diff --git a/ b/current/ - /// --- a/ | /dev/null - /// +++ b/current/ | /dev/null - /// and replaces uuid with the external paths tracking baseline/current. - fn rewrite_diff_paths(&self, diff: &str) -> String { - let mut out = String::new(); - for line in diff.lines() { - if let Some(rest) = line.strip_prefix("diff --git ") { - // Format: diff --git a/ b/ - let parts: Vec<&str> = rest.split_whitespace().collect(); - if parts.len() == 2 { - let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]); - let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]); - - let a_ext_display = if a == "/dev/null" { - "/dev/null".to_string() - } else { - let a_base = Path::new(a) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or(a); - let mapped = self - .temp_name_to_baseline_external - .get(a_base) - .cloned() - .unwrap_or_else(|| PathBuf::from(a)); - mapped.display().to_string() - }; - - let b_ext_display = if b == "/dev/null" { - "/dev/null".to_string() - } else { - let b_base = Path::new(b) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or(b); - let mapped = self - .temp_name_to_current_external - .get(b_base) - .cloned() - .unwrap_or_else(|| PathBuf::from(b)); - mapped.display().to_string() - }; - - out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n")); - continue; - } - } - if let Some(rest) = line.strip_prefix("--- ") { - if let Some(path) = rest.strip_prefix("a/") { - let external_display = if path == "/dev/null" { - "/dev/null".to_string() - } else { - let p_base = Path::new(path) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or(path); - self.temp_name_to_baseline_external - .get(p_base) - .cloned() - .unwrap_or_else(|| PathBuf::from(path)) - .display() - .to_string() - }; - out.push_str(&format!("--- {external_display}\n")); - continue; - } - } - if let Some(rest) = line.strip_prefix("+++ ") { - if let Some(path) = rest.strip_prefix("b/") { - let external_display = if path == "/dev/null" { - "/dev/null".to_string() - } else { - let p_base = Path::new(path) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or(path); - self.temp_name_to_current_external - .get(p_base) - .cloned() - .unwrap_or_else(|| 
PathBuf::from(path)) - .display() - .to_string() - }; - out.push_str(&format!("+++ {external_display}\n")); - continue; - } - } - out.push_str(line); - out.push('\n'); - } - out - } } fn uuid_filename_for(path: &Path) -> String { @@ -322,28 +186,6 @@ fn uuid_filename_for(path: &Path) -> String { } } -fn run_git_allow_exit_codes( - repo: &Path, - args: &[&str], - allowed_exit_codes: &[i32], -) -> Result { - let output = Command::new("git") - .current_dir(repo) - .args(args) - .output() - .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?; - let code = output.status.code().unwrap_or(-1); - if !allowed_exit_codes.contains(&code) { - anyhow::bail!( - "git {:?} failed with status {:?}: {}", - args, - output.status, - String::from_utf8_lossy(&output.stderr) - ); - } - Ok(String::from_utf8_lossy(&output.stdout).into_owned()) -} - #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] From 6bd7ef3d3a833b23f67b34fec613183795dab67c Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 14:51:30 -0500 Subject: [PATCH 10/20] assert_eq --- codex-rs/core/src/codex.rs | 4 +- codex-rs/core/src/turn_diff_tracker.rs | 176 +++++++++++++++++++------ 2 files changed, 140 insertions(+), 40 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 16a06ea9a3..ee9f46dbe3 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -443,7 +443,7 @@ impl Session { // If this is an apply_patch, after we emit the end patch, emit a second event // with the full turn diff if there is one. if is_apply_patch { - let unified_diff = turn_diff_tracker.update_and_get_unified_diff(); + let unified_diff = turn_diff_tracker.get_unified_diff(); if let Ok(Some(unified_diff)) = unified_diff { let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); let event = Event { @@ -1306,7 +1306,7 @@ async fn try_run_turn( .ok(); } - let unified_diff = turn_diff_tracker.update_and_get_unified_diff(); + let unified_diff = turn_diff_tracker.get_unified_diff(); if let Ok(Some(unified_diff)) = unified_diff { let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); let event = Event { diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index b077aac07e..398d59e476 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -64,14 +64,11 @@ impl TurnDiffTracker { } // Track rename/move in current mapping if provided in an Update. - let move_path = match change { - FileChange::Update { - move_path: Some(dest), - .. - } => Some(dest), - _ => None, - }; - if let Some(dest) = move_path { + if let FileChange::Update { + move_path: Some(dest), + .. + } = change + { let uuid_filename = match self.external_to_temp_name.get(path) { Some(i) => i.clone(), None => { @@ -92,28 +89,58 @@ impl TurnDiffTracker { self.external_to_temp_name.remove(path); self.external_to_temp_name .insert(dest.clone(), uuid_filename); - } + }; } Ok(()) } - /// Recompute the aggregated unified diff by comparing all baseline snapshots against - /// current files on disk using the `similar` crate and rewriting paths to external paths. 
- pub fn update_and_get_unified_diff(&mut self) -> Result> { + fn get_path_for_internal(&self, internal: &str) -> Option { + self.temp_name_to_current_external + .get(internal) + .or_else(|| self.temp_name_to_baseline_external.get(internal)) + .cloned() + } + + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were + /// collected before the first time they were touched by apply_patch during this turn with + /// the current repo state. + pub fn get_unified_diff(&mut self) -> Result> { let mut aggregated = String::new(); - // Compute diffs per tracked internal file. - for (internal, baseline_external) in &self.temp_name_to_baseline_external { - let current_external = self - .temp_name_to_current_external - .get(internal) - .cloned() - .unwrap_or_else(|| baseline_external.clone()); + // Compute diffs per tracked internal file in a stable order by external path. + let mut internals: Vec = self + .temp_name_to_baseline_external + .keys() + .cloned() + .collect(); + // Sort lexicographically by external path to match git behavior. + internals.sort_by_key(|a| { + let path = self.get_path_for_internal(a); + match path { + Some(p) => p + .file_name() + .and_then(|s| s.to_str()) + .map(|s| s.to_owned()) + .unwrap_or_default(), + None => String::new(), + } + }); + + for internal in internals { + // Baseline external must exist for any tracked internal. + let baseline_external = match self.temp_name_to_baseline_external.get(&internal) { + Some(p) => p.clone(), + None => continue, + }; + let current_external = match self.get_path_for_internal(&internal) { + Some(p) => p, + None => continue, + }; let left_content = self .baseline_contents - .get(internal) + .get(&internal) .cloned() .unwrap_or(None); @@ -190,8 +217,36 @@ fn uuid_filename_for(path: &Path) -> String { mod tests { #![allow(clippy::unwrap_used)] use super::*; + use pretty_assertions::assert_eq; use tempfile::tempdir; + fn normalize_diff_for_test(input: &str, root: &Path) -> String { + let root_str = root.display().to_string(); + let replaced = input.replace(&root_str, ""); + // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin + let mut blocks: Vec = Vec::new(); + let mut current = String::new(); + for line in replaced.lines() { + if line.starts_with("diff --git ") && !current.is_empty() { + blocks.push(current); + current = String::new(); + } + if !current.is_empty() { + current.push('\n'); + } + current.push_str(line); + } + if !current.is_empty() { + blocks.push(current); + } + blocks.sort(); + let mut out = blocks.join("\n"); + if !out.ends_with('\n') { + out.push('\n'); + } + out + } + #[test] fn accumulates_add_and_update() { let mut acc = TurnDiffTracker::new(); @@ -209,12 +264,17 @@ mod tests { acc.on_patch_begin(&add_changes).unwrap(); // Simulate apply: create the file on disk. - // This must happen after on_patch_begin. fs::write(&file, "foo\n").unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); - assert!(first.contains("+foo")); - assert!(first.contains("/dev/null") || first.contains("new file")); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = r#"diff --git a//a.txt b//a.txt +--- /dev/null ++++ b//a.txt +@@ -0,0 +1 @@ ++foo +"#; + assert_eq!(first, expected_first); // Second patch: update the file on disk. let update_changes = HashMap::from([( @@ -228,9 +288,17 @@ mod tests { // Simulate apply: append a new line. 
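
For reference, the hunk headers asserted in these tests come straight from the `similar` crate, which, like git, omits a range count of 1. A standalone sketch of the call the tracker now makes (file names illustrative):

    use similar::TextDiff;

    fn main() {
        let diff = TextDiff::from_lines("foo\n", "foo\nbar\n");
        let unified = diff
            .unified_diff()
            .context_radius(3)
            .header("a/a.txt", "b/a.txt")
            .to_string();
        // Prints:
        // --- a/a.txt
        // +++ b/a.txt
        // @@ -1 +1,2 @@
        //  foo
        // +bar
        print!("{unified}");
    }
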
fs::write(&file, "foo\nbar\n").unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); - assert!(combined.contains("+bar")); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected_combined = r#"diff --git a//a.txt b//a.txt +--- /dev/null ++++ b//a.txt +@@ -0,0 +1,2 @@ ++foo ++bar +"#; + assert_eq!(combined, expected_combined); } #[test] @@ -245,9 +313,16 @@ mod tests { // Simulate apply: delete the file from disk. fs::remove_file(&file).unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let diff = acc.unified_diff.clone().unwrap(); - assert!(diff.contains("-x")); + let diff = normalize_diff_for_test(&diff, dir.path()); + let expected = r#"diff --git a//b.txt b//b.txt +--- a//b.txt ++++ /dev/null +@@ -1 +0,0 @@ +-x +"#; + assert_eq!(diff, expected); } #[test] @@ -271,10 +346,17 @@ mod tests { fs::rename(&src, &dest).unwrap(); fs::write(&dest, "line2\n").unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let out = acc.unified_diff.clone().unwrap(); - assert!(out.contains("-line")); - assert!(out.contains("+line2")); + let out = normalize_diff_for_test(&out, dir.path()); + let expected = r#"diff --git a//src.txt b//dst.txt +--- a//src.txt ++++ b//dst.txt +@@ -1 +1 @@ +-line ++line2 +"#; + assert_eq!(out, expected); } #[test] @@ -298,21 +380,39 @@ mod tests { acc.on_patch_begin(&update_a).unwrap(); // Simulate apply: modify a.txt on disk. fs::write(&a, "foo\nbar\n").unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); - assert!(first.contains("+bar")); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = r#"diff --git a//a.txt b//a.txt +--- a//a.txt ++++ b//a.txt +@@ -1 +1,2 @@ + foo ++bar +"#; + assert_eq!(first, expected_first); // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_b).unwrap(); // Simulate apply: delete b.txt. fs::remove_file(&b).unwrap(); - acc.update_and_get_unified_diff().unwrap(); + acc.get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); - // The combined diff must still include the update to a.txt. - assert!(combined.contains("+bar")); - // And also reflect the deletion of b.txt. - assert!(combined.contains("-z")); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected = r#"diff --git a//a.txt b//a.txt +--- a//a.txt ++++ b//a.txt +@@ -1 +1,2 @@ + foo ++bar +diff --git a//b.txt b//b.txt +--- a//b.txt ++++ /dev/null +@@ -1 +0,0 @@ +-z +"#; + assert_eq!(combined, expected); } } From feeb266968dce9bf4a948704b3df188130a5f100 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 16:19:02 -0500 Subject: [PATCH 11/20] Added index --- codex-rs/core/src/turn_diff_tracker.rs | 130 ++++++++++++++++--------- 1 file changed, 85 insertions(+), 45 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 398d59e476..91110d2d01 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -27,6 +27,8 @@ pub struct TurnDiffTracker { temp_name_to_current_external: HashMap, /// Internal filename -> baseline file contents (None means the file did not exist, i.e. /dev/null). 
baseline_contents: HashMap>, + /// Internal filename -> baseline file mode (100644 or 100755). Only set when baseline file existed. + baseline_mode: HashMap, /// Aggregated unified diff for all accumulated changes across files. pub unified_diff: Option, } @@ -56,6 +58,10 @@ impl TurnDiffTracker { let baseline = if path.exists() { let contents = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; + // Capture baseline mode for later file mode lines. + if let Some(mode) = file_mode_for_path(path) { + self.baseline_mode.insert(internal.clone(), mode); + } Some(String::from_utf8_lossy(&contents).into_owned()) } else { None @@ -172,6 +178,30 @@ impl TurnDiffTracker { // Emit a git-style header for better readability and parity with previous behavior. aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n")); + // Emit file mode lines and index line similar to git. + let is_add = left_content.is_none() && right_content.is_some(); + let is_delete = left_content.is_some() && right_content.is_none(); + + // Determine modes. + let baseline_mode = self + .baseline_mode + .get(&internal) + .cloned() + .unwrap_or_else(|| "100644".to_string()); + let current_mode = + file_mode_for_path(¤t_external).unwrap_or_else(|| "100644".to_string()); + + if is_add { + aggregated.push_str(&format!("new file mode {current_mode}\n")); + } else if is_delete { + aggregated.push_str(&format!("deleted file mode {baseline_mode}\n")); + } else if baseline_mode != current_mode { + aggregated.push_str(&format!("old mode {baseline_mode}\n")); + aggregated.push_str(&format!("new mode {current_mode}\n")); + } + + aggregated.push_str(&format!("index {ZERO_OID}..{ZERO_OID}\n")); + let old_header = if left_content.is_some() { format!("a/{left_display}") } else { @@ -213,6 +243,28 @@ fn uuid_filename_for(path: &Path) -> String { } } +const ZERO_OID: &str = "0000000000000000000000000000000000000000"; + +fn file_mode_for_path(path: &Path) -> Option { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let meta = fs::metadata(path).ok()?; + let mode = meta.permissions().mode(); + let is_exec = (mode & 0o111) != 0; + Some(if is_exec { + "100755".to_string() + } else { + "100644".to_string() + }) + } + #[cfg(not(unix))] + { + // Default to non-executable on non-unix. + Some("100644".to_string()) + } +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -268,12 +320,12 @@ mod tests { acc.get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); - let expected_first = r#"diff --git a//a.txt b//a.txt ---- /dev/null -+++ b//a.txt -@@ -0,0 +1 @@ -+foo -"#; + let expected_first = { + let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + format!( + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1 @@\n+foo\n", + ) + }; assert_eq!(first, expected_first); // Second patch: update the file on disk. 
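// For reference, the header block assembled above renders like this for a
// newly added executable file (path illustrative; at this stage the index
// line still carries the zero OID on both sides):
//
//   diff --git a/scripts/run.sh b/scripts/run.sh
//   new file mode 100755
//   index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
//   --- /dev/null
//   +++ b/scripts/run.sh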
@@ -291,13 +343,12 @@ mod tests { acc.get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); - let expected_combined = r#"diff --git a//a.txt b//a.txt ---- /dev/null -+++ b//a.txt -@@ -0,0 +1,2 @@ -+foo -+bar -"#; + let expected_combined = { + let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + format!( + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n", + ) + }; assert_eq!(combined, expected_combined); } @@ -312,16 +363,14 @@ mod tests { acc.on_patch_begin(&del_changes).unwrap(); // Simulate apply: delete the file from disk. + let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); fs::remove_file(&file).unwrap(); acc.get_unified_diff().unwrap(); let diff = acc.unified_diff.clone().unwrap(); let diff = normalize_diff_for_test(&diff, dir.path()); - let expected = r#"diff --git a//b.txt b//b.txt ---- a//b.txt -+++ /dev/null -@@ -1 +0,0 @@ --x -"#; + let expected = format!( + "diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n", + ); assert_eq!(diff, expected); } @@ -349,13 +398,11 @@ mod tests { acc.get_unified_diff().unwrap(); let out = acc.unified_diff.clone().unwrap(); let out = normalize_diff_for_test(&out, dir.path()); - let expected = r#"diff --git a//src.txt b//dst.txt ---- a//src.txt -+++ b//dst.txt -@@ -1 +1 @@ --line -+line2 -"#; + let expected = { + format!( + "diff --git a//src.txt b//dst.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//src.txt\n+++ b//dst.txt\n@@ -1 +1 @@\n-line\n+line2\n" + ) + }; assert_eq!(out, expected); } @@ -383,36 +430,29 @@ mod tests { acc.get_unified_diff().unwrap(); let first = acc.unified_diff.clone().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); - let expected_first = r#"diff --git a//a.txt b//a.txt ---- a//a.txt -+++ b//a.txt -@@ -1 +1,2 @@ - foo -+bar -"#; + let expected_first = { + format!( + "diff --git a//a.txt b//a.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n" + ) + }; assert_eq!(first, expected_first); // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_b).unwrap(); // Simulate apply: delete b.txt. 
+ let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| "100644".to_string()); fs::remove_file(&b).unwrap(); acc.get_unified_diff().unwrap(); let combined = acc.unified_diff.clone().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); - let expected = r#"diff --git a//a.txt b//a.txt ---- a//a.txt -+++ b//a.txt -@@ -1 +1,2 @@ - foo -+bar -diff --git a//b.txt b//b.txt ---- a//b.txt -+++ /dev/null -@@ -1 +0,0 @@ --z -"#; + let expected = { + format!( + "diff --git a//a.txt b//a.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n\ + diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-z\n", + ) + }; assert_eq!(combined, expected); } } From 69fb63979fdd0bb014bedea48f68c080e9549ada Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 16:43:57 -0500 Subject: [PATCH 12/20] Find git root works --- codex-rs/core/src/turn_diff_tracker.rs | 53 +++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 91110d2d01..373da8688b 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -31,6 +31,8 @@ pub struct TurnDiffTracker { baseline_mode: HashMap, /// Aggregated unified diff for all accumulated changes across files. pub unified_diff: Option, + /// Cache of known git worktree roots to avoid repeated filesystem walks. + git_root_cache: Vec, } impl TurnDiffTracker { @@ -108,6 +110,53 @@ impl TurnDiffTracker { .cloned() } + /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry. + /// Uses a simple cache of known roots and avoids negative-result caching for simplicity. + fn find_git_root_cached(&mut self, start: &Path) -> Option { + let dir = if start.is_dir() { + start + } else { + start.parent()? + }; + + // Fast path: if any cached root is an ancestor of this path, use it. + if let Some(root) = self + .git_root_cache + .iter() + .find(|r| dir.starts_with(r)) + .cloned() + { + return Some(root); + } + + // Walk up to find a `.git` marker. + let mut cur = dir.to_path_buf(); + loop { + let git_marker = cur.join(".git"); + if git_marker.is_dir() || git_marker.is_file() { + if !self.git_root_cache.iter().any(|r| r == &cur) { + self.git_root_cache.push(cur.clone()); + } + return Some(cur); + } + if let Some(parent) = cur.parent() { + cur = parent.to_path_buf(); + } else { + return None; + } + } + } + + /// Return a display string for `path` relative to its git root if found, else absolute. + fn relative_to_git_root_str(&mut self, path: &Path) -> String { + if let Some(root) = self.find_git_root_cached(path) { + if let Ok(rel) = path.strip_prefix(&root) { + return rel.display().to_string(); + } + } + path.display().to_string() + } + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were /// collected before the first time they were touched by apply_patch during this turn with /// the current repo state. @@ -169,8 +218,8 @@ impl TurnDiffTracker { continue; } - let left_display = baseline_external.display().to_string(); - let right_display = current_external.display().to_string(); + let left_display = self.relative_to_git_root_str(&baseline_external); + let right_display = self.relative_to_git_root_str(¤t_external); // Diff the contents. 
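// `TextDiff::from_lines` compares whole lines, matching `git diff`
// granularity; e.g. from_lines("line\n", "line2\n") produces the single
// hunk "@@ -1 +1 @@" with "-line" and "+line2", exactly what the rename
// test below asserts.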
let diff = similar::TextDiff::from_lines(left_text, right_text); From 75bab53f64b5f3492de032460a28f4e422e56bf4 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 17:17:24 -0500 Subject: [PATCH 13/20] Add sha1 --- codex-rs/core/src/turn_diff_tracker.rs | 59 +++++++++++++++++++++----- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 373da8688b..eefe998102 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; use anyhow::Context; use anyhow::Result; +use sha1::Digest; use uuid::Uuid; use crate::protocol::FileChange; @@ -249,7 +250,17 @@ impl TurnDiffTracker { aggregated.push_str(&format!("new mode {current_mode}\n")); } - aggregated.push_str(&format!("index {ZERO_OID}..{ZERO_OID}\n")); + // Compute blob object IDs for left and right contents. + let left_oid = left_content + .as_ref() + .map(|s| git_blob_sha1_hex(s)) + .unwrap_or_else(|| ZERO_OID.to_string()); + let right_oid = right_content + .as_ref() + .map(|s| git_blob_sha1_hex(s)) + .unwrap_or_else(|| ZERO_OID.to_string()); + + aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); let old_header = if left_content.is_some() { format!("a/{left_display}") @@ -294,6 +305,22 @@ fn uuid_filename_for(path: &Path) -> String { const ZERO_OID: &str = "0000000000000000000000000000000000000000"; +/// Compute the Git SHA-1 blob object ID for the given content. +fn git_blob_sha1_hex(data: &str) -> String { + // Git blob hash is sha1 of: "blob \0" + let header = format!("blob {}\0", data.len()); + let mut hasher = sha1::Sha1::new(); + hasher.update(header.as_bytes()); + hasher.update(data.as_bytes()); + let digest = hasher.finalize(); + let mut out = String::with_capacity(40); + for b in digest { + use std::fmt::Write; + let _ = write!(&mut out, "{b:02x}"); + } + out +} + fn file_mode_for_path(path: &Path) -> Option { #[cfg(unix)] { @@ -371,8 +398,9 @@ mod tests { let first = normalize_diff_for_test(&first, dir.path()); let expected_first = { let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + let right_oid = git_blob_sha1_hex("foo\n"); format!( - "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1 @@\n+foo\n", + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1 @@\n+foo\n", ) }; assert_eq!(first, expected_first); @@ -394,8 +422,9 @@ mod tests { let combined = normalize_diff_for_test(&combined, dir.path()); let expected_combined = { let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + let right_oid = git_blob_sha1_hex("foo\nbar\n"); format!( - "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n", + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n", ) }; assert_eq!(combined, expected_combined); @@ -417,9 +446,12 @@ mod tests { acc.get_unified_diff().unwrap(); let diff = acc.unified_diff.clone().unwrap(); let diff = normalize_diff_for_test(&diff, dir.path()); - let expected = format!( - "diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n", - ); + let expected = { + let left_oid = 
git_blob_sha1_hex("x\n"); + format!( + "diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n", + ) + }; assert_eq!(diff, expected); } @@ -448,8 +480,10 @@ mod tests { let out = acc.unified_diff.clone().unwrap(); let out = normalize_diff_for_test(&out, dir.path()); let expected = { + let left_oid = git_blob_sha1_hex("line\n"); + let right_oid = git_blob_sha1_hex("line2\n"); format!( - "diff --git a//src.txt b//dst.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//src.txt\n+++ b//dst.txt\n@@ -1 +1 @@\n-line\n+line2\n" + "diff --git a//src.txt b//dst.txt\nindex {left_oid}..{right_oid}\n--- a//src.txt\n+++ b//dst.txt\n@@ -1 +1 @@\n-line\n+line2\n" ) }; assert_eq!(out, expected); @@ -480,8 +514,10 @@ mod tests { let first = acc.unified_diff.clone().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); let expected_first = { + let left_oid = git_blob_sha1_hex("foo\n"); + let right_oid = git_blob_sha1_hex("foo\nbar\n"); format!( - "diff --git a//a.txt b//a.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n" + "diff --git a//a.txt b//a.txt\nindex {left_oid}..{right_oid}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n" ) }; assert_eq!(first, expected_first); @@ -497,9 +533,12 @@ mod tests { let combined = acc.unified_diff.clone().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); let expected = { + let left_oid_a = git_blob_sha1_hex("foo\n"); + let right_oid_a = git_blob_sha1_hex("foo\nbar\n"); + let left_oid_b = git_blob_sha1_hex("z\n"); format!( - "diff --git a//a.txt b//a.txt\nindex {ZERO_OID}..{ZERO_OID}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n\ - diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {ZERO_OID}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-z\n", + "diff --git a//a.txt b//a.txt\nindex {left_oid_a}..{right_oid_a}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n\ + diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid_b}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-z\n", ) }; assert_eq!(combined, expected); From ffdc6fb5efe887f68a27d9b51ccb7fd202ce59d7 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 17:35:30 -0500 Subject: [PATCH 14/20] Remove field --- codex-rs/core/src/turn_diff_tracker.rs | 30 +++++++++----------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index eefe998102..2d88b36e5f 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -30,8 +30,6 @@ pub struct TurnDiffTracker { baseline_contents: HashMap>, /// Internal filename -> baseline file mode (100644 or 100755). Only set when baseline file existed. baseline_mode: HashMap, - /// Aggregated unified diff for all accumulated changes across files. - pub unified_diff: Option, /// Cache of known git worktree roots to avoid repeated filesystem walks. git_root_cache: Vec, } @@ -285,13 +283,11 @@ impl TurnDiffTracker { } } - self.unified_diff = if aggregated.trim().is_empty() { - None + if aggregated.trim().is_empty() { + Ok(None) } else { - Some(aggregated) - }; - - Ok(self.unified_diff.clone()) + Ok(Some(aggregated)) + } } } @@ -393,8 +389,7 @@ mod tests { // Simulate apply: create the file on disk. 
fs::write(&file, "foo\n").unwrap(); - acc.get_unified_diff().unwrap(); - let first = acc.unified_diff.clone().unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); let expected_first = { let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); @@ -417,8 +412,7 @@ mod tests { // Simulate apply: append a new line. fs::write(&file, "foo\nbar\n").unwrap(); - acc.get_unified_diff().unwrap(); - let combined = acc.unified_diff.clone().unwrap(); + let combined = acc.get_unified_diff().unwrap().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); let expected_combined = { let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); @@ -443,8 +437,7 @@ mod tests { // Simulate apply: delete the file from disk. let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); fs::remove_file(&file).unwrap(); - acc.get_unified_diff().unwrap(); - let diff = acc.unified_diff.clone().unwrap(); + let diff = acc.get_unified_diff().unwrap().unwrap(); let diff = normalize_diff_for_test(&diff, dir.path()); let expected = { let left_oid = git_blob_sha1_hex("x\n"); @@ -476,8 +469,7 @@ mod tests { fs::rename(&src, &dest).unwrap(); fs::write(&dest, "line2\n").unwrap(); - acc.get_unified_diff().unwrap(); - let out = acc.unified_diff.clone().unwrap(); + let out = acc.get_unified_diff().unwrap().unwrap(); let out = normalize_diff_for_test(&out, dir.path()); let expected = { let left_oid = git_blob_sha1_hex("line\n"); @@ -510,8 +502,7 @@ mod tests { acc.on_patch_begin(&update_a).unwrap(); // Simulate apply: modify a.txt on disk. fs::write(&a, "foo\nbar\n").unwrap(); - acc.get_unified_diff().unwrap(); - let first = acc.unified_diff.clone().unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); let expected_first = { let left_oid = git_blob_sha1_hex("foo\n"); @@ -528,9 +519,8 @@ mod tests { // Simulate apply: delete b.txt. let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| "100644".to_string()); fs::remove_file(&b).unwrap(); - acc.get_unified_diff().unwrap(); - let combined = acc.unified_diff.clone().unwrap(); + let combined = acc.get_unified_diff().unwrap().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); let expected = { let left_oid_a = git_blob_sha1_hex("foo\n"); From de85ee8ba999c2878e55517099fa672a73ab6d32 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Fri, 1 Aug 2025 18:17:58 -0500 Subject: [PATCH 15/20] Get actual oid --- codex-rs/core/src/turn_diff_tracker.rs | 63 +++++++++++++++++--- codex-rs/mcp-server/src/conversation_loop.rs | 1 + 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 2d88b36e5f..de6d8cdcd0 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use std::fs; use std::path::Path; use std::path::PathBuf; +use std::process::Command; use anyhow::Context; use anyhow::Result; @@ -32,6 +33,9 @@ pub struct TurnDiffTracker { baseline_mode: HashMap, /// Cache of known git worktree roots to avoid repeated filesystem walks. git_root_cache: Vec, + /// Internal filename -> baseline blob OID as observed at the time the change was first seen. + /// ZERO_OID when the file did not exist. 
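/// (ZERO_OID is the all-zeros SHA-1 that git prints for the missing side of
/// an index line, e.g. "index 0000000..257cc56" for a newly added file.)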
+ baseline_oid: HashMap, } impl TurnDiffTracker { @@ -63,8 +67,17 @@ impl TurnDiffTracker { if let Some(mode) = file_mode_for_path(path) { self.baseline_mode.insert(internal.clone(), mode); } - Some(String::from_utf8_lossy(&contents).into_owned()) + let s = String::from_utf8_lossy(&contents).into_owned(); + // Record baseline OID from git for the file's repository, falling back to content hash. + let oid = self + .git_blob_oid_for_path(path) + .unwrap_or_else(|| git_blob_sha1_hex(&s)); + self.baseline_oid.insert(internal.clone(), oid); + Some(s) } else { + // When the file did not exist, record ZERO_OID as baseline. + self.baseline_oid + .insert(internal.clone(), ZERO_OID.to_string()); None }; self.baseline_contents.insert(internal.clone(), baseline); @@ -86,6 +99,7 @@ impl TurnDiffTracker { .insert(i.clone(), path.clone()); // No on-disk file read here; treat as addition. self.baseline_contents.insert(i.clone(), None); + self.baseline_oid.insert(i.clone(), ZERO_OID.to_string()); i } }; @@ -156,6 +170,27 @@ impl TurnDiffTracker { path.display().to_string() } + /// Ask git to compute the blob SHA-1 for the file at `path` within its repository. + /// Returns None if no repository is found or git invocation fails. + fn git_blob_oid_for_path(&mut self, path: &Path) -> Option { + let root = self.find_git_root_cached(path)?; + // Compute a path relative to the repo root for better portability across platforms. + let rel = path.strip_prefix(&root).unwrap_or(path); + let output = Command::new("git") + .arg("-C") + .arg(&root) + .arg("hash-object") + .arg("--") + .arg(rel) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let s = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if s.len() == 40 { Some(s) } else { None } + } + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were /// collected before the first time they were touched by apply_patch during this turn with /// the current repo state. @@ -248,15 +283,25 @@ impl TurnDiffTracker { aggregated.push_str(&format!("new mode {current_mode}\n")); } - // Compute blob object IDs for left and right contents. - let left_oid = left_content - .as_ref() - .map(|s| git_blob_sha1_hex(s)) - .unwrap_or_else(|| ZERO_OID.to_string()); - let right_oid = right_content - .as_ref() - .map(|s| git_blob_sha1_hex(s)) + // Determine blob object IDs for left and right contents. Prefer stored OIDs + // captured from the original repo state when the change was first seen. 
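// Worked example of the fallback hashing: for the content "foo\n", git
// hashes sha1("blob 4\0foo\n"), which is 257cc5642cb1a054f08cc83f2d943e56fd3ebe99,
// the same value `git hash-object` prints for a file with that content. The
// fallback therefore agrees with OIDs read from the repository.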
+ let left_oid = self + .baseline_oid + .get(&internal) + .cloned() + .or_else(|| { + left_content + .as_ref() + .map(|s| git_blob_sha1_hex(s)) + .or(Some(ZERO_OID.to_string())) + }) .unwrap_or_else(|| ZERO_OID.to_string()); + let right_oid = if right_content.is_some() { + self.git_blob_oid_for_path(¤t_external) + .unwrap_or_else(|| git_blob_sha1_hex(right_text)) + } else { + ZERO_OID.to_string() + }; aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); diff --git a/codex-rs/mcp-server/src/conversation_loop.rs b/codex-rs/mcp-server/src/conversation_loop.rs index 534275181a..1db39a2306 100644 --- a/codex-rs/mcp-server/src/conversation_loop.rs +++ b/codex-rs/mcp-server/src/conversation_loop.rs @@ -97,6 +97,7 @@ pub async fn run_conversation_loop( | EventMsg::McpToolCallEnd(_) | EventMsg::ExecCommandBegin(_) | EventMsg::ExecCommandEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::BackgroundEvent(_) | EventMsg::ExecCommandOutputDelta(_) | EventMsg::PatchApplyBegin(_) From c52f7ef6db84c8b8f55288e823b49bdf30ade446 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Sat, 2 Aug 2025 12:20:00 -0500 Subject: [PATCH 16/20] Migrate to BaselineFileInfo --- codex-rs/core/src/turn_diff_tracker.rs | 99 ++++++++++++++------------ 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index de6d8cdcd0..f8738d1bcc 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -11,6 +11,13 @@ use uuid::Uuid; use crate::protocol::FileChange; +struct BaselineFileInfo { + path: Option, + contents: Option, + mode: Option, + oid: Option, +} + /// Tracks sets of changes to files and exposes the overall unified diff. /// Internally, the way this works is now: /// 1. Maintain an in-memory baseline snapshot of files when they are first seen. @@ -22,20 +29,13 @@ use crate::protocol::FileChange; pub struct TurnDiffTracker { /// Map external path -> internal filename (uuid + same extension). external_to_temp_name: HashMap, - /// Internal filename -> external path as of baseline snapshot. - temp_name_to_baseline_external: HashMap, /// Internal filename -> external path as of current accumulated state (after applying all changes). /// This is where renames are tracked. temp_name_to_current_external: HashMap, - /// Internal filename -> baseline file contents (None means the file did not exist, i.e. /dev/null). - baseline_contents: HashMap>, - /// Internal filename -> baseline file mode (100644 or 100755). Only set when baseline file existed. - baseline_mode: HashMap, + /// Internal filename -> baseline file info. + baseline_file_info: HashMap, /// Cache of known git worktree roots to avoid repeated filesystem walks. git_root_cache: Vec, - /// Internal filename -> baseline blob OID as observed at the time the change was first seen. - /// ZERO_OID when the file did not exist. - baseline_oid: HashMap, } impl TurnDiffTracker { @@ -54,33 +54,32 @@ impl TurnDiffTracker { let internal = uuid_filename_for(path); self.external_to_temp_name .insert(path.clone(), internal.clone()); - self.temp_name_to_baseline_external - .insert(internal.clone(), path.clone()); self.temp_name_to_current_external .insert(internal.clone(), path.clone()); // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. 
- let baseline = if path.exists() { - let contents = fs::read(path) + let (contents, mode, oid) = if path.exists() { + let contents_bytes = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; - // Capture baseline mode for later file mode lines. - if let Some(mode) = file_mode_for_path(path) { - self.baseline_mode.insert(internal.clone(), mode); - } - let s = String::from_utf8_lossy(&contents).into_owned(); - // Record baseline OID from git for the file's repository, falling back to content hash. + let s = String::from_utf8_lossy(&contents_bytes).into_owned(); + let mode = file_mode_for_path(path); let oid = self .git_blob_oid_for_path(path) .unwrap_or_else(|| git_blob_sha1_hex(&s)); - self.baseline_oid.insert(internal.clone(), oid); - Some(s) + (Some(s), mode, Some(oid)) } else { - // When the file did not exist, record ZERO_OID as baseline. - self.baseline_oid - .insert(internal.clone(), ZERO_OID.to_string()); - None + (None, None, Some(ZERO_OID.to_string())) }; - self.baseline_contents.insert(internal.clone(), baseline); + + self.baseline_file_info.insert( + internal.clone(), + BaselineFileInfo { + path: Some(path.clone()), + contents, + mode, + oid, + }, + ); } // Track rename/move in current mapping if provided in an Update. @@ -95,11 +94,16 @@ impl TurnDiffTracker { // This should be rare, but if we haven't mapped the source, create it with no baseline. let i = uuid_filename_for(path); self.external_to_temp_name.insert(path.clone(), i.clone()); - self.temp_name_to_baseline_external - .insert(i.clone(), path.clone()); // No on-disk file read here; treat as addition. - self.baseline_contents.insert(i.clone(), None); - self.baseline_oid.insert(i.clone(), ZERO_OID.to_string()); + self.baseline_file_info.insert( + i.clone(), + BaselineFileInfo { + path: Some(path.clone()), + contents: None, + mode: None, + oid: Some(ZERO_OID.to_string()), + }, + ); i } }; @@ -119,8 +123,12 @@ impl TurnDiffTracker { fn get_path_for_internal(&self, internal: &str) -> Option { self.temp_name_to_current_external .get(internal) - .or_else(|| self.temp_name_to_baseline_external.get(internal)) .cloned() + .or_else(|| { + self.baseline_file_info + .get(internal) + .and_then(|info| info.path.clone()) + }) } /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry. @@ -198,11 +206,7 @@ impl TurnDiffTracker { let mut aggregated = String::new(); // Compute diffs per tracked internal file in a stable order by external path. - let mut internals: Vec = self - .temp_name_to_baseline_external - .keys() - .cloned() - .collect(); + let mut internals: Vec = self.baseline_file_info.keys().cloned().collect(); // Sort lexicographically by external path to match git behavior. internals.sort_by_key(|a| { let path = self.get_path_for_internal(a); @@ -218,8 +222,12 @@ impl TurnDiffTracker { for internal in internals { // Baseline external must exist for any tracked internal. 
- let baseline_external = match self.temp_name_to_baseline_external.get(&internal) { - Some(p) => p.clone(), + let baseline_external = match self + .baseline_file_info + .get(&internal) + .and_then(|i| i.path.clone()) + { + Some(p) => p, None => continue, }; let current_external = match self.get_path_for_internal(&internal) { @@ -228,10 +236,9 @@ impl TurnDiffTracker { }; let left_content = self - .baseline_contents + .baseline_file_info .get(&internal) - .cloned() - .unwrap_or(None); + .and_then(|i| i.contents.clone()); let right_content = if current_external.exists() { let contents = fs::read(¤t_external).with_context(|| { @@ -267,9 +274,9 @@ impl TurnDiffTracker { // Determine modes. let baseline_mode = self - .baseline_mode + .baseline_file_info .get(&internal) - .cloned() + .and_then(|i| i.mode.clone()) .unwrap_or_else(|| "100644".to_string()); let current_mode = file_mode_for_path(¤t_external).unwrap_or_else(|| "100644".to_string()); @@ -286,9 +293,9 @@ impl TurnDiffTracker { // Determine blob object IDs for left and right contents. Prefer stored OIDs // captured from the original repo state when the change was first seen. let left_oid = self - .baseline_oid + .baseline_file_info .get(&internal) - .cloned() + .and_then(|i| i.oid.clone()) .or_else(|| { left_content .as_ref() @@ -347,6 +354,8 @@ fn uuid_filename_for(path: &Path) -> String { const ZERO_OID: &str = "0000000000000000000000000000000000000000"; /// Compute the Git SHA-1 blob object ID for the given content. +/// This should only be used as a fallback if we can't get the OID from the original repo state +/// or if the original file is not in a git repo. fn git_blob_sha1_hex(data: &str) -> String { // Git blob hash is sha1 of: "blob \0" let header = format!("blob {}\0", data.len()); From 02a72219d358f3e24ff984229333e66c84dc2558 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Sat, 2 Aug 2025 12:42:26 -0500 Subject: [PATCH 17/20] Better handling for binary files and windows --- codex-rs/core/src/turn_diff_tracker.rs | 279 +++++++++++++++++++------ 1 file changed, 211 insertions(+), 68 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index f8738d1bcc..5fb99adfef 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -6,14 +6,13 @@ use std::process::Command; use anyhow::Context; use anyhow::Result; -use sha1::Digest; use uuid::Uuid; use crate::protocol::FileChange; struct BaselineFileInfo { path: Option, - contents: Option, + contents_bytes: Option>, mode: Option, oid: Option, } @@ -58,15 +57,14 @@ impl TurnDiffTracker { .insert(internal.clone(), path.clone()); // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. 
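// ("/dev/null" is the unified-diff convention for a side that does not
// exist: additions render as "--- /dev/null" and deletions as
// "+++ /dev/null" in the output below.)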
- let (contents, mode, oid) = if path.exists() { + let (contents_bytes, mode, oid) = if path.exists() { let contents_bytes = fs::read(path) .with_context(|| format!("failed to read original {}", path.display()))?; - let s = String::from_utf8_lossy(&contents_bytes).into_owned(); let mode = file_mode_for_path(path); let oid = self .git_blob_oid_for_path(path) - .unwrap_or_else(|| git_blob_sha1_hex(&s)); - (Some(s), mode, Some(oid)) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes)); + (Some(contents_bytes), mode, Some(oid)) } else { (None, None, Some(ZERO_OID.to_string())) }; @@ -75,7 +73,7 @@ impl TurnDiffTracker { internal.clone(), BaselineFileInfo { path: Some(path.clone()), - contents, + contents_bytes, mode, oid, }, @@ -99,7 +97,7 @@ impl TurnDiffTracker { i.clone(), BaselineFileInfo { path: Some(path.clone()), - contents: None, + contents_bytes: None, mode: None, oid: Some(ZERO_OID.to_string()), }, @@ -160,6 +158,15 @@ impl TurnDiffTracker { } return Some(cur); } + + // On Windows, avoid walking above the drive or UNC share root. + #[cfg(windows)] + { + if is_windows_drive_or_unc_root(&cur) { + return None; + } + } + if let Some(parent) = cur.parent() { cur = parent.to_path_buf(); } else { @@ -170,12 +177,16 @@ impl TurnDiffTracker { /// Return a display string for `path` relative to its git root if found, else absolute. fn relative_to_git_root_str(&mut self, path: &Path) -> String { - if let Some(root) = self.find_git_root_cached(path) { + let s = if let Some(root) = self.find_git_root_cached(path) { if let Ok(rel) = path.strip_prefix(&root) { - return rel.display().to_string(); + rel.display().to_string() + } else { + path.display().to_string() } - } - path.display().to_string() + } else { + path.display().to_string() + }; + s.replace('\\', "/") } /// Ask git to compute the blob SHA-1 for the file at `path` within its repository. @@ -206,21 +217,16 @@ impl TurnDiffTracker { let mut aggregated = String::new(); // Compute diffs per tracked internal file in a stable order by external path. - let mut internals: Vec = self.baseline_file_info.keys().cloned().collect(); - // Sort lexicographically by external path to match git behavior. - internals.sort_by_key(|a| { - let path = self.get_path_for_internal(a); - match path { - Some(p) => p - .file_name() - .and_then(|s| s.to_str()) - .map(|s| s.to_owned()) - .unwrap_or_default(), - None => String::new(), - } + let mut baseline_file_names: Vec = + self.baseline_file_info.keys().cloned().collect(); + // Sort lexicographically by full repo-relative path to match git behavior. + baseline_file_names.sort_by_key(|internal| { + self.get_path_for_internal(internal) + .map(|p| self.relative_to_git_root_str(&p)) + .unwrap_or_default() }); - for internal in internals { + for internal in baseline_file_names { // Baseline external must exist for any tracked internal. 
let baseline_external = match self .baseline_file_info @@ -235,42 +241,36 @@ impl TurnDiffTracker { None => continue, }; - let left_content = self + let left_bytes = self .baseline_file_info .get(&internal) - .and_then(|i| i.contents.clone()); + .and_then(|i| i.contents_bytes.clone()); - let right_content = if current_external.exists() { + let right_bytes = if current_external.exists() { let contents = fs::read(¤t_external).with_context(|| { format!( "failed to read current file for diff {}", current_external.display() ) })?; - Some(String::from_utf8_lossy(&contents).into_owned()) + Some(contents) } else { None }; - let left_text = left_content.as_deref().unwrap_or(""); - let right_text = right_content.as_deref().unwrap_or(""); - - if left_text == right_text { + // Fast path: identical bytes or both missing. + if left_bytes.as_deref() == right_bytes.as_deref() { continue; } let left_display = self.relative_to_git_root_str(&baseline_external); let right_display = self.relative_to_git_root_str(¤t_external); - // Diff the contents. - let diff = similar::TextDiff::from_lines(left_text, right_text); - // Emit a git-style header for better readability and parity with previous behavior. aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n")); - // Emit file mode lines and index line similar to git. - let is_add = left_content.is_none() && right_content.is_some(); - let is_delete = left_content.is_some() && right_content.is_none(); + let is_add = left_bytes.is_none() && right_bytes.is_some(); + let is_delete = left_bytes.is_some() && right_bytes.is_none(); // Determine modes. let baseline_mode = self @@ -297,41 +297,86 @@ impl TurnDiffTracker { .get(&internal) .and_then(|i| i.oid.clone()) .or_else(|| { - left_content + left_bytes .as_ref() - .map(|s| git_blob_sha1_hex(s)) + .map(|b| git_blob_sha1_hex_bytes(b)) .or(Some(ZERO_OID.to_string())) }) .unwrap_or_else(|| ZERO_OID.to_string()); - let right_oid = if right_content.is_some() { + let right_oid = if let Some(b) = right_bytes.as_ref() { self.git_blob_oid_for_path(¤t_external) - .unwrap_or_else(|| git_blob_sha1_hex(right_text)) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(b)) } else { ZERO_OID.to_string() }; - aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); - - let old_header = if left_content.is_some() { - format!("a/{left_display}") - } else { - "/dev/null".to_string() - }; - let new_header = if right_content.is_some() { - format!("b/{right_display}") - } else { - "/dev/null".to_string() + // If either side isn't valid UTF-8, emit a binary diff header and continue. + let left_text = left_bytes + .as_deref() + .and_then(|b| std::str::from_utf8(b).ok()); + let right_text = right_bytes + .as_deref() + .and_then(|b| std::str::from_utf8(b).ok()); + + // Prefer text diffs when possible: + // - both sides are valid UTF-8 + // - OR one side is missing (add/delete) and the present side is valid UTF-8 + let can_text_diff = match (left_text, right_text, is_add, is_delete) { + (Some(_), Some(_), _, _) => true, + (_, Some(_), true, _) => true, // add: left missing, right text + (Some(_), _, _, true) => true, // delete: left text, right missing + _ => false, }; - let unified = diff - .unified_diff() - .context_radius(3) - .header(&old_header, &new_header) - .to_string(); + if can_text_diff { + // Diff the contents as text, treating missing side as empty string. + let l = left_text.unwrap_or(""); + let r = right_text.unwrap_or(""); + + // Emit index line without mode suffix to preserve current test expectations. 
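// (git itself abbreviates the OIDs and appends the mode for ordinary
// modifications, e.g. "index 257cc56..5716ca5 100644"; the mode suffix is
// absent only for adds, deletes, and mode changes.)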
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); + + let old_header = if left_bytes.is_some() { + format!("a/{left_display}") + } else { + "/dev/null".to_string() + }; + let new_header = if right_bytes.is_some() { + format!("b/{right_display}") + } else { + "/dev/null".to_string() + }; + + let diff = similar::TextDiff::from_lines(l, r); + let unified = diff + .unified_diff() + .context_radius(3) + .header(&old_header, &new_header) + .to_string(); - aggregated.push_str(&unified); - if !aggregated.ends_with('\n') { - aggregated.push('\n'); + aggregated.push_str(&unified); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); + } + } else { + // Binary or invalid UTF-8: emit header only. + aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); + let old_header = if left_bytes.is_some() { + format!("a/{left_display}") + } else { + "/dev/null".to_string() + }; + let new_header = if right_bytes.is_some() { + format!("b/{right_display}") + } else { + "/dev/null".to_string() + }; + aggregated.push_str(&format!("--- {old_header}\n")); + aggregated.push_str(&format!("+++ {new_header}\n")); + aggregated.push_str("Binary files differ\n"); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); + } } } @@ -353,15 +398,14 @@ fn uuid_filename_for(path: &Path) -> String { const ZERO_OID: &str = "0000000000000000000000000000000000000000"; -/// Compute the Git SHA-1 blob object ID for the given content. -/// This should only be used as a fallback if we can't get the OID from the original repo state -/// or if the original file is not in a git repo. -fn git_blob_sha1_hex(data: &str) -> String { +/// Compute the Git SHA-1 blob object ID for the given content (bytes). +fn git_blob_sha1_hex_bytes(data: &[u8]) -> String { // Git blob hash is sha1 of: "blob \0" let header = format!("blob {}\0", data.len()); + use sha1::Digest; let mut hasher = sha1::Sha1::new(); hasher.update(header.as_bytes()); - hasher.update(data.as_bytes()); + hasher.update(data); let digest = hasher.finalize(); let mut out = String::with_capacity(40); for b in digest { @@ -391,6 +435,16 @@ fn file_mode_for_path(path: &Path) -> Option { } } +#[cfg(windows)] +fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool { + use std::path::Component; + let mut comps = p.components(); + matches!( + (comps.next(), comps.next(), comps.next()), + (Some(Component::Prefix(_)), Some(Component::RootDir), None) + ) +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -398,8 +452,14 @@ mod tests { use pretty_assertions::assert_eq; use tempfile::tempdir; + /// Compute the Git SHA-1 blob object ID for the given content (string). + /// This delegates to the bytes version to avoid UTF-8 lossy conversions here. 
+ fn git_blob_sha1_hex(data: &str) -> String { + git_blob_sha1_hex_bytes(data.as_bytes()) + } + fn normalize_diff_for_test(input: &str, root: &Path) -> String { - let root_str = root.display().to_string(); + let root_str = root.display().to_string().replace('\\', "/"); let replaced = input.replace(&root_str, ""); // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin let mut blocks: Vec = Vec::new(); @@ -535,6 +595,52 @@ mod tests { assert_eq!(out, expected); } + #[test] + fn move_without_content_change_yields_no_diff() { + let dir = tempdir().unwrap(); + let src = dir.path().join("moved.txt"); + let dest = dir.path().join("renamed.txt"); + fs::write(&src, "same\n").unwrap(); + + let mut acc = TurnDiffTracker::new(); + let mv_changes = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv_changes).unwrap(); + + // Simulate apply: move only, no content change. + fs::rename(&src, &dest).unwrap(); + + let diff = acc.get_unified_diff().unwrap(); + assert_eq!(diff, None); + } + + #[test] + fn move_declared_but_file_only_appears_at_dest_is_add() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src.txt"); + let dest = dir.path().join("dest.txt"); + let mut acc = TurnDiffTracker::new(); + let mv = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: "".into(), + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv).unwrap(); + // No file existed initially; create only dest + fs::write(&dest, "hello\n").unwrap(); + let diff = acc.get_unified_diff().unwrap().unwrap(); + assert!(diff.contains("new file mode")); + assert!(diff.contains("--- /dev/null")); + assert!(diff.contains("+++ b/")); + } + #[test] fn update_persists_across_new_baseline_for_new_file() { let dir = tempdir().unwrap(); @@ -587,4 +693,41 @@ mod tests { }; assert_eq!(combined, expected); } + + #[test] + fn binary_files_differ_update() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bin.dat"); + + // Initial non-UTF8 bytes + let left_bytes: Vec = vec![0xff, 0xfe, 0xfd, 0x00]; + // Updated non-UTF8 bytes + let right_bytes: Vec = vec![0x01, 0x02, 0x03, 0x00]; + + fs::write(&file, &left_bytes).unwrap(); + + let mut acc = TurnDiffTracker::new(); + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes).unwrap(); + + // Apply update on disk + fs::write(&file, &right_bytes).unwrap(); + + let diff = acc.get_unified_diff().unwrap().unwrap(); + let diff = normalize_diff_for_test(&diff, dir.path()); + let expected = { + let left_oid = git_blob_sha1_hex_bytes(&left_bytes); + let right_oid = git_blob_sha1_hex_bytes(&right_bytes); + format!( + "diff --git a//bin.dat b//bin.dat\nindex {left_oid}..{right_oid}\n--- a//bin.dat\n+++ b//bin.dat\nBinary files differ\n" + ) + }; + assert_eq!(diff, expected); + } } From df0f792663e515ca4a444745bb0a0a4bd32338e8 Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Sat, 2 Aug 2025 18:54:14 -0500 Subject: [PATCH 18/20] CI --- codex-rs/core/src/turn_diff_tracker.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 5fb99adfef..62d5afef14 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -430,6 +430,8 @@ fn file_mode_for_path(path: &Path) -> Option { } 
#[cfg(not(unix))] { + // Suppress unused variable warning + let _ = path; // Default to non-executable on non-unix. Some("100644".to_string()) } From 7d354d7b12c75218ebd65c0da3a068c15edab29a Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Sun, 3 Aug 2025 09:03:57 -0500 Subject: [PATCH 19/20] Improved symlink handling --- codex-rs/core/src/turn_diff_tracker.rs | 146 +++++++++++++++---------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 62d5afef14..022d914a2a 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -6,6 +6,7 @@ use std::process::Command; use anyhow::Context; use anyhow::Result; +use anyhow::anyhow; use uuid::Uuid; use crate::protocol::FileChange; @@ -28,11 +29,11 @@ struct BaselineFileInfo { pub struct TurnDiffTracker { /// Map external path -> internal filename (uuid + same extension). external_to_temp_name: HashMap, - /// Internal filename -> external path as of current accumulated state (after applying all changes). - /// This is where renames are tracked. - temp_name_to_current_external: HashMap, /// Internal filename -> baseline file info. baseline_file_info: HashMap, + /// Internal filename -> external path as of current accumulated state (after applying all changes). + /// This is where renames are tracked. + temp_name_to_current_path: HashMap, /// Cache of known git worktree roots to avoid repeated filesystem walks. git_root_cache: Vec, } @@ -53,17 +54,22 @@ impl TurnDiffTracker { let internal = uuid_filename_for(path); self.external_to_temp_name .insert(path.clone(), internal.clone()); - self.temp_name_to_current_external + self.temp_name_to_current_path .insert(internal.clone(), path.clone()); // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. let (contents_bytes, mode, oid) = if path.exists() { - let contents_bytes = fs::read(path) - .with_context(|| format!("failed to read original {}", path.display()))?; let mode = file_mode_for_path(path); - let oid = self - .git_blob_oid_for_path(path) - .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes)); + let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE); + let contents_bytes = blob_bytes(path, mode_str) + .unwrap_or_default() + .unwrap_or_default(); + let oid = if mode.as_deref() == Some(SYMLINK_MODE) { + git_blob_sha1_hex_bytes(&contents_bytes) + } else { + self.git_blob_oid_for_path(path) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes)) + }; (Some(contents_bytes), mode, Some(oid)) } else { (None, None, Some(ZERO_OID.to_string())) @@ -91,8 +97,6 @@ impl TurnDiffTracker { None => { // This should be rare, but if we haven't mapped the source, create it with no baseline. let i = uuid_filename_for(path); - self.external_to_temp_name.insert(path.clone(), i.clone()); - // No on-disk file read here; treat as addition. self.baseline_file_info.insert( i.clone(), BaselineFileInfo { @@ -106,7 +110,7 @@ impl TurnDiffTracker { } }; // Update current external mapping for temp file name. - self.temp_name_to_current_external + self.temp_name_to_current_path .insert(uuid_filename.clone(), dest.clone()); // Update forward file_mapping: external current -> internal name. 
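// e.g. a rename from a.txt to b.txt drops the a.txt entry and re-inserts the
// same internal uuid name under b.txt, so later changes to b.txt keep
// accumulating against the original a.txt baseline.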
self.external_to_temp_name.remove(path); @@ -119,7 +123,7 @@ impl TurnDiffTracker { } fn get_path_for_internal(&self, internal: &str) -> Option { - self.temp_name_to_current_external + self.temp_name_to_current_path .get(internal) .cloned() .or_else(|| { @@ -241,22 +245,21 @@ impl TurnDiffTracker { None => continue, }; + // Determine modes early; needed to read symlink content correctly. + let baseline_mode = self + .baseline_file_info + .get(&internal) + .and_then(|i| i.mode.clone()) + .unwrap_or_else(|| REGULAR_MODE.to_string()); + let current_mode = + file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string()); + let left_bytes = self .baseline_file_info .get(&internal) .and_then(|i| i.contents_bytes.clone()); - let right_bytes = if current_external.exists() { - let contents = fs::read(¤t_external).with_context(|| { - format!( - "failed to read current file for diff {}", - current_external.display() - ) - })?; - Some(contents) - } else { - None - }; + let right_bytes = blob_bytes(¤t_external, ¤t_mode)?; // Fast path: identical bytes or both missing. if left_bytes.as_deref() == right_bytes.as_deref() { @@ -272,15 +275,6 @@ impl TurnDiffTracker { let is_add = left_bytes.is_none() && right_bytes.is_some(); let is_delete = left_bytes.is_some() && right_bytes.is_none(); - // Determine modes. - let baseline_mode = self - .baseline_file_info - .get(&internal) - .and_then(|i| i.mode.clone()) - .unwrap_or_else(|| "100644".to_string()); - let current_mode = - file_mode_for_path(¤t_external).unwrap_or_else(|| "100644".to_string()); - if is_add { aggregated.push_str(&format!("new file mode {current_mode}\n")); } else if is_delete { @@ -304,8 +298,12 @@ impl TurnDiffTracker { }) .unwrap_or_else(|| ZERO_OID.to_string()); let right_oid = if let Some(b) = right_bytes.as_ref() { - self.git_blob_oid_for_path(¤t_external) - .unwrap_or_else(|| git_blob_sha1_hex_bytes(b)) + if current_mode == SYMLINK_MODE { + git_blob_sha1_hex_bytes(b) + } else { + self.git_blob_oid_for_path(¤t_external) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(b)) + } } else { ZERO_OID.to_string() }; @@ -396,8 +394,6 @@ fn uuid_filename_for(path: &Path) -> String { } } -const ZERO_OID: &str = "0000000000000000000000000000000000000000"; - /// Compute the Git SHA-1 blob object ID for the given content (bytes). fn git_blob_sha1_hex_bytes(data: &[u8]) -> String { // Git blob hash is sha1 of: "blob \0" @@ -415,28 +411,62 @@ fn git_blob_sha1_hex_bytes(data: &[u8]) -> String { out } +const ZERO_OID: &str = "0000000000000000000000000000000000000000"; +const REGULAR_MODE: &str = "100644"; +const EXECUTABLE_MODE: &str = "100755"; +const SYMLINK_MODE: &str = "120000"; + +#[cfg(unix)] fn file_mode_for_path(path: &Path) -> Option { - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - let meta = fs::metadata(path).ok()?; - let mode = meta.permissions().mode(); - let is_exec = (mode & 0o111) != 0; - Some(if is_exec { - "100755".to_string() - } else { - "100644".to_string() - }) + use std::os::unix::fs::PermissionsExt; + let meta = fs::symlink_metadata(path).ok()?; + let ft = meta.file_type(); + if ft.is_symlink() { + return Some(SYMLINK_MODE.to_string()); } - #[cfg(not(unix))] - { - // Suppress unused variable warning - let _ = path; - // Default to non-executable on non-unix. 
- Some("100644".to_string()) + let mode = meta.permissions().mode(); + let is_exec = (mode & 0o111) != 0; + Some(if is_exec { + EXECUTABLE_MODE.into() + } else { + REGULAR_MODE.into() + }) +} + +#[cfg(not(unix))] +fn file_mode_for_path(_path: &Path) -> Option { + // Default to non-executable on non-unix. + Some(REGULAR_MODE.to_string()) +} + +fn blob_bytes(path: &Path, mode: &str) -> Result>> { + if path.exists() { + let contents = if mode == SYMLINK_MODE { + symlink_blob_bytes(path) + .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))? + } else { + fs::read(path).with_context(|| { + format!("failed to read current file for diff {}", path.display()) + })? + }; + Ok(Some(contents)) + } else { + Ok(None) } } +#[cfg(unix)] +fn symlink_blob_bytes(path: &Path) -> Option> { + use std::os::unix::ffi::OsStrExt; + let target = std::fs::read_link(path).ok()?; + Some(target.as_os_str().as_bytes().to_vec()) +} + +#[cfg(not(unix))] +fn symlink_blob_bytes(_path: &Path) -> Option> { + None +} + #[cfg(windows)] fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool { use std::path::Component; @@ -508,7 +538,7 @@ mod tests { let first = acc.get_unified_diff().unwrap().unwrap(); let first = normalize_diff_for_test(&first, dir.path()); let expected_first = { - let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); let right_oid = git_blob_sha1_hex("foo\n"); format!( "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1 @@\n+foo\n", @@ -531,7 +561,7 @@ mod tests { let combined = acc.get_unified_diff().unwrap().unwrap(); let combined = normalize_diff_for_test(&combined, dir.path()); let expected_combined = { - let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); let right_oid = git_blob_sha1_hex("foo\nbar\n"); format!( "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n", @@ -551,7 +581,7 @@ mod tests { acc.on_patch_begin(&del_changes).unwrap(); // Simulate apply: delete the file from disk. - let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string()); + let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); fs::remove_file(&file).unwrap(); let diff = acc.get_unified_diff().unwrap().unwrap(); let diff = normalize_diff_for_test(&diff, dir.path()); @@ -679,7 +709,7 @@ mod tests { let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); acc.on_patch_begin(&del_b).unwrap(); // Simulate apply: delete b.txt. 
- let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| "100644".to_string()); + let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| REGULAR_MODE.to_string()); fs::remove_file(&b).unwrap(); let combined = acc.get_unified_diff().unwrap().unwrap(); From 592e253dd9fe87e05c4e393c2ea9de7effe22e6b Mon Sep 17 00:00:00 2001 From: Gabriel Peal Date: Sun, 3 Aug 2025 21:02:55 -0500 Subject: [PATCH 20/20] Clippy --- codex-rs/core/src/turn_diff_tracker.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 022d914a2a..69af4b6569 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -413,6 +413,7 @@ fn git_blob_sha1_hex_bytes(data: &[u8]) -> String { const ZERO_OID: &str = "0000000000000000000000000000000000000000"; const REGULAR_MODE: &str = "100644"; +#[cfg(unix)] const EXECUTABLE_MODE: &str = "100755"; const SYMLINK_MODE: &str = "120000";
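For a consumer's-eye view, the series boils down to the following lifecycle, shown here as a minimal sketch modeled on the tests above. The module path, file path, and contents are illustrative assumptions; only the calls exercised by the tests (new, on_patch_begin, get_unified_diff) are taken from the patches.

use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;

use codex_core::protocol::FileChange;
use codex_core::turn_diff_tracker::TurnDiffTracker; // module path assumed public for this sketch

fn demo() -> anyhow::Result<()> {
    let mut tracker = TurnDiffTracker::new();
    let path = PathBuf::from("demo.txt");

    // 1. Snapshot baselines before apply_patch mutates the filesystem.
    let changes = HashMap::from([(
        path.clone(),
        FileChange::Add { content: "foo\n".to_string() },
    )]);
    tracker.on_patch_begin(&changes)?;

    // 2. The patch (or anything else during the turn) changes the file on disk.
    fs::write(&path, "foo\n")?;

    // 3. The aggregated diff compares each baseline snapshot with the current
    //    on-disk state; None means nothing changed.
    if let Some(unified_diff) = tracker.get_unified_diff()? {
        println!("{unified_diff}");
    }
    Ok(())
}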