diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 7d4e41d0b..eb4eccd89 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -699,6 +699,7 @@ dependencies = [ "serde_json", "sha1", "shlex", + "similar", "strum_macros 0.27.2", "tempfile", "thiserror 2.0.12", diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index db3fd4f83..9586470b7 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -34,6 +34,7 @@ serde_json = "1" serde_bytes = "0.11" sha1 = "0.10.6" shlex = "1.3.0" +similar = "2" strum_macros = "0.27.2" thiserror = "2.0.12" time = { version = "0.3", features = ["formatting", "local-offset", "macros"] } diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 7004dcfcb..a850faf99 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -85,11 +85,13 @@ use crate::protocol::SandboxPolicy; use crate::protocol::SessionConfiguredEvent; use crate::protocol::Submission; use crate::protocol::TaskCompleteEvent; +use crate::protocol::TurnDiffEvent; use crate::rollout::RolloutRecorder; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; use crate::safety::assess_safety_for_untrusted_command; use crate::shell; +use crate::turn_diff_tracker::TurnDiffTracker; use crate::user_notification::UserNotification; use crate::util::backoff; @@ -362,7 +364,11 @@ impl Session { } } - async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) { + async fn on_exec_command_begin( + &self, + turn_diff_tracker: &mut TurnDiffTracker, + exec_command_context: ExecCommandContext, + ) { let ExecCommandContext { sub_id, call_id, @@ -374,11 +380,15 @@ impl Session { Some(ApplyPatchCommandContext { user_explicitly_approved_this_action, changes, - }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent { - call_id, - auto_approved: !user_explicitly_approved_this_action, - changes, - }), + }) => { + let _ = turn_diff_tracker.on_patch_begin(&changes); + + EventMsg::PatchApplyBegin(PatchApplyBeginEvent { + call_id, + auto_approved: !user_explicitly_approved_this_action, + changes, + }) + } None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent { call_id, command: command_for_display.clone(), @@ -392,8 +402,10 @@ impl Session { let _ = self.tx_event.send(event).await; } - async fn notify_exec_command_end( + #[allow(clippy::too_many_arguments)] + async fn on_exec_command_end( &self, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, call_id: &str, output: &ExecToolCallOutput, @@ -433,6 +445,20 @@ impl Session { msg, }; let _ = self.tx_event.send(event).await; + + // If this is an apply_patch, after we emit the end patch, emit a second event + // with the full turn diff if there is one. + if is_apply_patch { + let unified_diff = turn_diff_tracker.get_unified_diff(); + if let Ok(Some(unified_diff)) = unified_diff { + let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); + let event = Event { + id: sub_id.into(), + msg, + }; + let _ = self.tx_event.send(event).await; + } + } } /// Helper that emits a BackgroundEvent with the given message. This keeps @@ -1006,6 +1032,10 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { .await; let last_agent_message: Option; + // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains + // many turns, from the perspective of the user, it is a single turn. 
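// Editor's illustration, not part of this patch: the lifecycle described in the comment
// above, reduced to the TurnDiffTracker API added in this change. `aggregate_task_diff`
// and `apply_one_patch` are hypothetical stand-ins for run_task's real turn loop, and the
// sketch assumes HashMap, PathBuf, FileChange and TurnDiffTracker are in scope as they are
// in this file.
#[allow(dead_code)]
fn aggregate_task_diff(
    turns: &[HashMap<PathBuf, FileChange>],
    mut apply_one_patch: impl FnMut(&HashMap<PathBuf, FileChange>),
) -> Option<String> {
    // One tracker lives for the whole task...
    let mut tracker = TurnDiffTracker::new();
    for changes in turns {
        // ...its baselines are primed before each patch mutates the working tree...
        let _ = tracker.on_patch_begin(changes);
        apply_one_patch(changes);
    }
    // ...so a single aggregated diff at the end spans every turn of the task.
    tracker.get_unified_diff().ok().flatten()
}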
+ let mut turn_diff_tracker = TurnDiffTracker::new(); + loop { // Note that pending_input would be something like a message the user // submitted through the UI while the model was running. Though the UI @@ -1037,7 +1067,7 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { }) }) .collect(); - match run_turn(&sess, sub_id.clone(), turn_input).await { + match run_turn(&sess, &mut turn_diff_tracker, sub_id.clone(), turn_input).await { Ok(turn_output) => { let mut items_to_record_in_conversation_history = Vec::::new(); let mut responses = Vec::::new(); @@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc, sub_id: String, input: Vec) { async fn run_turn( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, input: Vec, ) -> CodexResult> { @@ -1177,7 +1208,7 @@ async fn run_turn( let mut retries = 0; loop { - match try_run_turn(sess, &sub_id, &prompt).await { + match try_run_turn(sess, turn_diff_tracker, &sub_id, &prompt).await { Ok(output) => return Ok(output), Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted), Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)), @@ -1223,6 +1254,7 @@ struct ProcessedResponseItem { async fn try_run_turn( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, prompt: &Prompt, ) -> CodexResult> { @@ -1310,7 +1342,8 @@ async fn try_run_turn( match event { ResponseEvent::Created => {} ResponseEvent::OutputItemDone(item) => { - let response = handle_response_item(sess, sub_id, item.clone()).await?; + let response = + handle_response_item(sess, turn_diff_tracker, sub_id, item.clone()).await?; output.push(ProcessedResponseItem { item, response }); } @@ -1328,6 +1361,16 @@ async fn try_run_turn( .ok(); } + let unified_diff = turn_diff_tracker.get_unified_diff(); + if let Ok(Some(unified_diff)) = unified_diff { + let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); + let event = Event { + id: sub_id.to_string(), + msg, + }; + let _ = sess.tx_event.send(event).await; + } + return Ok(output); } ResponseEvent::OutputTextDelta(delta) => { @@ -1432,6 +1475,7 @@ async fn run_compact_task( async fn handle_response_item( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: &str, item: ResponseItem, ) -> CodexResult> { @@ -1469,7 +1513,17 @@ async fn handle_response_item( .. 
} => { info!("FunctionCall: {arguments}"); - Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await) + Some( + handle_function_call( + sess, + turn_diff_tracker, + sub_id.to_string(), + name, + arguments, + call_id, + ) + .await, + ) } ResponseItem::LocalShellCall { id, @@ -1504,6 +1558,7 @@ async fn handle_response_item( handle_container_exec_with_params( exec_params, sess, + turn_diff_tracker, sub_id.to_string(), effective_call_id, ) @@ -1521,6 +1576,7 @@ async fn handle_response_item( async fn handle_function_call( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, name: String, arguments: String, @@ -1534,7 +1590,8 @@ async fn handle_function_call( return *output; } }; - handle_container_exec_with_params(params, sess, sub_id, call_id).await + handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id) + .await } "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, _ => { @@ -1608,6 +1665,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams async fn handle_container_exec_with_params( params: ExecParams, sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, call_id: String, ) -> ResponseInputItem { @@ -1755,7 +1813,7 @@ async fn handle_container_exec_with_params( }, ), }; - sess.notify_exec_command_begin(exec_command_context.clone()) + sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone()) .await; let params = maybe_run_with_user_profile(params, sess); @@ -1782,7 +1840,8 @@ async fn handle_container_exec_with_params( duration, } = &output; - sess.notify_exec_command_end( + sess.on_exec_command_end( + turn_diff_tracker, &sub_id, &call_id, &output, @@ -1806,7 +1865,15 @@ async fn handle_container_exec_with_params( } } Err(CodexErr::Sandbox(error)) => { - handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await + handle_sandbox_error( + turn_diff_tracker, + params, + exec_command_context, + error, + sandbox_type, + sess, + ) + .await } Err(e) => { // Handle non-sandbox errors @@ -1822,6 +1889,7 @@ async fn handle_container_exec_with_params( } async fn handle_sandbox_error( + turn_diff_tracker: &mut TurnDiffTracker, params: ExecParams, exec_command_context: ExecCommandContext, error: SandboxErr, @@ -1878,7 +1946,8 @@ async fn handle_sandbox_error( sess.notify_background_event(&sub_id, "retrying command without sandbox") .await; - sess.notify_exec_command_begin(exec_command_context).await; + sess.on_exec_command_begin(turn_diff_tracker, exec_command_context) + .await; // This is an escalated retry; the policy will not be // examined and the sandbox has been set to `None`. 
@@ -1905,8 +1974,14 @@ async fn handle_sandbox_error( duration, } = &retry_output; - sess.notify_exec_command_end(&sub_id, &call_id, &retry_output, is_apply_patch) - .await; + sess.on_exec_command_end( + turn_diff_tracker, + &sub_id, + &call_id, + &retry_output, + is_apply_patch, + ) + .await; let is_success = *exit_code == 0; let content = format_exec_output( diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 80f901495..4f083d9e5 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -42,6 +42,7 @@ pub(crate) mod safety; pub mod seatbelt; pub mod shell; pub mod spawn; +pub mod turn_diff_tracker; mod user_notification; pub mod util; diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index cbb211d95..82591a2c7 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -387,6 +387,8 @@ pub enum EventMsg { /// Notification that a patch application has finished. PatchApplyEnd(PatchApplyEndEvent), + TurnDiff(TurnDiffEvent), + /// Response to GetHistoryEntryRequest. GetHistoryEntryResponse(GetHistoryEntryResponseEvent), @@ -598,6 +600,11 @@ pub struct PatchApplyEndEvent { pub success: bool, } +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct TurnDiffEvent { + pub unified_diff: String, +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct GetHistoryEntryResponseEvent { pub offset: usize, diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs new file mode 100644 index 000000000..69af4b656 --- /dev/null +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -0,0 +1,766 @@ +use std::collections::HashMap; +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; + +use anyhow::Context; +use anyhow::Result; +use anyhow::anyhow; +use uuid::Uuid; + +use crate::protocol::FileChange; + +struct BaselineFileInfo { + path: Option<PathBuf>, + contents_bytes: Option<Vec<u8>>, + mode: Option<String>, + oid: Option<String>, +} + +/// Tracks sets of changes to files and exposes the overall unified diff. +/// Internally, it works as follows: +/// 1. Maintain an in-memory baseline snapshot of files when they are first seen. +/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null). +/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking. +/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory +/// using the `similar` crate and emit unified diffs with rewritten external paths. +#[derive(Default)] +pub struct TurnDiffTracker { + /// Map external path -> internal filename (uuid + same extension). + external_to_temp_name: HashMap<PathBuf, String>, + /// Internal filename -> baseline file info. + baseline_file_info: HashMap<String, BaselineFileInfo>, + /// Internal filename -> external path as of current accumulated state (after applying all changes). + /// This is where renames are tracked. + temp_name_to_current_path: HashMap<String, PathBuf>, + /// Cache of known git worktree roots to avoid repeated filesystem walks. + git_root_cache: Vec<PathBuf>, +} + +impl TurnDiffTracker { + pub fn new() -> Self { + Self::default() + } + + /// Front-run apply patch calls to track the starting contents of any modified files. + /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen. + /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions. + /// - Also updates internal mappings for move/rename events.
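// Editor's aside, not part of this patch: a minimal sketch of the baseline rules the doc
// comment above describes, using the FileChange variants exercised by the tests at the
// bottom of this file (paths and contents are made up).
//
//     let mut tracker = TurnDiffTracker::new();
//
//     // An Add is tracked with no baseline, so it later diffs against /dev/null.
//     let add = HashMap::from([(
//         PathBuf::from("new.txt"),
//         FileChange::Add { content: "hi\n".to_string() },
//     )]);
//     let _ = tracker.on_patch_begin(&add);
//
//     // An Update snapshots the file's current on-disk bytes as the baseline before
//     // apply_patch rewrites it; a `move_path` only remaps which external path that
//     // baseline is compared against when the unified diff is computed.
//     let update = HashMap::from([(
//         PathBuf::from("old.txt"),
//         FileChange::Update {
//             unified_diff: String::new(),
//             move_path: Some(PathBuf::from("renamed.txt")),
//         },
//     )]);
//     let _ = tracker.on_patch_begin(&update);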
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> { + for (path, change) in changes.iter() { + // Ensure a stable internal filename exists for this external path. + if !self.external_to_temp_name.contains_key(path) { + let internal = uuid_filename_for(path); + self.external_to_temp_name + .insert(path.clone(), internal.clone()); + self.temp_name_to_current_path + .insert(internal.clone(), path.clone()); + + // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. + let (contents_bytes, mode, oid) = if path.exists() { + let mode = file_mode_for_path(path); + let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE); + let contents_bytes = blob_bytes(path, mode_str) + .unwrap_or_default() + .unwrap_or_default(); + let oid = if mode.as_deref() == Some(SYMLINK_MODE) { + git_blob_sha1_hex_bytes(&contents_bytes) + } else { + self.git_blob_oid_for_path(path) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes)) + }; + (Some(contents_bytes), mode, Some(oid)) + } else { + (None, None, Some(ZERO_OID.to_string())) + }; + + self.baseline_file_info.insert( + internal.clone(), + BaselineFileInfo { + path: Some(path.clone()), + contents_bytes, + mode, + oid, + }, + ); + } + + // Track rename/move in current mapping if provided in an Update. + if let FileChange::Update { + move_path: Some(dest), + .. + } = change + { + let uuid_filename = match self.external_to_temp_name.get(path) { + Some(i) => i.clone(), + None => { + // This should be rare, but if we haven't mapped the source, create it with no baseline. + let i = uuid_filename_for(path); + self.baseline_file_info.insert( + i.clone(), + BaselineFileInfo { + path: Some(path.clone()), + contents_bytes: None, + mode: None, + oid: Some(ZERO_OID.to_string()), + }, + ); + i + } + }; + // Update current external mapping for temp file name. + self.temp_name_to_current_path + .insert(uuid_filename.clone(), dest.clone()); + // Update the forward mapping: current external path -> internal name. + self.external_to_temp_name.remove(path); + self.external_to_temp_name + .insert(dest.clone(), uuid_filename); + }; + } + + Ok(()) + } + + fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> { + self.temp_name_to_current_path + .get(internal) + .cloned() + .or_else(|| { + self.baseline_file_info + .get(internal) + .and_then(|info| info.path.clone()) + }) + } + + /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry. + /// Uses a simple cache of known roots and avoids negative-result caching for simplicity. + fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> { + let dir = if start.is_dir() { + start + } else { + start.parent()? + }; + + // Fast path: if any cached root is an ancestor of this path, use it. + if let Some(root) = self + .git_root_cache + .iter() + .find(|r| dir.starts_with(r)) + .cloned() + { + return Some(root); + } + + // Walk up to find a `.git` marker. + let mut cur = dir.to_path_buf(); + loop { + let git_marker = cur.join(".git"); + if git_marker.is_dir() || git_marker.is_file() { + if !self.git_root_cache.iter().any(|r| r == &cur) { + self.git_root_cache.push(cur.clone()); + } + return Some(cur); + } + + // On Windows, avoid walking above the drive or UNC share root.
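// Editor's illustration, not part of this patch: the ancestor walk implemented around this
// point, stripped of caching and of the Windows root check that follows, for readers who
// want the core idea in isolation. Uses only std paths already imported by this module.
#[allow(dead_code)]
fn naive_git_root(start: &Path) -> Option<PathBuf> {
    // Start from the containing directory when given a file path.
    let mut cur = if start.is_dir() { start } else { start.parent()? }.to_path_buf();
    loop {
        // A `.git` entry (directory or gitfile) marks the worktree root.
        if cur.join(".git").exists() {
            return Some(cur);
        }
        let parent = cur.parent()?.to_path_buf();
        cur = parent;
    }
}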
+ #[cfg(windows)] + { + if is_windows_drive_or_unc_root(&cur) { + return None; + } + } + + if let Some(parent) = cur.parent() { + cur = parent.to_path_buf(); + } else { + return None; + } + } + } + + /// Return a display string for `path` relative to its git root if found, else absolute. + fn relative_to_git_root_str(&mut self, path: &Path) -> String { + let s = if let Some(root) = self.find_git_root_cached(path) { + if let Ok(rel) = path.strip_prefix(&root) { + rel.display().to_string() + } else { + path.display().to_string() + } + } else { + path.display().to_string() + }; + s.replace('\\', "/") + } + + /// Ask git to compute the blob SHA-1 for the file at `path` within its repository. + /// Returns None if no repository is found or git invocation fails. + fn git_blob_oid_for_path(&mut self, path: &Path) -> Option { + let root = self.find_git_root_cached(path)?; + // Compute a path relative to the repo root for better portability across platforms. + let rel = path.strip_prefix(&root).unwrap_or(path); + let output = Command::new("git") + .arg("-C") + .arg(&root) + .arg("hash-object") + .arg("--") + .arg(rel) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let s = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if s.len() == 40 { Some(s) } else { None } + } + + /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were + /// collected before the first time they were touched by apply_patch during this turn with + /// the current repo state. + pub fn get_unified_diff(&mut self) -> Result> { + let mut aggregated = String::new(); + + // Compute diffs per tracked internal file in a stable order by external path. + let mut baseline_file_names: Vec = + self.baseline_file_info.keys().cloned().collect(); + // Sort lexicographically by full repo-relative path to match git behavior. + baseline_file_names.sort_by_key(|internal| { + self.get_path_for_internal(internal) + .map(|p| self.relative_to_git_root_str(&p)) + .unwrap_or_default() + }); + + for internal in baseline_file_names { + // Baseline external must exist for any tracked internal. + let baseline_external = match self + .baseline_file_info + .get(&internal) + .and_then(|i| i.path.clone()) + { + Some(p) => p, + None => continue, + }; + let current_external = match self.get_path_for_internal(&internal) { + Some(p) => p, + None => continue, + }; + + // Determine modes early; needed to read symlink content correctly. + let baseline_mode = self + .baseline_file_info + .get(&internal) + .and_then(|i| i.mode.clone()) + .unwrap_or_else(|| REGULAR_MODE.to_string()); + let current_mode = + file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string()); + + let left_bytes = self + .baseline_file_info + .get(&internal) + .and_then(|i| i.contents_bytes.clone()); + + let right_bytes = blob_bytes(¤t_external, ¤t_mode)?; + + // Fast path: identical bytes or both missing. + if left_bytes.as_deref() == right_bytes.as_deref() { + continue; + } + + let left_display = self.relative_to_git_root_str(&baseline_external); + let right_display = self.relative_to_git_root_str(¤t_external); + + // Emit a git-style header for better readability and parity with previous behavior. 
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n")); + + let is_add = left_bytes.is_none() && right_bytes.is_some(); + let is_delete = left_bytes.is_some() && right_bytes.is_none(); + + if is_add { + aggregated.push_str(&format!("new file mode {current_mode}\n")); + } else if is_delete { + aggregated.push_str(&format!("deleted file mode {baseline_mode}\n")); + } else if baseline_mode != current_mode { + aggregated.push_str(&format!("old mode {baseline_mode}\n")); + aggregated.push_str(&format!("new mode {current_mode}\n")); + } + + // Determine blob object IDs for left and right contents. Prefer stored OIDs + // captured from the original repo state when the change was first seen. + let left_oid = self + .baseline_file_info + .get(&internal) + .and_then(|i| i.oid.clone()) + .or_else(|| { + left_bytes + .as_ref() + .map(|b| git_blob_sha1_hex_bytes(b)) + .or(Some(ZERO_OID.to_string())) + }) + .unwrap_or_else(|| ZERO_OID.to_string()); + let right_oid = if let Some(b) = right_bytes.as_ref() { + if current_mode == SYMLINK_MODE { + git_blob_sha1_hex_bytes(b) + } else { + self.git_blob_oid_for_path(¤t_external) + .unwrap_or_else(|| git_blob_sha1_hex_bytes(b)) + } + } else { + ZERO_OID.to_string() + }; + + // If either side isn't valid UTF-8, emit a binary diff header and continue. + let left_text = left_bytes + .as_deref() + .and_then(|b| std::str::from_utf8(b).ok()); + let right_text = right_bytes + .as_deref() + .and_then(|b| std::str::from_utf8(b).ok()); + + // Prefer text diffs when possible: + // - both sides are valid UTF-8 + // - OR one side is missing (add/delete) and the present side is valid UTF-8 + let can_text_diff = match (left_text, right_text, is_add, is_delete) { + (Some(_), Some(_), _, _) => true, + (_, Some(_), true, _) => true, // add: left missing, right text + (Some(_), _, _, true) => true, // delete: left text, right missing + _ => false, + }; + + if can_text_diff { + // Diff the contents as text, treating missing side as empty string. + let l = left_text.unwrap_or(""); + let r = right_text.unwrap_or(""); + + // Emit index line without mode suffix to preserve current test expectations. + aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); + + let old_header = if left_bytes.is_some() { + format!("a/{left_display}") + } else { + "/dev/null".to_string() + }; + let new_header = if right_bytes.is_some() { + format!("b/{right_display}") + } else { + "/dev/null".to_string() + }; + + let diff = similar::TextDiff::from_lines(l, r); + let unified = diff + .unified_diff() + .context_radius(3) + .header(&old_header, &new_header) + .to_string(); + + aggregated.push_str(&unified); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); + } + } else { + // Binary or invalid UTF-8: emit header only. 
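// Editor's illustration, not part of this patch: the text branch above relies on the
// `similar` crate; in isolation, the same calls look like this (file names are made up).
#[allow(dead_code)]
fn similar_unified_diff_example() -> String {
    let diff = similar::TextDiff::from_lines("foo\n", "foo\nbar\n");
    // Produces the "--- a/... / +++ b/..." headers followed by @@ hunks, which the code
    // above appends beneath its own git-style "diff --git"/"index" lines.
    diff.unified_diff()
        .context_radius(3)
        .header("a/a.txt", "b/a.txt")
        .to_string()
}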
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); + let old_header = if left_bytes.is_some() { + format!("a/{left_display}") + } else { + "/dev/null".to_string() + }; + let new_header = if right_bytes.is_some() { + format!("b/{right_display}") + } else { + "/dev/null".to_string() + }; + aggregated.push_str(&format!("--- {old_header}\n")); + aggregated.push_str(&format!("+++ {new_header}\n")); + aggregated.push_str("Binary files differ\n"); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); + } + } + } + + if aggregated.trim().is_empty() { + Ok(None) + } else { + Ok(Some(aggregated)) + } + } +} + +fn uuid_filename_for(path: &Path) -> String { + let id = Uuid::new_v4().to_string(); + match path.extension().and_then(|e| e.to_str()) { + Some(ext) if !ext.is_empty() => format!("{id}.{ext}"), + _ => id, + } +} + +/// Compute the Git SHA-1 blob object ID for the given content (bytes). +fn git_blob_sha1_hex_bytes(data: &[u8]) -> String { + // Git blob hash is sha1 of: "blob \0" + let header = format!("blob {}\0", data.len()); + use sha1::Digest; + let mut hasher = sha1::Sha1::new(); + hasher.update(header.as_bytes()); + hasher.update(data); + let digest = hasher.finalize(); + let mut out = String::with_capacity(40); + for b in digest { + use std::fmt::Write; + let _ = write!(&mut out, "{b:02x}"); + } + out +} + +const ZERO_OID: &str = "0000000000000000000000000000000000000000"; +const REGULAR_MODE: &str = "100644"; +#[cfg(unix)] +const EXECUTABLE_MODE: &str = "100755"; +const SYMLINK_MODE: &str = "120000"; + +#[cfg(unix)] +fn file_mode_for_path(path: &Path) -> Option { + use std::os::unix::fs::PermissionsExt; + let meta = fs::symlink_metadata(path).ok()?; + let ft = meta.file_type(); + if ft.is_symlink() { + return Some(SYMLINK_MODE.to_string()); + } + let mode = meta.permissions().mode(); + let is_exec = (mode & 0o111) != 0; + Some(if is_exec { + EXECUTABLE_MODE.into() + } else { + REGULAR_MODE.into() + }) +} + +#[cfg(not(unix))] +fn file_mode_for_path(_path: &Path) -> Option { + // Default to non-executable on non-unix. + Some(REGULAR_MODE.to_string()) +} + +fn blob_bytes(path: &Path, mode: &str) -> Result>> { + if path.exists() { + let contents = if mode == SYMLINK_MODE { + symlink_blob_bytes(path) + .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))? + } else { + fs::read(path).with_context(|| { + format!("failed to read current file for diff {}", path.display()) + })? + }; + Ok(Some(contents)) + } else { + Ok(None) + } +} + +#[cfg(unix)] +fn symlink_blob_bytes(path: &Path) -> Option> { + use std::os::unix::ffi::OsStrExt; + let target = std::fs::read_link(path).ok()?; + Some(target.as_os_str().as_bytes().to_vec()) +} + +#[cfg(not(unix))] +fn symlink_blob_bytes(_path: &Path) -> Option> { + None +} + +#[cfg(windows)] +fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool { + use std::path::Component; + let mut comps = p.components(); + matches!( + (comps.next(), comps.next(), comps.next()), + (Some(Component::Prefix(_)), Some(Component::RootDir), None) + ) +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + use super::*; + use pretty_assertions::assert_eq; + use tempfile::tempdir; + + /// Compute the Git SHA-1 blob object ID for the given content (string). + /// This delegates to the bytes version to avoid UTF-8 lossy conversions here. 
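// Editor's aside, not part of this patch: the blob hashing above follows git's object
// format, so it can be sanity-checked against `git hash-object`. The constant below is
// the well-known blob ID for a file containing "foo\n" (worth re-verifying locally).
//
//     #[test]
//     fn blob_oid_matches_git_hash_object() {
//         assert_eq!(
//             git_blob_sha1_hex_bytes(b"foo\n"),
//             "257cc5642cb1a054f08cc83f2d943e56fd3ebe99"
//         );
//     }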
+ fn git_blob_sha1_hex(data: &str) -> String { + git_blob_sha1_hex_bytes(data.as_bytes()) + } + + fn normalize_diff_for_test(input: &str, root: &Path) -> String { + let root_str = root.display().to_string().replace('\\', "/"); + let replaced = input.replace(&root_str, ""); + // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin + let mut blocks: Vec = Vec::new(); + let mut current = String::new(); + for line in replaced.lines() { + if line.starts_with("diff --git ") && !current.is_empty() { + blocks.push(current); + current = String::new(); + } + if !current.is_empty() { + current.push('\n'); + } + current.push_str(line); + } + if !current.is_empty() { + blocks.push(current); + } + blocks.sort(); + let mut out = blocks.join("\n"); + if !out.ends_with('\n') { + out.push('\n'); + } + out + } + + #[test] + fn accumulates_add_and_update() { + let mut acc = TurnDiffTracker::new(); + + let dir = tempdir().unwrap(); + let file = dir.path().join("a.txt"); + + // First patch: add file (baseline should be /dev/null). + let add_changes = HashMap::from([( + file.clone(), + FileChange::Add { + content: "foo\n".to_string(), + }, + )]); + acc.on_patch_begin(&add_changes).unwrap(); + + // Simulate apply: create the file on disk. + fs::write(&file, "foo\n").unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = { + let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); + let right_oid = git_blob_sha1_hex("foo\n"); + format!( + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1 @@\n+foo\n", + ) + }; + assert_eq!(first, expected_first); + + // Second patch: update the file on disk. + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes).unwrap(); + + // Simulate apply: append a new line. + fs::write(&file, "foo\nbar\n").unwrap(); + let combined = acc.get_unified_diff().unwrap().unwrap(); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected_combined = { + let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); + let right_oid = git_blob_sha1_hex("foo\nbar\n"); + format!( + "diff --git a//a.txt b//a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b//a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n", + ) + }; + assert_eq!(combined, expected_combined); + } + + #[test] + fn accumulates_delete() { + let dir = tempdir().unwrap(); + let file = dir.path().join("b.txt"); + fs::write(&file, "x\n").unwrap(); + + let mut acc = TurnDiffTracker::new(); + let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]); + acc.on_patch_begin(&del_changes).unwrap(); + + // Simulate apply: delete the file from disk. 
+ let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string()); + fs::remove_file(&file).unwrap(); + let diff = acc.get_unified_diff().unwrap().unwrap(); + let diff = normalize_diff_for_test(&diff, dir.path()); + let expected = { + let left_oid = git_blob_sha1_hex("x\n"); + format!( + "diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n", + ) + }; + assert_eq!(diff, expected); + } + + #[test] + fn accumulates_move_and_update() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src.txt"); + let dest = dir.path().join("dst.txt"); + fs::write(&src, "line\n").unwrap(); + + let mut acc = TurnDiffTracker::new(); + let mv_changes = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv_changes).unwrap(); + + // Simulate apply: move and update content. + fs::rename(&src, &dest).unwrap(); + fs::write(&dest, "line2\n").unwrap(); + + let out = acc.get_unified_diff().unwrap().unwrap(); + let out = normalize_diff_for_test(&out, dir.path()); + let expected = { + let left_oid = git_blob_sha1_hex("line\n"); + let right_oid = git_blob_sha1_hex("line2\n"); + format!( + "diff --git a//src.txt b//dst.txt\nindex {left_oid}..{right_oid}\n--- a//src.txt\n+++ b//dst.txt\n@@ -1 +1 @@\n-line\n+line2\n" + ) + }; + assert_eq!(out, expected); + } + + #[test] + fn move_without_content_change_yields_no_diff() { + let dir = tempdir().unwrap(); + let src = dir.path().join("moved.txt"); + let dest = dir.path().join("renamed.txt"); + fs::write(&src, "same\n").unwrap(); + + let mut acc = TurnDiffTracker::new(); + let mv_changes = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv_changes).unwrap(); + + // Simulate apply: move only, no content change. + fs::rename(&src, &dest).unwrap(); + + let diff = acc.get_unified_diff().unwrap(); + assert_eq!(diff, None); + } + + #[test] + fn move_declared_but_file_only_appears_at_dest_is_add() { + let dir = tempdir().unwrap(); + let src = dir.path().join("src.txt"); + let dest = dir.path().join("dest.txt"); + let mut acc = TurnDiffTracker::new(); + let mv = HashMap::from([( + src.clone(), + FileChange::Update { + unified_diff: "".into(), + move_path: Some(dest.clone()), + }, + )]); + acc.on_patch_begin(&mv).unwrap(); + // No file existed initially; create only dest + fs::write(&dest, "hello\n").unwrap(); + let diff = acc.get_unified_diff().unwrap().unwrap(); + assert!(diff.contains("new file mode")); + assert!(diff.contains("--- /dev/null")); + assert!(diff.contains("+++ b/")); + } + + #[test] + fn update_persists_across_new_baseline_for_new_file() { + let dir = tempdir().unwrap(); + let a = dir.path().join("a.txt"); + let b = dir.path().join("b.txt"); + fs::write(&a, "foo\n").unwrap(); + fs::write(&b, "z\n").unwrap(); + + let mut acc = TurnDiffTracker::new(); + + // First: update existing a.txt (baseline snapshot is created for a). + let update_a = HashMap::from([( + a.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_a).unwrap(); + // Simulate apply: modify a.txt on disk. 
+ fs::write(&a, "foo\nbar\n").unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = { + let left_oid = git_blob_sha1_hex("foo\n"); + let right_oid = git_blob_sha1_hex("foo\nbar\n"); + format!( + "diff --git a//a.txt b//a.txt\nindex {left_oid}..{right_oid}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n" + ) + }; + assert_eq!(first, expected_first); + + // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. + let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); + acc.on_patch_begin(&del_b).unwrap(); + // Simulate apply: delete b.txt. + let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| REGULAR_MODE.to_string()); + fs::remove_file(&b).unwrap(); + + let combined = acc.get_unified_diff().unwrap().unwrap(); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected = { + let left_oid_a = git_blob_sha1_hex("foo\n"); + let right_oid_a = git_blob_sha1_hex("foo\nbar\n"); + let left_oid_b = git_blob_sha1_hex("z\n"); + format!( + "diff --git a//a.txt b//a.txt\nindex {left_oid_a}..{right_oid_a}\n--- a//a.txt\n+++ b//a.txt\n@@ -1 +1,2 @@\n foo\n+bar\n\ + diff --git a//b.txt b//b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid_b}..{ZERO_OID}\n--- a//b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-z\n", + ) + }; + assert_eq!(combined, expected); + } + + #[test] + fn binary_files_differ_update() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bin.dat"); + + // Initial non-UTF8 bytes + let left_bytes: Vec = vec![0xff, 0xfe, 0xfd, 0x00]; + // Updated non-UTF8 bytes + let right_bytes: Vec = vec![0x01, 0x02, 0x03, 0x00]; + + fs::write(&file, &left_bytes).unwrap(); + + let mut acc = TurnDiffTracker::new(); + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes).unwrap(); + + // Apply update on disk + fs::write(&file, &right_bytes).unwrap(); + + let diff = acc.get_unified_diff().unwrap().unwrap(); + let diff = normalize_diff_for_test(&diff, dir.path()); + let expected = { + let left_oid = git_blob_sha1_hex_bytes(&left_bytes); + let right_oid = git_blob_sha1_hex_bytes(&right_bytes); + format!( + "diff --git a//bin.dat b//bin.dat\nindex {left_oid}..{right_oid}\n--- a//bin.dat\n+++ b//bin.dat\nBinary files differ\n" + ) + }; + assert_eq!(diff, expected); + } +} diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 72e2f9298..c290d9336 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent; use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TokenUsage; +use codex_core::protocol::TurnDiffEvent; use owo_colors::OwoColorize; use owo_colors::Style; use shlex::try_join; @@ -399,6 +400,7 @@ impl EventProcessor for EventProcessorWithHumanOutput { stdout, stderr, success, + .. 
}) => { let patch_begin = self.call_id_to_patch.remove(&call_id); @@ -428,6 +430,10 @@ impl EventProcessor for EventProcessorWithHumanOutput { println!("{}", line.style(self.dimmed)); } } + EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => { + ts_println!(self, "{}", "turn diff:".style(self.magenta)); + println!("{unified_diff}"); + } EventMsg::ExecApprovalRequest(_) => { // Should we exit? } diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index d489ffe07..205dfa463 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner( | EventMsg::BackgroundEvent(_) | EventMsg::PatchApplyBegin(_) | EventMsg::PatchApplyEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::GetHistoryEntryResponse(_) | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete => { diff --git a/codex-rs/mcp-server/src/conversation_loop.rs b/codex-rs/mcp-server/src/conversation_loop.rs index 534275181..1db39a230 100644 --- a/codex-rs/mcp-server/src/conversation_loop.rs +++ b/codex-rs/mcp-server/src/conversation_loop.rs @@ -97,6 +97,7 @@ pub async fn run_conversation_loop( | EventMsg::McpToolCallEnd(_) | EventMsg::ExecCommandBegin(_) | EventMsg::ExecCommandEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::BackgroundEvent(_) | EventMsg::ExecCommandOutputDelta(_) | EventMsg::PatchApplyBegin(_)
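// Editor's aside, not part of this patch: clients that receive EventMsg::TurnDiff and do
// not want to print the whole patch can cheaply summarize `unified_diff`, e.g.:
//
//     fn summarize_unified_diff(unified_diff: &str) -> (usize, usize) {
//         let mut added = 0;
//         let mut removed = 0;
//         for line in unified_diff.lines() {
//             // Skip file headers; count only content lines.
//             if line.starts_with("+++") || line.starts_with("---") {
//                 continue;
//             } else if line.starts_with('+') {
//                 added += 1;
//             } else if line.starts_with('-') {
//                 removed += 1;
//             }
//         }
//         (added, removed)
//     }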