Skip to content

Commit 89fba63

Browse files
committed
Improved symlink handling
1 parent df0f792 commit 89fba63

File tree

1 file changed

+86
-56
lines changed

1 file changed

+86
-56
lines changed

codex-rs/core/src/turn_diff_tracker.rs

Lines changed: 86 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::process::Command;
66

77
use anyhow::Context;
88
use anyhow::Result;
9+
use anyhow::anyhow;
910
use uuid::Uuid;
1011

1112
use crate::protocol::FileChange;
@@ -28,11 +29,11 @@ struct BaselineFileInfo {
2829
pub struct TurnDiffTracker {
2930
/// Map external path -> internal filename (uuid + same extension).
3031
external_to_temp_name: HashMap<PathBuf, String>,
31-
/// Internal filename -> external path as of current accumulated state (after applying all changes).
32-
/// This is where renames are tracked.
33-
temp_name_to_current_external: HashMap<String, PathBuf>,
3432
/// Internal filename -> baseline file info.
3533
baseline_file_info: HashMap<String, BaselineFileInfo>,
34+
/// Internal filename -> external path as of current accumulated state (after applying all changes).
35+
/// This is where renames are tracked.
36+
temp_name_to_current_path: HashMap<String, PathBuf>,
3637
/// Cache of known git worktree roots to avoid repeated filesystem walks.
3738
git_root_cache: Vec<PathBuf>,
3839
}
@@ -53,17 +54,22 @@ impl TurnDiffTracker {
5354
let internal = uuid_filename_for(path);
5455
self.external_to_temp_name
5556
.insert(path.clone(), internal.clone());
56-
self.temp_name_to_current_external
57+
self.temp_name_to_current_path
5758
.insert(internal.clone(), path.clone());
5859

5960
// If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
6061
let (contents_bytes, mode, oid) = if path.exists() {
61-
let contents_bytes = fs::read(path)
62-
.with_context(|| format!("failed to read original {}", path.display()))?;
6362
let mode = file_mode_for_path(path);
64-
let oid = self
65-
.git_blob_oid_for_path(path)
66-
.unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes));
63+
let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
64+
let contents_bytes = blob_bytes(path, mode_str)
65+
.unwrap_or_default()
66+
.unwrap_or_default();
67+
let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
68+
git_blob_sha1_hex_bytes(&contents_bytes)
69+
} else {
70+
self.git_blob_oid_for_path(path)
71+
.unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
72+
};
6773
(Some(contents_bytes), mode, Some(oid))
6874
} else {
6975
(None, None, Some(ZERO_OID.to_string()))
@@ -91,8 +97,6 @@ impl TurnDiffTracker {
9197
None => {
9298
// This should be rare, but if we haven't mapped the source, create it with no baseline.
9399
let i = uuid_filename_for(path);
94-
self.external_to_temp_name.insert(path.clone(), i.clone());
95-
// No on-disk file read here; treat as addition.
96100
self.baseline_file_info.insert(
97101
i.clone(),
98102
BaselineFileInfo {
@@ -106,7 +110,7 @@ impl TurnDiffTracker {
106110
}
107111
};
108112
// Update current external mapping for temp file name.
109-
self.temp_name_to_current_external
113+
self.temp_name_to_current_path
110114
.insert(uuid_filename.clone(), dest.clone());
111115
// Update forward file_mapping: external current -> internal name.
112116
self.external_to_temp_name.remove(path);
@@ -119,7 +123,7 @@ impl TurnDiffTracker {
119123
}
120124

121125
fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
122-
self.temp_name_to_current_external
126+
self.temp_name_to_current_path
123127
.get(internal)
124128
.cloned()
125129
.or_else(|| {
@@ -241,22 +245,21 @@ impl TurnDiffTracker {
241245
None => continue,
242246
};
243247

248+
// Determine modes early; needed to read symlink content correctly.
249+
let baseline_mode = self
250+
.baseline_file_info
251+
.get(&internal)
252+
.and_then(|i| i.mode.clone())
253+
.unwrap_or_else(|| REGULAR_MODE.to_string());
254+
let current_mode =
255+
file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
256+
244257
let left_bytes = self
245258
.baseline_file_info
246259
.get(&internal)
247260
.and_then(|i| i.contents_bytes.clone());
248261

249-
let right_bytes = if current_external.exists() {
250-
let contents = fs::read(&current_external).with_context(|| {
251-
format!(
252-
"failed to read current file for diff {}",
253-
current_external.display()
254-
)
255-
})?;
256-
Some(contents)
257-
} else {
258-
None
259-
};
262+
let right_bytes = blob_bytes(&current_external, &current_mode)?;
260263

261264
// Fast path: identical bytes or both missing.
262265
if left_bytes.as_deref() == right_bytes.as_deref() {
@@ -272,15 +275,6 @@ impl TurnDiffTracker {
272275
let is_add = left_bytes.is_none() && right_bytes.is_some();
273276
let is_delete = left_bytes.is_some() && right_bytes.is_none();
274277

275-
// Determine modes.
276-
let baseline_mode = self
277-
.baseline_file_info
278-
.get(&internal)
279-
.and_then(|i| i.mode.clone())
280-
.unwrap_or_else(|| "100644".to_string());
281-
let current_mode =
282-
file_mode_for_path(&current_external).unwrap_or_else(|| "100644".to_string());
283-
284278
if is_add {
285279
aggregated.push_str(&format!("new file mode {current_mode}\n"));
286280
} else if is_delete {
@@ -304,8 +298,12 @@ impl TurnDiffTracker {
304298
})
305299
.unwrap_or_else(|| ZERO_OID.to_string());
306300
let right_oid = if let Some(b) = right_bytes.as_ref() {
307-
self.git_blob_oid_for_path(&current_external)
308-
.unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
301+
if current_mode == SYMLINK_MODE {
302+
git_blob_sha1_hex_bytes(b)
303+
} else {
304+
self.git_blob_oid_for_path(&current_external)
305+
.unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
306+
}
309307
} else {
310308
ZERO_OID.to_string()
311309
};
@@ -397,6 +395,9 @@ fn uuid_filename_for(path: &Path) -> String {
397395
}
398396

399397
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
398+
const REGULAR_MODE: &str = "100644";
399+
const EXECUTABLE_MODE: &str = "100755";
400+
const SYMLINK_MODE: &str = "120000";
400401

401402
/// Compute the Git SHA-1 blob object ID for the given content (bytes).
402403
fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
@@ -415,28 +416,57 @@ fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
415416
out
416417
}
417418

419+
#[cfg(unix)]
418420
fn file_mode_for_path(path: &Path) -> Option<String> {
419-
#[cfg(unix)]
420-
{
421-
use std::os::unix::fs::PermissionsExt;
422-
let meta = fs::metadata(path).ok()?;
423-
let mode = meta.permissions().mode();
424-
let is_exec = (mode & 0o111) != 0;
425-
Some(if is_exec {
426-
"100755".to_string()
427-
} else {
428-
"100644".to_string()
429-
})
421+
use std::os::unix::fs::PermissionsExt;
422+
let meta = fs::symlink_metadata(path).ok()?;
423+
let ft = meta.file_type();
424+
if ft.is_symlink() {
425+
return Some(SYMLINK_MODE.to_string());
430426
}
431-
#[cfg(not(unix))]
432-
{
433-
// Suppress unused variable warning
434-
let _ = path;
435-
// Default to non-executable on non-unix.
436-
Some("100644".to_string())
427+
let mode = meta.permissions().mode();
428+
let is_exec = (mode & 0o111) != 0;
429+
Some(if is_exec {
430+
EXECUTABLE_MODE.into()
431+
} else {
432+
REGULAR_MODE.into()
433+
})
434+
}
435+
436+
#[cfg(not(unix))]
437+
fn file_mode_for_path(_path: &Path) -> Option<String> {
438+
// Default to non-executable on non-unix.
439+
Some(REGULAR_MODE.to_string())
440+
}
441+
442+
fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
443+
if path.exists() {
444+
let contents = if mode == SYMLINK_MODE {
445+
symlink_blob_bytes(path)
446+
.ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
447+
} else {
448+
fs::read(path).with_context(|| {
449+
format!("failed to read current file for diff {}", path.display())
450+
})?
451+
};
452+
Ok(Some(contents))
453+
} else {
454+
Ok(None)
437455
}
438456
}
439457

458+
#[cfg(unix)]
459+
fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
460+
use std::os::unix::ffi::OsStrExt;
461+
let target = std::fs::read_link(path).ok()?;
462+
Some(target.as_os_str().as_bytes().to_vec())
463+
}
464+
465+
#[cfg(not(unix))]
466+
fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
467+
None
468+
}
469+
440470
#[cfg(windows)]
441471
fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
442472
use std::path::Component;
@@ -508,7 +538,7 @@ mod tests {
508538
let first = acc.get_unified_diff().unwrap().unwrap();
509539
let first = normalize_diff_for_test(&first, dir.path());
510540
let expected_first = {
511-
let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string());
541+
let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
512542
let right_oid = git_blob_sha1_hex("foo\n");
513543
format!(
514544
"diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1 @@\n+foo\n",
@@ -531,7 +561,7 @@ mod tests {
531561
let combined = acc.get_unified_diff().unwrap().unwrap();
532562
let combined = normalize_diff_for_test(&combined, dir.path());
533563
let expected_combined = {
534-
let mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string());
564+
let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
535565
let right_oid = git_blob_sha1_hex("foo\nbar\n");
536566
format!(
537567
"diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n",
@@ -551,7 +581,7 @@ mod tests {
551581
acc.on_patch_begin(&del_changes).unwrap();
552582

553583
// Simulate apply: delete the file from disk.
554-
let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| "100644".to_string());
584+
let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
555585
fs::remove_file(&file).unwrap();
556586
let diff = acc.get_unified_diff().unwrap().unwrap();
557587
let diff = normalize_diff_for_test(&diff, dir.path());
@@ -679,7 +709,7 @@ mod tests {
679709
let del_b = HashMap::from([(b.clone(), FileChange::Delete)]);
680710
acc.on_patch_begin(&del_b).unwrap();
681711
// Simulate apply: delete b.txt.
682-
let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| "100644".to_string());
712+
let baseline_mode = file_mode_for_path(&b).unwrap_or_else(|| REGULAR_MODE.to_string());
683713
fs::remove_file(&b).unwrap();
684714

685715
let combined = acc.get_unified_diff().unwrap().unwrap();

0 commit comments

Comments
 (0)