Skip to content

Commit 221ebfc

Browse files
authored
fix: run apply_patch calls through the sandbox (#1705)
Building on the work of #1702, this changes how a shell call to `apply_patch` is handled.

Previously, a shell call to `apply_patch` was always handled in-process, never leveraging a sandbox. To determine whether the `apply_patch` operation could be auto-approved, the `is_write_patch_constrained_to_writable_paths()` function would check if all the paths listed in the patch were writable. If so, the agent would apply the changes listed in the patch.

Unfortunately, this approach afforded a loophole: symlinks!

* For a soft link, we could fix this issue by tracing the link and checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE` to see if the number of links is greater than 1, but then we would have to do something potentially expensive like `find . -inum <inode_number>` to find the other paths for `FILE`. Further, even if this worked, this approach runs the risk of a [TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use) race condition, so it is not robust.

The solution, implemented in this PR, is to turn the virtual execution of the `apply_patch` CLI into an _actual_ execution using `codex --codex-run-as-apply-patch PATCH`, which we can run under the sandbox the user specified, just like any other `shell` call.

This, of course, assumes that the sandbox prevents writing through symlinks as a mechanism to write to folders that are not in the writable set configured by the sandbox. I verified this by testing the following on both Mac and Linux:

```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?

# Codex is run in SANDBOX_DIR, so writes should be constrained to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}

trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"

cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
  SANDBOX_SUBCOMMAND=landlock
else
  SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"

codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true

cat "${EXPLOIT_DIR}/original.txt"
```

Admittedly, this change merits a proper integration test, but I think I will have to do that in a follow-up PR.
1 parent 301ec72 commit 221ebfc

File tree

8 files changed

+255
-95
lines changed

8 files changed

+255
-95
lines changed

codex-rs/apply-patch/src/lib.rs

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,24 @@ impl PartialEq for IoError {
5858

5959
#[derive(Debug, PartialEq)]
6060
pub enum MaybeApplyPatch {
61-
Body(Vec<Hunk>),
61+
Body(ApplyPatchArgs),
6262
ShellParseError(ExtractHeredocError),
6363
PatchParseError(ParseError),
6464
NotApplyPatch,
6565
}
6666

67+
/// Both the raw PATCH argument to `apply_patch` as well as the PATCH argument
68+
/// parsed into hunks.
69+
#[derive(Debug, PartialEq)]
70+
pub struct ApplyPatchArgs {
71+
pub patch: String,
72+
pub hunks: Vec<Hunk>,
73+
}
74+
6775
pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
6876
match argv {
6977
[cmd, body] if cmd == "apply_patch" => match parse_patch(body) {
70-
Ok(hunks) => MaybeApplyPatch::Body(hunks),
78+
Ok(source) => MaybeApplyPatch::Body(source),
7179
Err(e) => MaybeApplyPatch::PatchParseError(e),
7280
},
7381
[bash, flag, script]
@@ -77,7 +85,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
7785
{
7886
match extract_heredoc_body_from_apply_patch_command(script) {
7987
Ok(body) => match parse_patch(&body) {
80-
Ok(hunks) => MaybeApplyPatch::Body(hunks),
88+
Ok(source) => MaybeApplyPatch::Body(source),
8189
Err(e) => MaybeApplyPatch::PatchParseError(e),
8290
},
8391
Err(e) => MaybeApplyPatch::ShellParseError(e),
@@ -116,11 +124,19 @@ pub enum MaybeApplyPatchVerified {
116124
NotApplyPatch,
117125
}
118126

119-
#[derive(Debug, PartialEq)]
120127
/// ApplyPatchAction is the result of parsing an `apply_patch` command. By
121128
/// construction, all paths should be absolute paths.
129+
#[derive(Debug, PartialEq)]
122130
pub struct ApplyPatchAction {
123131
changes: HashMap<PathBuf, ApplyPatchFileChange>,
132+
133+
/// The raw patch argument that can be used with `apply_patch` as an exec
134+
/// call. i.e., if the original arg was parsed in "lenient" mode with a
135+
/// heredoc, this should be the value without the heredoc wrapper.
136+
pub patch: String,
137+
138+
/// The working directory that was used to resolve relative paths in the patch.
139+
pub cwd: PathBuf,
124140
}
125141

126142
impl ApplyPatchAction {
@@ -140,16 +156,36 @@ impl ApplyPatchAction {
140156
panic!("path must be absolute");
141157
}
142158

159+
#[allow(clippy::expect_used)]
160+
let filename = path
161+
.file_name()
162+
.expect("path should not be empty")
163+
.to_string_lossy();
164+
let patch = format!(
165+
r#"*** Begin Patch
166+
*** Update File: {filename}
167+
@@
168+
+ {content}
169+
*** End Patch"#,
170+
);
143171
let changes = HashMap::from([(path.to_path_buf(), ApplyPatchFileChange::Add { content })]);
144-
Self { changes }
172+
#[allow(clippy::expect_used)]
173+
Self {
174+
changes,
175+
cwd: path
176+
.parent()
177+
.expect("path should have parent")
178+
.to_path_buf(),
179+
patch,
180+
}
145181
}
146182
}
147183

148184
/// cwd must be an absolute path so that we can resolve relative paths in the
149185
/// patch.
150186
pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
151187
match maybe_parse_apply_patch(argv) {
152-
MaybeApplyPatch::Body(hunks) => {
188+
MaybeApplyPatch::Body(ApplyPatchArgs { patch, hunks }) => {
153189
let mut changes = HashMap::new();
154190
for hunk in hunks {
155191
let path = hunk.resolve_path(cwd);
@@ -183,7 +219,11 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
183219
}
184220
}
185221
}
186-
MaybeApplyPatchVerified::Body(ApplyPatchAction { changes })
222+
MaybeApplyPatchVerified::Body(ApplyPatchAction {
223+
changes,
224+
patch,
225+
cwd: cwd.to_path_buf(),
226+
})
187227
}
188228
MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
189229
MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
@@ -264,7 +304,7 @@ pub fn apply_patch(
264304
stderr: &mut impl std::io::Write,
265305
) -> Result<(), ApplyPatchError> {
266306
let hunks = match parse_patch(patch) {
267-
Ok(hunks) => hunks,
307+
Ok(source) => source.hunks,
268308
Err(e) => {
269309
match &e {
270310
InvalidPatchError(message) => {
@@ -652,7 +692,7 @@ mod tests {
652692
]);
653693

654694
match maybe_parse_apply_patch(&args) {
655-
MaybeApplyPatch::Body(hunks) => {
695+
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
656696
assert_eq!(
657697
hunks,
658698
vec![Hunk::AddFile {
@@ -679,7 +719,7 @@ PATCH"#,
679719
]);
680720

681721
match maybe_parse_apply_patch(&args) {
682-
MaybeApplyPatch::Body(hunks) => {
722+
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
683723
assert_eq!(
684724
hunks,
685725
vec![Hunk::AddFile {
@@ -954,7 +994,7 @@ PATCH"#,
954994
));
955995
let patch = parse_patch(&patch).unwrap();
956996

957-
let update_file_chunks = match patch.as_slice() {
997+
let update_file_chunks = match patch.hunks.as_slice() {
958998
[Hunk::UpdateFile { chunks, .. }] => chunks,
959999
_ => panic!("Expected a single UpdateFile hunk"),
9601000
};
@@ -992,7 +1032,7 @@ PATCH"#,
9921032
));
9931033

9941034
let patch = parse_patch(&patch).unwrap();
995-
let chunks = match patch.as_slice() {
1035+
let chunks = match patch.hunks.as_slice() {
9961036
[Hunk::UpdateFile { chunks, .. }] => chunks,
9971037
_ => panic!("Expected a single UpdateFile hunk"),
9981038
};
@@ -1029,7 +1069,7 @@ PATCH"#,
10291069
));
10301070

10311071
let patch = parse_patch(&patch).unwrap();
1032-
let chunks = match patch.as_slice() {
1072+
let chunks = match patch.hunks.as_slice() {
10331073
[Hunk::UpdateFile { chunks, .. }] => chunks,
10341074
_ => panic!("Expected a single UpdateFile hunk"),
10351075
};
@@ -1064,7 +1104,7 @@ PATCH"#,
10641104
));
10651105

10661106
let patch = parse_patch(&patch).unwrap();
1067-
let chunks = match patch.as_slice() {
1107+
let chunks = match patch.hunks.as_slice() {
10681108
[Hunk::UpdateFile { chunks, .. }] => chunks,
10691109
_ => panic!("Expected a single UpdateFile hunk"),
10701110
};
@@ -1110,7 +1150,7 @@ PATCH"#,
11101150

11111151
// Extract chunks then build the unified diff.
11121152
let parsed = parse_patch(&patch).unwrap();
1113-
let chunks = match parsed.as_slice() {
1153+
let chunks = match parsed.hunks.as_slice() {
11141154
[Hunk::UpdateFile { chunks, .. }] => chunks,
11151155
_ => panic!("Expected a single UpdateFile hunk"),
11161156
};
@@ -1193,6 +1233,8 @@ g
11931233
new_content: "updated session directory content\n".to_string(),
11941234
},
11951235
)]),
1236+
patch: argv[1].clone(),
1237+
cwd: session_dir.path().to_path_buf(),
11961238
})
11971239
);
11981240
}

codex-rs/apply-patch/src/parser.rs

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
//!
2323
//! The parser below is a little more lenient than the explicit spec and allows for
2424
//! leading/trailing whitespace around patch markers.
25+
use crate::ApplyPatchArgs;
2526
use std::path::Path;
2627
use std::path::PathBuf;
2728

@@ -102,7 +103,7 @@ pub struct UpdateFileChunk {
102103
pub is_end_of_file: bool,
103104
}
104105

105-
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
106+
pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
106107
let mode = if PARSE_IN_STRICT_MODE {
107108
ParseMode::Strict
108109
} else {
@@ -150,7 +151,7 @@ enum ParseMode {
150151
Lenient,
151152
}
152153

153-
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseError> {
154+
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
154155
let lines: Vec<&str> = patch.trim().lines().collect();
155156
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
156157
Ok(()) => &lines,
@@ -173,7 +174,8 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseErro
173174
line_number += hunk_lines;
174175
remaining_lines = &remaining_lines[hunk_lines..]
175176
}
176-
Ok(hunks)
177+
let patch = lines.join("\n");
178+
Ok(ApplyPatchArgs { hunks, patch })
177179
}
178180

179181
/// Checks the start and end lines of the patch text for `apply_patch`,
@@ -425,6 +427,7 @@ fn parse_update_file_chunk(
425427
}
426428

427429
#[test]
430+
#[allow(clippy::unwrap_used)]
428431
fn test_parse_patch() {
429432
assert_eq!(
430433
parse_patch_text("bad", ParseMode::Strict),
@@ -455,8 +458,10 @@ fn test_parse_patch() {
455458
"*** Begin Patch\n\
456459
*** End Patch",
457460
ParseMode::Strict
458-
),
459-
Ok(Vec::new())
461+
)
462+
.unwrap()
463+
.hunks,
464+
Vec::new()
460465
);
461466
assert_eq!(
462467
parse_patch_text(
@@ -472,8 +477,10 @@ fn test_parse_patch() {
472477
+ return 123\n\
473478
*** End Patch",
474479
ParseMode::Strict
475-
),
476-
Ok(vec![
480+
)
481+
.unwrap()
482+
.hunks,
483+
vec![
477484
AddFile {
478485
path: PathBuf::from("path/add.py"),
479486
contents: "abc\ndef\n".to_string()
@@ -491,7 +498,7 @@ fn test_parse_patch() {
491498
is_end_of_file: false
492499
}]
493500
}
494-
])
501+
]
495502
);
496503
// Update hunk followed by another hunk (Add File).
497504
assert_eq!(
@@ -504,8 +511,10 @@ fn test_parse_patch() {
504511
+content\n\
505512
*** End Patch",
506513
ParseMode::Strict
507-
),
508-
Ok(vec![
514+
)
515+
.unwrap()
516+
.hunks,
517+
vec![
509518
UpdateFile {
510519
path: PathBuf::from("file.py"),
511520
move_path: None,
@@ -520,7 +529,7 @@ fn test_parse_patch() {
520529
path: PathBuf::from("other.py"),
521530
contents: "content\n".to_string()
522531
}
523-
])
532+
]
524533
);
525534

526535
// Update hunk without an explicit @@ header for the first chunk should parse.
@@ -533,8 +542,10 @@ fn test_parse_patch() {
533542
+bar
534543
*** End Patch"#,
535544
ParseMode::Strict
536-
),
537-
Ok(vec![UpdateFile {
545+
)
546+
.unwrap()
547+
.hunks,
548+
vec![UpdateFile {
538549
path: PathBuf::from("file2.py"),
539550
move_path: None,
540551
chunks: vec![UpdateFileChunk {
@@ -543,7 +554,7 @@ fn test_parse_patch() {
543554
new_lines: vec!["import foo".to_string(), "bar".to_string()],
544555
is_end_of_file: false,
545556
}],
546-
}])
557+
}]
547558
);
548559
}
549560

@@ -574,7 +585,10 @@ fn test_parse_patch_lenient() {
574585
);
575586
assert_eq!(
576587
parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
577-
Ok(expected_patch.clone())
588+
Ok(ApplyPatchArgs {
589+
hunks: expected_patch.clone(),
590+
patch: patch_text.to_string()
591+
})
578592
);
579593

580594
let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
@@ -584,7 +598,10 @@ fn test_parse_patch_lenient() {
584598
);
585599
assert_eq!(
586600
parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
587-
Ok(expected_patch.clone())
601+
Ok(ApplyPatchArgs {
602+
hunks: expected_patch.clone(),
603+
patch: patch_text.to_string()
604+
})
588605
);
589606

590607
let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
@@ -594,7 +611,10 @@ fn test_parse_patch_lenient() {
594611
);
595612
assert_eq!(
596613
parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
597-
Ok(expected_patch.clone())
614+
Ok(ApplyPatchArgs {
615+
hunks: expected_patch.clone(),
616+
patch: patch_text.to_string()
617+
})
598618
);
599619

600620
let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");

codex-rs/arg0/src/lib.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::future::Future;
22
use std::path::Path;
33
use std::path::PathBuf;
44

5+
use codex_core::CODEX_APPLY_PATCH_ARG1;
6+
57
/// While we want to deploy the Codex CLI as a single executable for simplicity,
68
/// we also want to expose some of its functionality as distinct CLIs, so we use
79
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
@@ -43,7 +45,7 @@ where
4345
}
4446

4547
let argv1 = args.next().unwrap_or_default();
46-
if argv1 == "--codex-run-as-apply-patch" {
48+
if argv1 == CODEX_APPLY_PATCH_ARG1 {
4749
let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
4850
let exit_code = match patch_arg {
4951
Some(patch_arg) => {
@@ -55,7 +57,7 @@ where
5557
}
5658
}
5759
None => {
58-
eprintln!("Error: --codex-run-as-apply-patch requires a UTF-8 PATCH argument.");
60+
eprintln!("Error: {CODEX_APPLY_PATCH_ARG1} requires a UTF-8 PATCH argument.");
5961
1
6062
}
6163
};

0 commit comments

Comments
 (0)