Skip to content

Commit 6e784a4

Browse files
kernel: make syscall handlers naked functions
Signed-off-by: Anhad Singh <[email protected]>
1 parent 4b5f108 commit 6e784a4

File tree

16 files changed

+435
-331
lines changed

16 files changed

+435
-331
lines changed

Makefile

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,33 @@ distro: jinx
1414
SOURCE_DIR := src
1515
USERLAND_DIR := userland
1616
USERLAND_TARGET := builds/userland/target/init
17-
KERNEL_TARGET := src/target/x86_64-aero_os/release/aero_kernel
17+
KERNEL_TARGET := src/target/x86_64-unknown-none/release/aero_kernel
1818

1919
.PHONY: clean
2020
clean:
2121
rm -rf src/target
2222

2323
$(KERNEL_TARGET): $(shell find $(SOURCE_DIR) -type f -not -path '$(SOURCE_DIR)/target/*')
24-
cd src && cargo build --package aero_kernel --target .cargo/x86_64-aero_os.json --release
25-
@$(MAKE) iso
24+
cd src && cargo build --package aero_kernel --release
25+
./build-support/mkiso.sh
2626

2727
$(USERLAND_TARGET): $(shell find $(USERLAND_DIR) -type f -not -path '$(USERLAND_DIR)/target/*')
2828
./target/jinx/jinx rebuild userland
2929
@$(MAKE) distro-image
3030

3131
.PHONY: iso
32-
iso: $(KERNEL_TARGET)
33-
./build-support/mkiso.sh
32+
iso: $(KERNEL_TARGET)
3433

3534
.PHONY: distro-image
3635
distro-image: distro
3736
./build-support/mkimage.sh
3837

3938
.PHONY: qemu
4039
qemu: $(KERNEL_TARGET) $(USERLAND_TARGET)
41-
${QEMU_PATH}/qemu-system-x86_64 -cdrom target/aero.iso -m 8G -serial stdio --boot d -s -enable-kvm -cpu host -drive file=target/disk.img,if=none,id=NVME1,format=raw -device nvme,drive=NVME1,serial=nvme
40+
${QEMU_PATH}/qemu-system-x86_64 -cdrom target/aero.iso -m 8G -serial stdio --boot d -s -enable-kvm -cpu host,+vmx -drive file=target/disk.img,if=none,id=NVME1,format=raw -device nvme,drive=NVME1,serial=nvme
4241

4342
.PHONY: doc
4443
doc:
4544
rm -rf target/doc
46-
cd src && cargo doc --package aero_kernel --target .cargo/x86_64-aero_os.json --release --target-dir=../target/doc/
45+
cd src && cargo doc --package aero_kernel --release --target-dir=../target/doc/
4746
cp web/index.html target/doc/index.html

build-support/mkiso.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -ex
55
rm -rf target/iso_root
66
mkdir -pv target/iso_root/boot
77

8-
cp src/target/x86_64-aero_os/release/aero_kernel target/iso_root/aero
8+
cp src/target/x86_64-unknown-none/release/aero_kernel target/iso_root/aero
99
cp build-support/limine.cfg src/.cargo/term_background.bmp target/iso_root/
1010

1111
# Install the limine binaries

improving-build-times.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
28.36s
2-
1+
total_build: 28.36s
2+
rebuild: 8.27s

src/.cargo/config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ build-std = ["core", "compiler_builtins", "alloc"]
33
build-std-features = ["compiler-builtins-mem"]
44

55
[build]
6-
target = "./.cargo/x86_64-aero_os.json"
6+
target = "x86_64-unknown-none"
77
rustflags = [
88
# Miscellaneous:
99
"-Cforce-frame-pointers=yes",
@@ -14,4 +14,7 @@ rustflags = [
1414
"-Zthreads=8",
1515
# https://blog.rust-lang.org/inside-rust/2023/12/22/trait-system-refactor-initiative.html
1616
"-Znext-solver=coherence",
17+
18+
# Linker flags:
19+
"-Clink-arg=--no-pie",
1720
]

src/.cargo/x86_64-aero_os.json

Lines changed: 0 additions & 21 deletions
This file was deleted.

src/aero_kernel/build.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ fn main() -> Result<(), Box<dyn Error>> {
113113
}
114114
})?;
115115

116+
// Tell cargo to pass the linker script to the linker..
117+
println!("cargo:rustc-link-arg=-T.cargo/kernel.ld");
118+
// ..and to re-run if it changes.
116119
println!("cargo:rerun-if-changed=.cargo/kernel.ld");
117120

118121
Ok(())

src/aero_kernel/src/arch/x86_64/gdt.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ bitflags::bitflags! {
5454
#[derive(Debug, Clone, Copy, PartialEq)]
5555
pub enum Ring {
5656
Ring0 = 0b00,
57+
Ring3 = 0b11,
5758
}
5859

5960
const BOOT_GDT_ENTRY_COUNT: usize = 4;
@@ -175,10 +176,13 @@ impl GdtAccessFlags {
175176

176177
pub struct GdtEntryType;
177178

179+
#[rustfmt::skip]
178180
impl GdtEntryType {
179181
pub const KERNEL_CODE: u16 = 1;
180182
pub const KERNEL_DATA: u16 = 2;
181183
pub const KERNEL_TLS: u16 = 3;
184+
pub const USER_DATA: u16 = 4;
185+
pub const USER_CODE: u16 = 5;
182186
pub const TSS: u16 = 8;
183187
pub const TSS_HI: u16 = 9;
184188
}
@@ -266,7 +270,7 @@ pub struct Tss {
266270
/// The full 64-bit canonical forms of the stack pointers (RSP) for
267271
/// privilege levels 0-2.
268272
pub rsp: [u64; 3], // offset 0x04
269-
reserved2: u64, // offset 0x1C
273+
pub reserved2: u64, // offset 0x1C
270274

271275
/// The full 64-bit canonical forms of the interrupt stack table
272276
/// (IST) pointers.
@@ -317,6 +321,9 @@ pub fn init_boot() {
317321

318322
static STK: [u8; 4096 * 16] = [0; 4096 * 16];
319323

324+
pub const USER_SS: SegmentSelector = SegmentSelector::new(GdtEntryType::USER_DATA, Ring::Ring3);
325+
pub const USER_CS: SegmentSelector = SegmentSelector::new(GdtEntryType::USER_CODE, Ring::Ring3);
326+
320327
/// Initialize the *actual* GDT stored in TLS.
321328
///
322329
/// ## Safety
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
.macro pop_preserved
2+
pop r15
3+
pop r14
4+
pop r13
5+
pop r12
6+
pop rbp
7+
pop rbx
8+
.endm
9+
10+
.macro pop_scratch
11+
pop r11
12+
pop r10
13+
pop r9
14+
pop r8
15+
pop rsi
16+
pop rdi
17+
pop rdx
18+
pop rcx
19+
pop rax
20+
.endm
21+
22+
.macro push_scratch
23+
push rcx
24+
push rdx
25+
push rdi
26+
push rsi
27+
push r8
28+
push r9
29+
push r10
30+
push r11
31+
.endm
32+
33+
.macro push_preserved
34+
push rbx
35+
push rbp
36+
push r12
37+
push r13
38+
push r14
39+
push r15
40+
.endm

src/aero_kernel/src/arch/x86_64/syscall.rs

Lines changed: 148 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,166 @@
11
use aero_syscall::SyscallError;
22
use raw_cpuid::CpuId;
33

4-
use crate::arch::gdt::GdtEntryType;
4+
use crate::arch::gdt::{GdtEntryType, Tss, USER_CS, USER_SS};
55
use crate::mem::paging::VirtAddr;
66
use crate::userland::scheduler::{self, ExitStatus};
77
use crate::utils::sync::IrqGuard;
88

99
use super::interrupts::InterruptErrorStack;
1010
use super::io;
1111

12-
extern "C" {
13-
fn x86_64_syscall_handler();
14-
fn x86_64_sysenter_handler();
15-
}
12+
use core::mem::offset_of;
1613

1714
const ARCH_SET_GS: usize = 0x1001;
1815
const ARCH_SET_FS: usize = 0x1002;
1916
const ARCH_GET_FS: usize = 0x1003;
2017
const ARCH_GET_GS: usize = 0x1004;
2118

19+
core::arch::global_asm!(include_str!("./registers.S"));
20+
21+
/// 64-bit SYSCALL instruction entry point.
22+
///
23+
/// The instruction supports to to 6 arguments in registers.
24+
///
25+
/// Registers state on entry:
26+
/// * `RAX` - system call number
27+
/// * `RCX` - return address
28+
/// * `R11` - saved flags (note: R11 is callee-clobbered register in C ABI)
29+
/// * `RDI` - argument 1
30+
/// * `RSI` - argument 2
31+
/// * `RDX` - argument 3
32+
/// * `R10` - argument 4 (needs to be moved to RCX to conform to C ABI)
33+
/// * `R8` - argument 5
34+
/// * `R9` - argument 6
35+
///
36+
/// (note: `R12`..`R15`, `RBP`, `RBX` are callee-preserved in C ABI)
37+
///
38+
/// The instruction saves the `RIP` to `RCX`, clears `RFLAGS.RF` then saves `RFLAGS` to `R11`.
39+
/// Followed by, it loads the new `SS`, `CS`, and `RIP` from previously programmed MSRs.
40+
///
41+
/// The instruction also does not save anything on the stack and does *not* change the `RSP`.
42+
#[naked]
43+
unsafe extern "C" fn x86_64_syscall_handler() {
44+
asm!(
45+
// make the GS base point to the kernel TLS
46+
"swapgs",
47+
// save the user stack pointer
48+
"mov qword ptr gs:{tss_temp_ustack_off}, rsp",
49+
// restore kernel stack
50+
"mov rsp, qword ptr gs:{tss_rsp0_off}",
51+
"push {userland_ss}",
52+
// push userspace stack ptr
53+
"push qword ptr gs:{tss_temp_ustack_off}",
54+
"push r11",
55+
"push {userland_cs}",
56+
"push rcx",
57+
58+
"push rax",
59+
"push_scratch",
60+
"push_preserved",
61+
62+
// push a fake error code to match with the layout of `InterruptErrorStack`
63+
"push 0",
64+
65+
"mov rdi, rsp",
66+
67+
"cld",
68+
"call {x86_64_do_syscall}",
69+
"cli",
70+
71+
// pop the fake error code
72+
"add rsp, 8",
73+
74+
"pop_preserved",
75+
"pop_scratch",
76+
77+
// cook the sysret frame
78+
"pop rcx",
79+
"add rsp, 8",
80+
"pop r11",
81+
"pop rsp",
82+
83+
// restore user GS
84+
"swapgs",
85+
"sysretq",
86+
87+
// constants:
88+
userland_cs = const USER_CS.bits(),
89+
userland_ss = const USER_SS.bits(),
90+
// XXX: add 8 bytes to skip the x86_64 cpu local self ptr
91+
tss_temp_ustack_off = const offset_of!(Tss, reserved2) + core::mem::size_of::<usize>(),
92+
tss_rsp0_off = const offset_of!(Tss, rsp) + core::mem::size_of::<usize>(),
93+
x86_64_do_syscall = sym x86_64_do_syscall,
94+
options(noreturn)
95+
)
96+
}
97+
98+
/// 64-bit SYSENTER instruction entry point.
99+
///
100+
/// The SYSENTER mechanism performs a fast transition to the kernel.
101+
///
102+
/// The new `CS` is loaded from the `IA32_SYSENTER_CS` MSR, and the new instruction and stack
103+
/// pointers are loaded from `IA32_SYSENTER_EIP` and `IA32_SYSENTER_ESP`, respectively. `RFLAGS.IF`
104+
/// is cleared, but other flags are unchanged.
105+
///
106+
/// As the instruction does not save *any* state, the user is required to provide the return `RIP`
107+
/// and `RSP` in the `RCX` and `R11` registers, respectively. These addresses must be canonical.
108+
///
109+
/// The instruction expects the call number and arguments in the same registers as for SYSCALL.
110+
#[naked]
111+
unsafe extern "C" fn x86_64_sysenter_handler() {
112+
asm!(
113+
"swapgs",
114+
// Build the interrupt frame expected by the kernel.
115+
"push {userland_ss}",
116+
"push r11",
117+
"pushfq",
118+
"push {userland_cs}",
119+
"push rcx",
120+
// Mask the same flags as for SYSCALL.
121+
// XXX: Up to this point the code can be single-stepped if the user sets TF.
122+
"pushfq",
123+
"and dword ptr [rsp], 0x300",
124+
"popfq",
125+
"push rax",
126+
"push_scratch",
127+
"push_preserved",
128+
"push 0",
129+
// Store the stack pointer (interrupt frame ptr) in `RBP` for safe keeping, and align the
130+
// stack as specified by the SysV calling convention.
131+
"mov rbp, rsp",
132+
"and rsp, ~0xf",
133+
"mov rdi, rbp",
134+
"call {x86_64_check_sysenter}",
135+
"mov rdi, rbp",
136+
"call {x86_64_do_syscall}",
137+
// Reload the stack pointer, skipping the error code.
138+
"lea rsp, [rbp + 8]",
139+
"pop_preserved",
140+
"pop_scratch",
141+
// Pop the `IRET` frame into the registers expected by `SYSEXIT`.
142+
"pop rdx", // return `RIP` in `RDX`
143+
"add rsp, 8",
144+
"popfq", // restore saved `RFLAGS`
145+
"pop rcx", // return `RSP` in `RCX`
146+
// SAFETY: The above call to `x86_64_check_sysenter` is guarantees that we execute
147+
// `sysexit` with canonical addresses in RCX and RDX. Otherwise we would fault in the
148+
// kernel having already swapped back to the user's GS.
149+
"swapgs",
150+
// SYSEXIT does *not* restore `IF` to re-enable interrupts.
151+
// This is done here, rather then when restoring `RFLAGS` above, since `STI` will keep
152+
"sti",
153+
// interrupts inhibited until after the *following* instruction executes.
154+
"sysexitq",
155+
// constants:
156+
userland_cs = const USER_CS.bits(),
157+
userland_ss = const USER_SS.bits(),
158+
x86_64_check_sysenter = sym x86_64_check_sysenter,
159+
x86_64_do_syscall = sym x86_64_do_syscall,
160+
options(noreturn)
161+
)
162+
}
163+
22164
fn arch_prctl(command: usize, address: usize) -> Result<usize, SyscallError> {
23165
match command {
24166
ARCH_SET_FS => unsafe {
@@ -63,7 +205,6 @@ fn arch_prctl(command: usize, address: usize) -> Result<usize, SyscallError> {
63205
///
64206
/// We cannot execute `sysexit` on return with non-canonical return addresses, or we
65207
/// will take a fault in the kernel with the user's GS base already swapped back.
66-
#[no_mangle]
67208
pub(super) extern "sysv64" fn x86_64_check_sysenter(stack: &mut InterruptErrorStack) {
68209
let rip = stack.stack.iret.rip;
69210
let rsp = stack.stack.iret.rsp;
@@ -77,7 +218,6 @@ pub(super) extern "sysv64" fn x86_64_check_sysenter(stack: &mut InterruptErrorSt
77218
}
78219
}
79220

80-
#[no_mangle]
81221
pub(super) extern "C" fn x86_64_do_syscall(stack: &mut InterruptErrorStack) {
82222
let stack = &mut stack.stack;
83223

@@ -156,6 +296,7 @@ pub(super) fn init() {
156296
.map_or(false, |i| i.has_sysenter_sysexit());
157297

158298
if has_sysenter {
299+
log::info!("enabling support for sysenter");
159300
unsafe {
160301
io::wrmsr(
161302
io::IA32_SYSENTER_CS,

0 commit comments

Comments
 (0)