From 876555ab30000777f950b05d0a7a94ffd54bade9 Mon Sep 17 00:00:00 2001 From: "jeffery.wsj" Date: Fri, 26 Jan 2024 15:53:49 +0800 Subject: [PATCH 1/2] syscall/linux: support a confidential computing libos occlum based on sgx tee MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TEE libos Occlum hooks all syscalls by providing a special glibc or musl implementation to support Java and C++ in an SGX TEE, but Go's syscalls do not depend on glibc or musl; they are implemented directly in Plan 9 assembly in the Go runtime. We need to hook all syscalls by replacing the syscall instruction with a jump into the Occlum syscall entry point. Reviewed-by: lei.yul Reviewed-by: denghui.ddh Reviewed-by: qingfeng.yy --- src/runtime/extern.go | 11 ++ .../internal/syscall/asm_linux_amd64.s | 2 +- src/runtime/malloc.go | 19 +- src/runtime/os_linux.go | 5 + src/runtime/sys_linux_amd64.s | 176 ++++++++++++++---- src/runtime/textflag.h | 16 ++ src/syscall/asm_linux_amd64.s | 6 +- src/syscall/exec_linux.go | 26 ++- 8 files changed, 205 insertions(+), 56 deletions(-) diff --git a/src/runtime/extern.go b/src/runtime/extern.go index afadc3d17ec341..654031f50c6f39 100644 --- a/src/runtime/extern.go +++ b/src/runtime/extern.go @@ -320,3 +320,14 @@ const GOOS string = goos.GOOS // GOARCH is the running program's architecture target: // one of 386, amd64, arm, s390x, and so on. const GOARCH string = goarch.GOARCH + +// occlumentry holds the Occlum syscall entry address and doubles as a flag for +// detecting a TEE environment. Its value comes from auxv: 0 by default, nonzero +// if the binary was loaded by a TEE libos such as Occlum. +var occlumentry uintptr = 0x0 +var teeFlag = false + +// isTeeEnvironment reports whether the program is running in a TEE environment. 
+func isTeeEnvironment() bool { + return teeFlag +} diff --git a/src/runtime/internal/syscall/asm_linux_amd64.s b/src/runtime/internal/syscall/asm_linux_amd64.s index 3740ef1beb570d..a082516e7dba09 100644 --- a/src/runtime/internal/syscall/asm_linux_amd64.s +++ b/src/runtime/internal/syscall/asm_linux_amd64.s @@ -32,7 +32,7 @@ TEXT ·Syscall6(SB),NOSPLIT,$0 MOVQ CX, SI // a2 MOVQ BX, DI // a1 // num already in AX. - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok NEGQ AX diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 7ff2190876dd1b..5faa10aab6dc94 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -608,7 +608,8 @@ func (h *mheap) sysAlloc(n uintptr, hintList **arenaHint, register bool) (v unsa } // Try to grow the heap at a hint address. - for *hintList != nil { + // Skip hintlist address malloc in Tee environment. + for !teeFlag && *hintList != nil { hint := *hintList p := hint.addr if hint.down { @@ -662,13 +663,15 @@ func (h *mheap) sysAlloc(n uintptr, hintList **arenaHint, register bool) (v unsa return nil, 0 } - // Create new hints for extending this region. - hint := (*arenaHint)(h.arenaHintAlloc.alloc()) - hint.addr, hint.down = uintptr(v), true - hint.next, mheap_.arenaHints = mheap_.arenaHints, hint - hint = (*arenaHint)(h.arenaHintAlloc.alloc()) - hint.addr = uintptr(v) + size - hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + if !teeFlag { + // Create new hints for extending this region. + hint := (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr, hint.down = uintptr(v), true + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + hint = (*arenaHint)(h.arenaHintAlloc.alloc()) + hint.addr = uintptr(v) + size + hint.next, mheap_.arenaHints = mheap_.arenaHints, hint + } } // Check for bad pointers or pointers we can't use. 
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 26db4a0cd94459..d2b7916e1c6c28 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -219,6 +219,7 @@ const ( _AT_SECURE = 23 // secure mode boolean _AT_RANDOM = 25 // introduced in 2.6.29 _AT_HWCAP2 = 26 // hardware capability bit vector 2 + _AT_OCCLUM = 48 // gnu syscall ABI entry address ) var procAuxv = []byte("/proc/self/auxv\x00") @@ -304,6 +305,10 @@ func sysauxv(auxv []uintptr) int { case _AT_SECURE: secureMode = val == 1 + + case _AT_OCCLUM: + occlumentry = val + teeFlag = true } archauxv(tag, val) diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s index c7a89ba5363405..5a382105bdd7ce 100644 --- a/src/runtime/sys_linux_amd64.s +++ b/src/runtime/sys_linux_amd64.s @@ -51,7 +51,7 @@ TEXT runtime·exit(SB),NOSPLIT,$0-4 MOVL code+0(FP), DI MOVL $SYS_exit_group, AX - SYSCALL + SYSCALL_ENHANCE RET // func exitThread(wait *atomic.Uint32) @@ -61,7 +61,7 @@ TEXT runtime·exitThread(SB),NOSPLIT,$0-8 MOVL $0, (AX) MOVL $0, DI // exit code MOVL $SYS_exit, AX - SYSCALL + SYSCALL_ENHANCE // We may not even have a stack any more. 
INT $3 JMP 0(PC) @@ -73,7 +73,7 @@ TEXT runtime·open(SB),NOSPLIT,$0-20 MOVL mode+8(FP), DX MOVL perm+12(FP), R10 MOVL $SYS_openat, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $-1, AX @@ -83,7 +83,7 @@ TEXT runtime·open(SB),NOSPLIT,$0-20 TEXT runtime·closefd(SB),NOSPLIT,$0-12 MOVL fd+0(FP), DI MOVL $SYS_close, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $-1, AX @@ -95,7 +95,7 @@ TEXT runtime·write1(SB),NOSPLIT,$0-28 MOVQ p+8(FP), SI MOVL n+16(FP), DX MOVL $SYS_write, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -104,7 +104,7 @@ TEXT runtime·read(SB),NOSPLIT,$0-28 MOVQ p+8(FP), SI MOVL n+16(FP), DX MOVL $SYS_read, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -113,7 +113,7 @@ TEXT runtime·pipe2(SB),NOSPLIT,$0-20 LEAQ r+8(FP), DI MOVL flags+0(FP), SI MOVL $SYS_pipe2, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, errno+16(FP) RET @@ -131,40 +131,40 @@ TEXT runtime·usleep(SB),NOSPLIT,$16 MOVQ SP, DI MOVL $0, SI MOVL $SYS_nanosleep, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·gettid(SB),NOSPLIT,$0-4 MOVL $SYS_gettid, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+0(FP) RET TEXT runtime·raise(SB),NOSPLIT,$0 MOVL $SYS_getpid, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, R12 MOVL $SYS_gettid, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, SI // arg 2 tid MOVL R12, DI // arg 1 pid MOVL sig+0(FP), DX // arg 3 MOVL $SYS_tgkill, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·raiseproc(SB),NOSPLIT,$0 MOVL $SYS_getpid, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, DI // arg 1 pid MOVL sig+0(FP), SI // arg 2 MOVL $SYS_kill, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT ·getpid(SB),NOSPLIT,$0-8 MOVL $SYS_getpid, AX - SYSCALL + SYSCALL_ENHANCE MOVQ AX, ret+0(FP) RET @@ -173,7 +173,7 @@ TEXT ·tgkill(SB),NOSPLIT,$0 MOVQ tid+8(FP), SI MOVQ sig+16(FP), DX MOVL $SYS_tgkill, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·setitimer(SB),NOSPLIT,$0-24 @@ -181,7 +181,7 @@ TEXT runtime·setitimer(SB),NOSPLIT,$0-24 MOVQ new+8(FP), SI 
MOVQ old+16(FP), DX MOVL $SYS_setittimer, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·timer_create(SB),NOSPLIT,$0-28 @@ -189,7 +189,7 @@ TEXT runtime·timer_create(SB),NOSPLIT,$0-28 MOVQ sevp+8(FP), SI MOVQ timerid+16(FP), DX MOVL $SYS_timer_create, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -199,14 +199,14 @@ TEXT runtime·timer_settime(SB),NOSPLIT,$0-28 MOVQ new+8(FP), DX MOVQ old+16(FP), R10 MOVL $SYS_timer_settime, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET TEXT runtime·timer_delete(SB),NOSPLIT,$0-12 MOVL timerid+0(FP), DI MOVL $SYS_timer_delete, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+8(FP) RET @@ -215,7 +215,7 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-28 MOVQ n+8(FP), SI MOVQ dst+16(FP), DX MOVL $SYS_mincore, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -282,7 +282,7 @@ ret: RET fallback: MOVQ $SYS_clock_gettime, AX - SYSCALL + SYSCALL_ENHANCE JMP ret TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-28 @@ -291,7 +291,7 @@ TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-28 MOVQ old+16(FP), DX MOVL size+24(FP), R10 MOVL $SYS_rt_sigprocmask, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -303,7 +303,7 @@ TEXT runtime·rt_sigaction(SB),NOSPLIT,$0-36 MOVQ old+16(FP), DX MOVQ size+24(FP), R10 MOVL $SYS_rt_sigaction, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+32(FP) RET @@ -466,7 +466,7 @@ sigtrampnog: // https://gcc.gnu.org/viewcvs/gcc/trunk/libgcc/config/i386/linux-unwind.h?revision=219188&view=markup TEXT runtime·sigreturn(SB),NOSPLIT,$0 MOVQ $SYS_rt_sigreturn, AX - SYSCALL + SYSCALL_ENHANCE INT $3 // not reached TEXT runtime·sysMmap(SB),NOSPLIT,$0 @@ -478,7 +478,7 @@ TEXT runtime·sysMmap(SB),NOSPLIT,$0 MOVL off+28(FP), R9 MOVL $SYS_mmap, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS ok NOTQ AX @@ -513,7 +513,7 @@ TEXT runtime·sysMunmap(SB),NOSPLIT,$0 MOVQ addr+0(FP), DI MOVQ n+8(FP), SI MOVQ $SYS_munmap, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 
JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -537,7 +537,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0 MOVQ n+8(FP), SI MOVL flags+16(FP), DX MOVQ $SYS_madvise, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -551,12 +551,15 @@ TEXT runtime·futex(SB),NOSPLIT,$0 MOVQ addr2+24(FP), R8 MOVL val3+32(FP), R9 MOVL $SYS_futex, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+40(FP) RET // int32 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void)); TEXT runtime·clone(SB),NOSPLIT,$0 + CMPQ runtime·occlumentry(SB), $0x0 + JNE occlum + MOVL flags+0(FP), DI MOVQ stk+8(FP), SI MOVQ $0, DX @@ -620,11 +623,110 @@ nog2: SYSCALL JMP -3(PC) // keep exiting +occlum: + MOVL flags+0(FP), DI + MOVQ stk+8(FP), SI + MOVQ $0, DX + MOVQ $0, R10 + MOVQ $0, R8 + // Copy mp, gp, fn off parent stack for use by child. + // Careful: Linux system call clobbers CX and R11. + MOVQ mp+16(FP), R13 + MOVQ gp+24(FP), R9 + MOVQ fn+32(FP), R12 + CMPQ R13, $0 // m + JEQ occlum_nog1 + CMPQ R9, $0 // g + JEQ occlum_nog1 + LEAQ m_tls(R13), R8 +#ifdef GOOS_android + // Android stores the TLS offset in runtime·tls_g. + SUBQ runtime·tls_g(SB), R8 +#else + ADDQ $8, R8 // ELF wants to use -8(FS) +#endif + ORQ $0x00080000, DI //add flag CLONE_SETTLS(0x00080000) to call clone +occlum_nog1: + // flags + MOVQ DI, -16(SI) + // fn + MOVQ R12, -24(SI) + // gp + MOVQ R9, -32(SI) + // m + MOVQ R13, -40(SI) + // lea 0x23(%rip),%rax + BYTE $0x48; BYTE $0x8d; BYTE $0x05; BYTE $0x23; BYTE $0x00; BYTE $0x00; BYTE $0x00 + // thread entry point + MOVQ AX, -8(SI) + + SUBQ $8, SI + + MOVL $SYS_clone, AX + // BYTE $0xcc + // lea 0xa(%rip),%rcx + BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0a; BYTE $0x00; BYTE $0x00; BYTE $0x00 + MOVQ runtime·occlumentry(SB), R11 + JMP R11 + + // In parent, return. 
+ MOVL AX, ret+40(FP) + RET + +thread_entrypoint: + // add 8, %rsp + // MOVQ SI, SP + ADDQ $8, SP + + MOVQ SP, SI + + // mov -24(%rsp), %r12 + // fn + BYTE $0x4c; BYTE $0x8b; BYTE $0x64; BYTE $0x24; BYTE $0xe8 + + // mov -32(%rsp), %r9 + // gp + BYTE $0x4c; BYTE $0x8b; BYTE $0x4c; BYTE $0x24; BYTE $0xe0 + + // mov -40(%rsp), %r13 + // m + BYTE $0x4c; BYTE $0x8b; BYTE $0x6c; BYTE $0x24; BYTE $0xd8 + + // BYTE $0xcc + + // If g or m are nil, skip Go-related setup. + CMPQ R13, $0 // m + JEQ occlum_nog2 + CMPQ R9, $0 // g + JEQ occlum_nog2 + + // Initialize m->procid to Linux tid + MOVL $SYS_gettid, AX + SYSCALL_ENHANCE + MOVQ AX, m_procid(R13) + + // In child, set up new stack + get_tls(CX) + MOVQ R13, g_m(R9) + MOVQ R9, g(CX) + MOVQ R9, R14 // set g register + CALL runtime·stackcheck(SB) + +occlum_nog2: + // Call fn + CALL R12 + + // It shouldn't return. If it does, exit that thread. + MOVL $111, DI + MOVL $SYS_exit, AX + SYSCALL_ENHANCE + JMP -3(PC) // keep exiting + TEXT runtime·sigaltstack(SB),NOSPLIT,$-8 MOVQ new+0(FP), DI MOVQ old+8(FP), SI MOVQ $SYS_sigaltstack, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -641,7 +743,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 MOVQ DI, SI MOVQ $0x1002, DI // ARCH_SET_FS MOVQ $SYS_arch_prctl, AX - SYSCALL + SYSCALL_ENHANCE CMPQ AX, $0xfffffffffffff001 JLS 2(PC) MOVL $0xf1, 0xf1 // crash @@ -649,7 +751,7 @@ TEXT runtime·settls(SB),NOSPLIT,$32 TEXT runtime·osyield(SB),NOSPLIT,$0 MOVL $SYS_sched_yield, AX - SYSCALL + SYSCALL_ENHANCE RET TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 @@ -657,7 +759,7 @@ TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 MOVQ len+8(FP), SI MOVQ buf+16(FP), DX MOVL $SYS_sched_getaffinity, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -669,7 +771,7 @@ TEXT runtime·access(SB),NOSPLIT,$0 MOVL mode+8(FP), DX MOVL $0, R10 MOVL $SYS_faccessat, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+16(FP) RET @@ -679,7 +781,7 @@ TEXT 
runtime·connect(SB),NOSPLIT,$0-28 MOVQ addr+8(FP), SI MOVL len+16(FP), DX MOVL $SYS_connect, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+24(FP) RET @@ -689,7 +791,7 @@ TEXT runtime·socket(SB),NOSPLIT,$0-20 MOVL typ+4(FP), SI MOVL prot+8(FP), DX MOVL $SYS_socket, AX - SYSCALL + SYSCALL_ENHANCE MOVL AX, ret+16(FP) RET @@ -698,6 +800,6 @@ TEXT runtime·sbrk0(SB),NOSPLIT,$0-8 // Implemented as brk(NULL). MOVQ $0, DI MOVL $SYS_brk, AX - SYSCALL + SYSCALL_ENHANCE MOVQ AX, ret+0(FP) RET diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index 214075e360ceb6..cd37d3f1a81ccc 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -37,3 +37,19 @@ #define TOPFRAME 2048 // Function is an ABI wrapper. #define ABIWRAPPER 4096 + +// SYSCALL_ENHANCE is for SGX TEE's syscall enhancement, it will hook syscall +// and callback to a special glibc or musl syscall stub. +// step one: assign syscall return address to register rcx, occlum know where +// should return according to rcx. +// step two: jump to syscall interface address provided by occlum when go +// .bin file loaded. 
+// +// actually is the assembler instruction: lea 0xc(%rip),%rcx +#define SYSCALL_ENHANCE \ + CMPQ runtime·occlumentry(SB), $0x0 \ + JBE 10(PC) \ + BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0c; BYTE $0x00; BYTE $0x00; BYTE $0x00 \ + MOVQ runtime·occlumentry(SB), R11 \ + JMP R11 \ + SYSCALL diff --git a/src/syscall/asm_linux_amd64.s b/src/syscall/asm_linux_amd64.s index 00d6fedc62b775..7e4b267303a0a6 100644 --- a/src/syscall/asm_linux_amd64.s +++ b/src/syscall/asm_linux_amd64.s @@ -21,7 +21,7 @@ TEXT ·rawVforkSyscall(SB),NOSPLIT|NOFRAME,$0-40 MOVQ $0, R9 MOVQ trap+0(FP), AX // syscall entry POPQ R12 // preserve return address - SYSCALL + SYSCALL_ENHANCE PUSHQ R12 CMPQ AX, $0xfffffffffffff001 JLS ok2 @@ -40,7 +40,7 @@ TEXT ·rawSyscallNoError(SB),NOSPLIT,$0-48 MOVQ a2+16(FP), SI MOVQ a3+24(FP), DX MOVQ trap+0(FP), AX // syscall entry - SYSCALL + SYSCALL_ENHANCE MOVQ AX, r1+32(FP) MOVQ DX, r2+40(FP) RET @@ -61,7 +61,7 @@ ret: RET fallback: MOVL $SYS_gettimeofday, AX - SYSCALL + SYSCALL_ENHANCE JMP ret ok7: MOVQ $0, err+8(FP) diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go index dcb3d51b5f7298..0d2e10b5ecf48e 100644 --- a/src/syscall/exec_linux.go +++ b/src/syscall/exec_linux.go @@ -172,6 +172,9 @@ type caps struct { data [2]capData } +//go:linkname isTeeEnvironment runtime.isTeeEnvironment +func isTeeEnvironment() bool + // See CAP_TO_INDEX in linux/capability.h: func capToIndex(cap uintptr) uintptr { return cap >> 5 } @@ -289,15 +292,24 @@ func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, att // No more allocation or calls of non-assembly functions. runtime_BeforeFork() locked = true - if clone3 != nil { - r1, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3)) - } else { + // Use vfork instead of clone here. vfork takes no parameters, so + // the arguments passed here are ignored. + // vfork followed by exec can create a new process in Occlum; clone cannot. 
+ if isTeeEnvironment() { + const SYS_VFORK = 58 flags |= uintptr(SIGCHLD) - if runtime.GOARCH == "s390x" { - // On Linux/s390, the first two arguments of clone(2) are swapped. - r1, err1 = rawVforkSyscall(SYS_CLONE, 0, flags) + r1, err1 = rawVforkSyscall(SYS_VFORK, flags, 0) + } else { + if clone3 != nil { + r1, err1 = rawVforkSyscall(_SYS_clone3, uintptr(unsafe.Pointer(clone3)), unsafe.Sizeof(*clone3)) } else { - r1, err1 = rawVforkSyscall(SYS_CLONE, flags, 0) + flags |= uintptr(SIGCHLD) + if runtime.GOARCH == "s390x" { + // On Linux/s390, the first two arguments of clone(2) are swapped. + r1, err1 = rawVforkSyscall(SYS_CLONE, 0, flags) + } else { + r1, err1 = rawVforkSyscall(SYS_CLONE, flags, 0) + } } } if err1 != 0 || r1 != 0 { From 982079e84929d2be45c23888d1e064559acc9937 Mon Sep 17 00:00:00 2001 From: "wsj.jeffery" Date: Fri, 2 Feb 2024 14:51:37 +0800 Subject: [PATCH 2/2] syscall/linux: fix xmm15 register pollution issue in Occlum TEE Go's internal ABI defines the X15 register as a fixed zero value, used to initialize some stack variables on the x64 architecture. But the TEE libos Occlum cannot guarantee that X15 is cleared when a syscall returns, so Go has to clear X15 after an Occlum syscall returns to keep the register from being polluted. Reviewed-by: lei.yul Reviewed-by: denghui.ddh Reviewed-by: qingfeng.yy --- src/runtime/internal/syscall/asm_linux_amd64.s | 9 ++++++++- src/runtime/textflag.h | 8 +++++--- src/runtime/time_linux_amd64.s | 4 ++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/runtime/internal/syscall/asm_linux_amd64.s b/src/runtime/internal/syscall/asm_linux_amd64.s index a082516e7dba09..982f631e2fd2fb 100644 --- a/src/runtime/internal/syscall/asm_linux_amd64.s +++ b/src/runtime/internal/syscall/asm_linux_amd64.s @@ -32,7 +32,14 @@ TEXT ·Syscall6(SB),NOSPLIT,$0 MOVQ CX, SI // a2 MOVQ BX, DI // a1 // num already in AX. 
- SYSCALL_ENHANCE + CMPQ runtime·occlumentry(SB), $0x0 + JBE 12(PC) + BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0a; BYTE $0x00; BYTE $0x00; BYTE $0x00 + MOVQ runtime·occlumentry(SB), R11 + JMP R11 + PXOR X15, X15 + JMP 2(PC) + SYSCALL CMPQ AX, $0xfffffffffffff001 JLS ok NEGQ AX diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h index cd37d3f1a81ccc..41cfcd38451681 100644 --- a/src/runtime/textflag.h +++ b/src/runtime/textflag.h @@ -45,11 +45,13 @@ // step two: jump to syscall interface address provided by occlum when go // .bin file loaded. // -// actually is the assembler instruction: lea 0xc(%rip),%rcx +// actually is the assembler instruction: lea 0xa(%rip),%rcx #define SYSCALL_ENHANCE \ CMPQ runtime·occlumentry(SB), $0x0 \ - JBE 10(PC) \ - BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0c; BYTE $0x00; BYTE $0x00; BYTE $0x00 \ + JBE 12(PC) \ + BYTE $0x48; BYTE $0x8d; BYTE $0x0d; BYTE $0x0a; BYTE $0x00; BYTE $0x00; BYTE $0x00 \ MOVQ runtime·occlumentry(SB), R11 \ JMP R11 \ + PXOR X15, X15 \ + JMP 2(PC) \ SYSCALL diff --git a/src/runtime/time_linux_amd64.s b/src/runtime/time_linux_amd64.s index 1416d232304f39..2867a8a67ca870 100644 --- a/src/runtime/time_linux_amd64.s +++ b/src/runtime/time_linux_amd64.s @@ -77,11 +77,11 @@ ret: fallback: MOVQ $SYS_clock_gettime, AX - SYSCALL + SYSCALL_ENHANCE MOVL $1, DI // CLOCK_MONOTONIC LEAQ 0(SP), SI MOVQ $SYS_clock_gettime, AX - SYSCALL + SYSCALL_ENHANCE JMP ret