From: Richard Henderson Date: Wed, 17 Nov 2021 15:14:00 +0000 (+0100) Subject: common-user: Move safe-syscall.* from linux-user X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=bbf15aaf7c7506c88062288b3ae122b882f65e69;p=qemu.git common-user: Move safe-syscall.* from linux-user Move linux-user safe-syscall.S and safe-syscall-error.c to common-user so that bsd-user can also use it. Also move safe-syscall.h to include/user/. Since there is nothing here that is related to the guest, as opposed to the host, build it once. Reviewed-by: Warner Losh Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- diff --git a/MAINTAINERS b/MAINTAINERS index 9a8d1bdf72..be8fc57538 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3079,6 +3079,8 @@ M: Riku Voipio S: Maintained F: thunk.c F: accel/tcg/user-exec*.c +F: include/user/ +F: common-user/ BSD user M: Warner Losh diff --git a/bsd-user/meson.build b/bsd-user/meson.build index 87885d91ed..25c3976ead 100644 --- a/bsd-user/meson.build +++ b/bsd-user/meson.build @@ -2,6 +2,8 @@ if not have_bsd_user subdir_done() endif +common_user_inc += include_directories('.') + bsd_user_ss.add(files( 'bsdload.c', 'elfload.c', diff --git a/common-user/host/aarch64/safe-syscall.inc.S b/common-user/host/aarch64/safe-syscall.inc.S new file mode 100644 index 0000000000..73a04b73b3 --- /dev/null +++ b/common-user/host/aarch64/safe-syscall.inc.S @@ -0,0 +1,76 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, #function + .type safe_syscall_start, #function + .type safe_syscall_end, #function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + /* The syscall calling convention isn't the same as the + * C one: + * we enter with x0 == &signal_pending + * x1 == syscall number + * x2 ... x7, (stack) == syscall arguments + * and return the result in x0 + * and the syscall instruction needs + * x8 == syscall number + * x0 ... x6 == syscall arguments + * and returns the result in x0 + * Shuffle everything around appropriately. + */ + mov x9, x0 /* signal_pending pointer */ + mov x8, x1 /* syscall number */ + mov x0, x2 /* syscall arguments */ + mov x1, x3 + mov x2, x4 + mov x3, x5 + mov x4, x6 + mov x5, x7 + ldr x6, [sp] + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + ldr w10, [x9] + cbnz w10, 2f + svc 0x0 +safe_syscall_end: + /* code path for having successfully executed the syscall */ + cmp x0, #-4096 + b.hi 0f + ret + + /* code path setting errno */ +0: neg w0, w0 + b safe_syscall_set_errno_tail + + /* code path when we didn't execute the syscall */ +2: mov w0, #QEMU_ERESTARTSYS + b safe_syscall_set_errno_tail + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/arm/safe-syscall.inc.S b/common-user/host/arm/safe-syscall.inc.S new file mode 100644 index 0000000000..66176a902c --- /dev/null +++ b/common-user/host/arm/safe-syscall.inc.S @@ -0,0 +1,99 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, %function + + .cfi_sections .debug_frame + + .text + .syntax unified + .arm + .align 2 + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .fnstart + .cfi_startproc + mov r12, sp /* save entry stack */ + push { r4, r5, r6, r7, r8, lr } + .save { r4, r5, r6, r7, r8, lr } + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r4, 0 + .cfi_rel_offset r5, 4 + .cfi_rel_offset r6, 8 + .cfi_rel_offset r7, 12 + .cfi_rel_offset r8, 16 + .cfi_rel_offset lr, 20 + + /* The syscall calling convention isn't the same as the C one: + * we enter with r0 == &signal_pending + * r1 == syscall number + * r2, r3, [sp+0] ... [sp+12] == syscall arguments + * and return the result in r0 + * and the syscall instruction needs + * r7 == syscall number + * r0 ... r6 == syscall arguments + * and returns the result in r0 + * Shuffle everything around appropriately. + * Note the 16 bytes that we pushed to save registers. + */ + mov r8, r0 /* copy signal_pending */ + mov r7, r1 /* syscall number */ + mov r0, r2 /* syscall args */ + mov r1, r3 + ldm r12, { r2, r3, r4, r5, r6 } + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + ldr r12, [r8] /* signal_pending */ + tst r12, r12 + bne 2f + swi 0 +safe_syscall_end: + /* code path for having successfully executed the syscall */ + cmp r0, #-4096 + neghi r0, r0 + bhi 1f + pop { r4, r5, r6, r7, r8, pc } + + /* code path when we didn't execute the syscall */ +2: mov r0, #QEMU_ERESTARTSYS + + /* code path setting errno */ +1: pop { r4, r5, r6, r7, r8, lr } + .cfi_adjust_cfa_offset -24 + .cfi_restore r4 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore lr + b safe_syscall_set_errno_tail + + .fnend + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/i386/safe-syscall.inc.S b/common-user/host/i386/safe-syscall.inc.S new file mode 100644 index 0000000000..aced8c5141 --- /dev/null +++ b/common-user/host/i386/safe-syscall.inc.S @@ -0,0 +1,115 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset ebp, 0 + push %esi + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset esi, 0 + push %edi + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset edi, 0 + push %ebx + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset ebx, 0 + + /* The syscall calling convention isn't the same as the C one: + * we enter with 0(%esp) == return address + * 4(%esp) == &signal_pending + * 8(%esp) == syscall number + * 12(%esp) ... 32(%esp) == syscall arguments + * and return the result in eax + * and the syscall instruction needs + * eax == syscall number + * ebx, ecx, edx, esi, edi, ebp == syscall arguments + * and returns the result in eax + * Shuffle everything around appropriately. + * Note the 16 bytes that we pushed to save registers. + */ + mov 12+16(%esp), %ebx /* the syscall arguments */ + mov 16+16(%esp), %ecx + mov 20+16(%esp), %edx + mov 24+16(%esp), %esi + mov 28+16(%esp), %edi + mov 32+16(%esp), %ebp + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + mov 4+16(%esp), %eax /* signal_pending */ + cmpl $0, (%eax) + jnz 2f + mov 8+16(%esp), %eax /* syscall number */ + int $0x80 +safe_syscall_end: + /* code path for having successfully executed the syscall */ + cmp $-4095, %eax + jae 0f + pop %ebx + .cfi_remember_state + .cfi_adjust_cfa_offset -4 + .cfi_restore ebx + pop %edi + .cfi_adjust_cfa_offset -4 + .cfi_restore edi + pop %esi + .cfi_adjust_cfa_offset -4 + .cfi_restore esi + pop %ebp + .cfi_adjust_cfa_offset -4 + .cfi_restore ebp + ret + .cfi_restore_state + +0: neg %eax + jmp 1f + + /* code path when we didn't execute the syscall */ +2: mov $QEMU_ERESTARTSYS, %eax + + /* code path setting errno */ +1: pop %ebx + .cfi_adjust_cfa_offset -4 + .cfi_restore ebx + pop %edi + .cfi_adjust_cfa_offset -4 + .cfi_restore edi + pop %esi + .cfi_adjust_cfa_offset -4 + .cfi_restore esi + pop %ebp + .cfi_adjust_cfa_offset -4 + .cfi_restore ebp + jmp safe_syscall_set_errno_tail + + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/mips/safe-syscall.inc.S b/common-user/host/mips/safe-syscall.inc.S new file mode 100644 index 0000000000..fc75a337d1 --- /dev/null +++ b/common-user/host/mips/safe-syscall.inc.S @@ -0,0 +1,148 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2021 Linaro, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "sys/regdef.h" +#include "sys/asm.h" + + .text + .set nomips16 + .set reorder + + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_start, @function + .type safe_syscall_end, @function + + /* + * This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ + +#if _MIPS_SIM == _ABIO32 +/* 8 * 4 = 32 for outgoing parameters; 1 * 4 for s0 save; 1 * 4 for align. */ +#define FRAME 40 +#define OFS_S0 32 +#else +/* 1 * 8 for s0 save; 1 * 8 for align. */ +#define FRAME 16 +#define OFS_S0 0 +#endif + + +NESTED(safe_syscall_base, FRAME, ra) + .cfi_startproc + PTR_ADDIU sp, sp, -FRAME + .cfi_adjust_cfa_offset FRAME + REG_S s0, OFS_S0(sp) + .cfi_rel_offset s0, OFS_S0 +#if _MIPS_SIM == _ABIO32 + /* + * The syscall calling convention is nearly the same as C: + * we enter with a0 == &signal_pending + * a1 == syscall number + * a2, a3, stack == syscall arguments + * and return the result in a0 + * and the syscall instruction needs + * v0 == syscall number + * a0 ... a3, stack == syscall arguments + * and returns the result in v0 + * Shuffle everything around appropriately. + */ + move s0, a0 /* signal_pending pointer */ + move v0, a1 /* syscall number */ + move a0, a2 /* syscall arguments */ + move a1, a3 + lw a2, FRAME+16(sp) + lw a3, FRAME+20(sp) + lw t4, FRAME+24(sp) + lw t5, FRAME+28(sp) + lw t6, FRAME+32(sp) + lw t7, FRAME+40(sp) + sw t4, 16(sp) + sw t5, 20(sp) + sw t6, 24(sp) + sw t7, 28(sp) +#else + /* + * The syscall calling convention is nearly the same as C: + * we enter with a0 == &signal_pending + * a1 == syscall number + * a2 ... a7 == syscall arguments + * and return the result in a0 + * and the syscall instruction needs + * v0 == syscall number + * a0 ... a5 == syscall arguments + * and returns the result in v0 + * Shuffle everything around appropriately. + */ + move s0, a0 /* signal_pending pointer */ + move v0, a1 /* syscall number */ + move a0, a2 /* syscall arguments */ + move a1, a3 + move a2, a4 + move a3, a5 + move a4, a6 + move a5, a7 +#endif + + /* + * This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* If signal_pending is non-zero, don't do the call */ + lw t1, 0(s0) + bnez t1, 2f + syscall +safe_syscall_end: + + /* code path for having successfully executed the syscall */ + REG_L s0, OFS_S0(sp) + PTR_ADDIU sp, sp, FRAME + .cfi_remember_state + .cfi_adjust_cfa_offset -FRAME + .cfi_restore s0 + bnez a3, 1f + jr ra + .cfi_restore_state + + /* code path when we didn't execute the syscall */ +2: REG_L s0, OFS_S0(sp) + PTR_ADDIU sp, sp, FRAME + .cfi_adjust_cfa_offset -FRAME + .cfi_restore s0 + li v0, QEMU_ERESTARTSYS + + /* code path setting errno */ + /* + * We didn't setup GP on entry, optimistic of the syscall success. + * We must do so now to load the address of the helper, as required + * by the ABI, into t9. + * + * Note that SETUP_GPX and SETUP_GPX64 are themselves conditional, + * so we can simply let the one that's not empty succeed. + */ +1: USE_ALT_CP(t0) + SETUP_GPX(t1) + SETUP_GPX64(t0, t1) + PTR_LA t9, safe_syscall_set_errno_tail + jr t9 + + .cfi_endproc +END(safe_syscall_base) diff --git a/common-user/host/ppc64/safe-syscall.inc.S b/common-user/host/ppc64/safe-syscall.inc.S new file mode 100644 index 0000000000..947a850dfd --- /dev/null +++ b/common-user/host/ppc64/safe-syscall.inc.S @@ -0,0 +1,94 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + .text + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +#if _CALL_ELF == 2 +safe_syscall_base: + .cfi_startproc + .localentry safe_syscall_base,0 +#else + .section ".opd","aw" + .align 3 +safe_syscall_base: + .quad .L.safe_syscall_base,.TOC.@tocbase,0 + .previous +.L.safe_syscall_base: + .cfi_startproc +#endif + /* We enter with r3 == &signal_pending + * r4 == syscall number + * r5 ... r10 == syscall arguments + * and return the result in r3 + * and the syscall instruction needs + * r0 == syscall number + * r3 ... r8 == syscall arguments + * and returns the result in r3 + * Shuffle everything around appropriately. + */ + std 14, 16(1) /* Preserve r14 in SP+16 */ + .cfi_offset 14, 16 + mr 14, 3 /* signal_pending */ + mr 0, 4 /* syscall number */ + mr 3, 5 /* syscall arguments */ + mr 4, 6 + mr 5, 7 + mr 6, 8 + mr 7, 9 + mr 8, 10 + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + lwz 12, 0(14) + cmpwi 0, 12, 0 + bne- 2f + sc +safe_syscall_end: + /* code path when we did execute the syscall */ + ld 14, 16(1) /* restore r14 */ + bso- 1f + blr + + /* code path when we didn't execute the syscall */ +2: ld 14, 16(1) /* restore r14 */ + addi 3, 0, QEMU_ERESTARTSYS + + /* code path setting errno */ +1: b safe_syscall_set_errno_tail + nop /* per abi, for the linker to modify */ + + .cfi_endproc + +#if _CALL_ELF == 2 + .size safe_syscall_base, .-safe_syscall_base +#else + .size safe_syscall_base, .-.L.safe_syscall_base + .size .L.safe_syscall_base, .-.L.safe_syscall_base +#endif diff --git a/common-user/host/riscv/safe-syscall.inc.S b/common-user/host/riscv/safe-syscall.inc.S new file mode 100644 index 0000000000..dfe83c300e --- /dev/null +++ b/common-user/host/riscv/safe-syscall.inc.S @@ -0,0 +1,79 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2018 Linaro, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + .type safe_syscall_start, @function + .type safe_syscall_end, @function + + /* + * This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + /* + * The syscall calling convention is nearly the same as C: + * we enter with a0 == &signal_pending + * a1 == syscall number + * a2 ... a7 == syscall arguments + * and return the result in a0 + * and the syscall instruction needs + * a7 == syscall number + * a0 ... a5 == syscall arguments + * and returns the result in a0 + * Shuffle everything around appropriately. + */ + mv t0, a0 /* signal_pending pointer */ + mv t1, a1 /* syscall number */ + mv a0, a2 /* syscall arguments */ + mv a1, a3 + mv a2, a4 + mv a3, a5 + mv a4, a6 + mv a5, a7 + mv a7, t1 + + /* + * This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* If signal_pending is non-zero, don't do the call */ + lw t1, 0(t0) + bnez t1, 2f + scall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + li t2, -4096 + bgtu a0, t2, 0f + ret + + /* code path setting errno */ +0: neg a0, a0 + j safe_syscall_set_errno_tail + + /* code path when we didn't execute the syscall */ +2: li a0, QEMU_ERESTARTSYS + j safe_syscall_set_errno_tail + + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/s390x/safe-syscall.inc.S b/common-user/host/s390x/safe-syscall.inc.S new file mode 100644 index 0000000000..2ccbaa2402 --- /dev/null +++ b/common-user/host/s390x/safe-syscall.inc.S @@ -0,0 +1,98 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + stmg %r6,%r15,48(%r15) /* save all call-saved registers */ + .cfi_offset %r15,-40 + .cfi_offset %r14,-48 + .cfi_offset %r13,-56 + .cfi_offset %r12,-64 + .cfi_offset %r11,-72 + .cfi_offset %r10,-80 + .cfi_offset %r9,-88 + .cfi_offset %r8,-96 + .cfi_offset %r7,-104 + .cfi_offset %r6,-112 + lgr %r1,%r15 + lg %r0,8(%r15) /* load eos */ + aghi %r15,-160 + .cfi_adjust_cfa_offset 160 + stg %r1,0(%r15) /* store back chain */ + stg %r0,8(%r15) /* store eos */ + + /* + * The syscall calling convention isn't the same as the C one: + * we enter with r2 == &signal_pending + * r3 == syscall number + * r4, r5, r6, (stack) == syscall arguments + * and return the result in r2 + * and the syscall instruction needs + * r1 == syscall number + * r2 ... r7 == syscall arguments + * and returns the result in r2 + * Shuffle everything around appropriately. + */ + lgr %r8,%r2 /* signal_pending pointer */ + lgr %r1,%r3 /* syscall number */ + lgr %r2,%r4 /* syscall args */ + lgr %r3,%r5 + lgr %r4,%r6 + lmg %r5,%r7,320(%r15) + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + icm %r0,15,0(%r8) + jne 2f + svc 0 +safe_syscall_end: + + /* code path for having successfully executed the syscall */ + lg %r15,0(%r15) /* load back chain */ + .cfi_remember_state + .cfi_adjust_cfa_offset -160 + lmg %r6,%r15,48(%r15) /* load saved registers */ + + lghi %r0, -4095 /* check for syscall error */ + clgr %r2, %r0 + blr %r14 /* return on success */ + lcr %r2, %r2 /* create positive errno */ + jg safe_syscall_set_errno_tail + .cfi_restore_state + + /* code path when we didn't execute the syscall */ +2: lg %r15,0(%r15) /* load back chain */ + .cfi_adjust_cfa_offset -160 + lmg %r6,%r15,48(%r15) /* load saved registers */ + lghi %r2, QEMU_ERESTARTSYS + jg safe_syscall_set_errno_tail + + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/sparc64/safe-syscall.inc.S b/common-user/host/sparc64/safe-syscall.inc.S new file mode 100644 index 0000000000..a2f2b9c967 --- /dev/null +++ b/common-user/host/sparc64/safe-syscall.inc.S @@ -0,0 +1,89 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2021 Linaro, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .text + .balign 4 + + .register %g2, #scratch + .register %g3, #scratch + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + .type safe_syscall_start, @function + .type safe_syscall_end, @function + +#define STACK_BIAS 2047 +#define PARAM(N) STACK_BIAS + N*8 + + /* + * This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + /* + * The syscall calling convention isn't the same as the C one: + * we enter with o0 == &signal_pending + * o1 == syscall number + * o2 ... o5, (stack) == syscall arguments + * and return the result in x0 + * and the syscall instruction needs + * g1 == syscall number + * o0 ... o5 == syscall arguments + * and returns the result in o0 + * Shuffle everything around appropriately. + */ + mov %o0, %g2 /* signal_pending pointer */ + mov %o1, %g1 /* syscall number */ + mov %o2, %o0 /* syscall arguments */ + mov %o3, %o1 + mov %o4, %o2 + mov %o5, %o3 + ldx [%sp + PARAM(6)], %o4 + ldx [%sp + PARAM(7)], %o5 + + /* + * This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + lduw [%g2], %g3 + brnz,pn %g3, 2f + nop + ta 0x6d +safe_syscall_end: + /* code path for having successfully executed the syscall */ + bcs,pn %xcc, 1f + nop + ret + nop + + /* code path when we didn't execute the syscall */ +2: set QEMU_ERESTARTSYS, %o0 + + /* code path setting errno */ +1: mov %o7, %g1 + call safe_syscall_set_errno_tail + mov %g1, %o7 + + .cfi_endproc + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/host/x86_64/safe-syscall.inc.S b/common-user/host/x86_64/safe-syscall.inc.S new file mode 100644 index 0000000000..84fed206f9 --- /dev/null +++ b/common-user/host/x86_64/safe-syscall.inc.S @@ -0,0 +1,94 @@ +/* + * safe-syscall.inc.S : host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * This is intended to be included by common-user/safe-syscall.S + * + * Copyright (C) 2015 Timothy Edward Baldwin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + + .global safe_syscall_base + .global safe_syscall_start + .global safe_syscall_end + .type safe_syscall_base, @function + + /* This is the entry point for making a system call. The calling + * convention here is that of a C varargs function with the + * first argument an 'int *' to the signal_pending flag, the + * second one the system call number (as a 'long'), and all further + * arguments being syscall arguments (also 'long'). + */ +safe_syscall_base: + .cfi_startproc + /* This saves a frame pointer and aligns the stack for the syscall. + * (It's unclear if the syscall ABI has the same stack alignment + * requirements as the userspace function call ABI, but better safe than + * sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf + * does not list any ABI differences regarding stack alignment.) + */ + push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset rbp, 0 + + /* + * The syscall calling convention isn't the same as the C one: + * we enter with rdi == &signal_pending + * rsi == syscall number + * rdx, rcx, r8, r9, (stack), (stack) == syscall arguments + * and return the result in rax + * and the syscall instruction needs + * rax == syscall number + * rdi, rsi, rdx, r10, r8, r9 == syscall arguments + * and returns the result in rax + * Shuffle everything around appropriately. + * Note that syscall will trash rcx and r11. + */ + mov %rsi, %rax /* syscall number */ + mov %rdi, %rbp /* signal_pending pointer */ + /* and the syscall arguments */ + mov %rdx, %rdi + mov %rcx, %rsi + mov %r8, %rdx + mov %r9, %r10 + mov 16(%rsp), %r8 + mov 24(%rsp), %r9 + + /* This next sequence of code works in conjunction with the + * rewind_if_safe_syscall_function(). If a signal is taken + * and the interrupted PC is anywhere between 'safe_syscall_start' + * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. + * The code sequence must therefore be able to cope with this, and + * the syscall instruction must be the final one in the sequence. + */ +safe_syscall_start: + /* if signal_pending is non-zero, don't do the call */ + cmpl $0, (%rbp) + jnz 2f + syscall +safe_syscall_end: + /* code path for having successfully executed the syscall */ + cmp $-4095, %rax + jae 0f + pop %rbp + .cfi_remember_state + .cfi_def_cfa_offset 8 + .cfi_restore rbp + ret + .cfi_restore_state + +0: neg %eax + jmp 1f + + /* code path when we didn't execute the syscall */ +2: mov $QEMU_ERESTARTSYS, %eax + + /* code path setting errno */ +1: pop %rbp + .cfi_def_cfa_offset 8 + .cfi_restore rbp + jmp safe_syscall_set_errno_tail + .cfi_endproc + + .size safe_syscall_base, .-safe_syscall_base diff --git a/common-user/meson.build b/common-user/meson.build new file mode 100644 index 0000000000..5cb42bc664 --- /dev/null +++ b/common-user/meson.build @@ -0,0 +1,6 @@ +common_user_inc += include_directories('host/' / host_arch) + +common_user_ss.add(files( + 'safe-syscall.S', + 'safe-syscall-error.c', +)) diff --git a/common-user/safe-syscall-error.c b/common-user/safe-syscall-error.c new file mode 100644 index 0000000000..cf74b504f8 --- /dev/null +++ b/common-user/safe-syscall-error.c @@ -0,0 +1,25 @@ +/* + * safe-syscall-error.c: errno setting fragment + * This is intended to be invoked by safe-syscall.S + * + * Written by Richard Henderson + * Copyright (C) 2021 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "user/safe-syscall.h" + +/* + * This is intended to be invoked via tail-call on the error path + * from the assembly in host/arch/safe-syscall.inc.S. This takes + * care of the host specific addressing of errno. + * Return -1 to finalize the return value for safe_syscall_base. + */ +long safe_syscall_set_errno_tail(int value) +{ + errno = value; + return -1; +} diff --git a/common-user/safe-syscall.S b/common-user/safe-syscall.S new file mode 100644 index 0000000000..74f7e35694 --- /dev/null +++ b/common-user/safe-syscall.S @@ -0,0 +1,27 @@ +/* + * safe-syscall.S : include the host-specific assembly fragment + * to handle signals occurring at the same time as system calls. + * + * Written by Peter Maydell + * + * Copyright (C) 2016 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "special-errno.h" + +/* We have the correct host directory on our include path + * so that this will pull in the right fragment for the architecture. + */ +#include "safe-syscall.inc.S" + +/* We must specifically say that we're happy for the stack to not be + * executable, otherwise the toolchain will default to assuming our + * assembly needs an executable stack and the whole QEMU binary will + * needlessly end up with one. This should be the last thing in this file. + */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/include/user/safe-syscall.h b/include/user/safe-syscall.h new file mode 100644 index 0000000000..61a04e2b5a --- /dev/null +++ b/include/user/safe-syscall.h @@ -0,0 +1,140 @@ +/* + * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef LINUX_USER_SAFE_SYSCALL_H +#define LINUX_USER_SAFE_SYSCALL_H + +/** + * safe_syscall: + * @int number: number of system call to make + * ...: arguments to the system call + * + * Call a system call if guest signal not pending. + * This has the same API as the libc syscall() function, except that it + * may return -1 with errno == QEMU_ERESTARTSYS if a signal was pending. + * + * Returns: the system call result, or -1 with an error code in errno + * (Errnos are host errnos; we rely on QEMU_ERESTARTSYS not clashing + * with any of the host errno values.) + */ + +/* + * A guide to using safe_syscall() to handle interactions between guest + * syscalls and guest signals: + * + * Guest syscalls come in two flavours: + * + * (1) Non-interruptible syscalls + * + * These are guest syscalls that never get interrupted by signals and + * so never return EINTR. They can be implemented straightforwardly in + * QEMU: just make sure that if the implementation code has to make any + * blocking calls that those calls are retried if they return EINTR. + * It's also OK to implement these with safe_syscall, though it will be + * a little less efficient if a signal is delivered at the 'wrong' moment. + * + * Some non-interruptible syscalls need to be handled using block_signals() + * to block signals for the duration of the syscall. This mainly applies + * to code which needs to modify the data structures used by the + * host_signal_handler() function and the functions it calls, including + * all syscalls which change the thread's signal mask. + * + * (2) Interruptible syscalls + * + * These are guest syscalls that can be interrupted by signals and + * for which we need to either return EINTR or arrange for the guest + * syscall to be restarted. This category includes both syscalls which + * always restart (and in the kernel return -ERESTARTNOINTR), ones + * which only restart if there is no handler (kernel returns -ERESTARTNOHAND + * or -ERESTART_RESTARTBLOCK), and the most common kind which restart + * if the handler was registered with SA_RESTART (kernel returns + * -ERESTARTSYS). System calls which are only interruptible in some + * situations (like 'open') also need to be handled this way. + * + * Here it is important that the host syscall is made + * via this safe_syscall() function, and *not* via the host libc. + * If the host libc is used then the implementation will appear to work + * most of the time, but there will be a race condition where a + * signal could arrive just before we make the host syscall inside libc, + * and then then guest syscall will not correctly be interrupted. + * Instead the implementation of the guest syscall can use the safe_syscall + * function but otherwise just return the result or errno in the usual + * way; the main loop code will take care of restarting the syscall + * if appropriate. + * + * (If the implementation needs to make multiple host syscalls this is + * OK; any which might really block must be via safe_syscall(); for those + * which are only technically blocking (ie which we know in practice won't + * stay in the host kernel indefinitely) it's OK to use libc if necessary. + * You must be able to cope with backing out correctly if some safe_syscall + * you make in the implementation returns either -QEMU_ERESTARTSYS or + * EINTR though.) + * + * block_signals() cannot be used for interruptible syscalls. + * + * + * How and why the safe_syscall implementation works: + * + * The basic setup is that we make the host syscall via a known + * section of host native assembly. If a signal occurs, our signal + * handler checks the interrupted host PC against the addresse of that + * known section. If the PC is before or at the address of the syscall + * instruction then we change the PC to point at a "return + * -QEMU_ERESTARTSYS" code path instead, and then exit the signal handler + * (causing the safe_syscall() call to immediately return that value). + * Then in the main.c loop if we see this magic return value we adjust + * the guest PC to wind it back to before the system call, and invoke + * the guest signal handler as usual. + * + * This winding-back will happen in two cases: + * (1) signal came in just before we took the host syscall (a race); + * in this case we'll take the guest signal and have another go + * at the syscall afterwards, and this is indistinguishable for the + * guest from the timing having been different such that the guest + * signal really did win the race + * (2) signal came in while the host syscall was blocking, and the + * host kernel decided the syscall should be restarted; + * in this case we want to restart the guest syscall also, and so + * rewinding is the right thing. (Note that "restart" semantics mean + * "first call the signal handler, then reattempt the syscall".) + * The other situation to consider is when a signal came in while the + * host syscall was blocking, and the host kernel decided that the syscall + * should not be restarted; in this case QEMU's host signal handler will + * be invoked with the PC pointing just after the syscall instruction, + * with registers indicating an EINTR return; the special code in the + * handler will not kick in, and we will return EINTR to the guest as + * we should. + * + * Notice that we can leave the host kernel to make the decision for + * us about whether to do a restart of the syscall or not; we do not + * need to check SA_RESTART flags in QEMU or distinguish the various + * kinds of restartability. + */ + +/* The core part of this function is implemented in assembly */ +extern long safe_syscall_base(int *pending, long number, ...); +extern long safe_syscall_set_errno_tail(int value); + +/* These are defined by the safe-syscall.inc.S file */ +extern char safe_syscall_start[]; +extern char safe_syscall_end[]; + +#define safe_syscall(...) \ + safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \ + __VA_ARGS__) + +#endif diff --git a/linux-user/host/aarch64/safe-syscall.inc.S b/linux-user/host/aarch64/safe-syscall.inc.S deleted file mode 100644 index 87c9580faa..0000000000 --- a/linux-user/host/aarch64/safe-syscall.inc.S +++ /dev/null @@ -1,76 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2016 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, #function - .type safe_syscall_start, #function - .type safe_syscall_end, #function - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - /* The syscall calling convention isn't the same as the - * C one: - * we enter with x0 == &signal_pending - * x1 == syscall number - * x2 ... x7, (stack) == syscall arguments - * and return the result in x0 - * and the syscall instruction needs - * x8 == syscall number - * x0 ... x6 == syscall arguments - * and returns the result in x0 - * Shuffle everything around appropriately. - */ - mov x9, x0 /* signal_pending pointer */ - mov x8, x1 /* syscall number */ - mov x0, x2 /* syscall arguments */ - mov x1, x3 - mov x2, x4 - mov x3, x5 - mov x4, x6 - mov x5, x7 - ldr x6, [sp] - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - ldr w10, [x9] - cbnz w10, 2f - svc 0x0 -safe_syscall_end: - /* code path for having successfully executed the syscall */ - cmp x0, #-4096 - b.hi 0f - ret - - /* code path setting errno */ -0: neg w0, w0 - b safe_syscall_set_errno_tail - - /* code path when we didn't execute the syscall */ -2: mov w0, #QEMU_ERESTARTSYS - b safe_syscall_set_errno_tail - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/arm/safe-syscall.inc.S b/linux-user/host/arm/safe-syscall.inc.S deleted file mode 100644 index f1a6aabfd3..0000000000 --- a/linux-user/host/arm/safe-syscall.inc.S +++ /dev/null @@ -1,99 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2016 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, %function - - .cfi_sections .debug_frame - - .text - .syntax unified - .arm - .align 2 - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .fnstart - .cfi_startproc - mov r12, sp /* save entry stack */ - push { r4, r5, r6, r7, r8, lr } - .save { r4, r5, r6, r7, r8, lr } - .cfi_adjust_cfa_offset 24 - .cfi_rel_offset r4, 0 - .cfi_rel_offset r5, 4 - .cfi_rel_offset r6, 8 - .cfi_rel_offset r7, 12 - .cfi_rel_offset r8, 16 - .cfi_rel_offset lr, 20 - - /* The syscall calling convention isn't the same as the C one: - * we enter with r0 == &signal_pending - * r1 == syscall number - * r2, r3, [sp+0] ... [sp+12] == syscall arguments - * and return the result in r0 - * and the syscall instruction needs - * r7 == syscall number - * r0 ... r6 == syscall arguments - * and returns the result in r0 - * Shuffle everything around appropriately. - * Note the 16 bytes that we pushed to save registers. - */ - mov r8, r0 /* copy signal_pending */ - mov r7, r1 /* syscall number */ - mov r0, r2 /* syscall args */ - mov r1, r3 - ldm r12, { r2, r3, r4, r5, r6 } - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - ldr r12, [r8] /* signal_pending */ - tst r12, r12 - bne 2f - swi 0 -safe_syscall_end: - /* code path for having successfully executed the syscall */ - cmp r0, #-4096 - neghi r0, r0 - bhi 1f - pop { r4, r5, r6, r7, r8, pc } - - /* code path when we didn't execute the syscall */ -2: mov r0, #QEMU_ERESTARTSYS - - /* code path setting errno */ -1: pop { r4, r5, r6, r7, r8, lr } - .cfi_adjust_cfa_offset -24 - .cfi_restore r4 - .cfi_restore r5 - .cfi_restore r6 - .cfi_restore r7 - .cfi_restore r8 - .cfi_restore lr - b safe_syscall_set_errno_tail - - .fnend - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/i386/safe-syscall.inc.S b/linux-user/host/i386/safe-syscall.inc.S deleted file mode 100644 index 1fb031d228..0000000000 --- a/linux-user/host/i386/safe-syscall.inc.S +++ /dev/null @@ -1,115 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2016 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - push %ebp - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset ebp, 0 - push %esi - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset esi, 0 - push %edi - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset edi, 0 - push %ebx - .cfi_adjust_cfa_offset 4 - .cfi_rel_offset ebx, 0 - - /* The syscall calling convention isn't the same as the C one: - * we enter with 0(%esp) == return address - * 4(%esp) == &signal_pending - * 8(%esp) == syscall number - * 12(%esp) ... 32(%esp) == syscall arguments - * and return the result in eax - * and the syscall instruction needs - * eax == syscall number - * ebx, ecx, edx, esi, edi, ebp == syscall arguments - * and returns the result in eax - * Shuffle everything around appropriately. - * Note the 16 bytes that we pushed to save registers. - */ - mov 12+16(%esp), %ebx /* the syscall arguments */ - mov 16+16(%esp), %ecx - mov 20+16(%esp), %edx - mov 24+16(%esp), %esi - mov 28+16(%esp), %edi - mov 32+16(%esp), %ebp - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - mov 4+16(%esp), %eax /* signal_pending */ - cmpl $0, (%eax) - jnz 2f - mov 8+16(%esp), %eax /* syscall number */ - int $0x80 -safe_syscall_end: - /* code path for having successfully executed the syscall */ - cmp $-4095, %eax - jae 0f - pop %ebx - .cfi_remember_state - .cfi_adjust_cfa_offset -4 - .cfi_restore ebx - pop %edi - .cfi_adjust_cfa_offset -4 - .cfi_restore edi - pop %esi - .cfi_adjust_cfa_offset -4 - .cfi_restore esi - pop %ebp - .cfi_adjust_cfa_offset -4 - .cfi_restore ebp - ret - .cfi_restore_state - -0: neg %eax - jmp 1f - - /* code path when we didn't execute the syscall */ -2: mov $QEMU_ERESTARTSYS, %eax - - /* code path setting errno */ -1: pop %ebx - .cfi_adjust_cfa_offset -4 - .cfi_restore ebx - pop %edi - .cfi_adjust_cfa_offset -4 - .cfi_restore edi - pop %esi - .cfi_adjust_cfa_offset -4 - .cfi_restore esi - pop %ebp - .cfi_adjust_cfa_offset -4 - .cfi_restore ebp - jmp safe_syscall_set_errno_tail - - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/mips/safe-syscall.inc.S b/linux-user/host/mips/safe-syscall.inc.S deleted file mode 100644 index e9362e774d..0000000000 --- a/linux-user/host/mips/safe-syscall.inc.S +++ /dev/null @@ -1,148 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2021 Linaro, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "sys/regdef.h" -#include "sys/asm.h" - - .text - .set nomips16 - .set reorder - - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_start, @function - .type safe_syscall_end, @function - - /* - * This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ - -#if _MIPS_SIM == _ABIO32 -/* 8 * 4 = 32 for outgoing parameters; 1 * 4 for s0 save; 1 * 4 for align. */ -#define FRAME 40 -#define OFS_S0 32 -#else -/* 1 * 8 for s0 save; 1 * 8 for align. */ -#define FRAME 16 -#define OFS_S0 0 -#endif - - -NESTED(safe_syscall_base, FRAME, ra) - .cfi_startproc - PTR_ADDIU sp, sp, -FRAME - .cfi_adjust_cfa_offset FRAME - REG_S s0, OFS_S0(sp) - .cfi_rel_offset s0, OFS_S0 -#if _MIPS_SIM == _ABIO32 - /* - * The syscall calling convention is nearly the same as C: - * we enter with a0 == &signal_pending - * a1 == syscall number - * a2, a3, stack == syscall arguments - * and return the result in a0 - * and the syscall instruction needs - * v0 == syscall number - * a0 ... a3, stack == syscall arguments - * and returns the result in v0 - * Shuffle everything around appropriately. - */ - move s0, a0 /* signal_pending pointer */ - move v0, a1 /* syscall number */ - move a0, a2 /* syscall arguments */ - move a1, a3 - lw a2, FRAME+16(sp) - lw a3, FRAME+20(sp) - lw t4, FRAME+24(sp) - lw t5, FRAME+28(sp) - lw t6, FRAME+32(sp) - lw t7, FRAME+40(sp) - sw t4, 16(sp) - sw t5, 20(sp) - sw t6, 24(sp) - sw t7, 28(sp) -#else - /* - * The syscall calling convention is nearly the same as C: - * we enter with a0 == &signal_pending - * a1 == syscall number - * a2 ... a7 == syscall arguments - * and return the result in a0 - * and the syscall instruction needs - * v0 == syscall number - * a0 ... a5 == syscall arguments - * and returns the result in v0 - * Shuffle everything around appropriately. - */ - move s0, a0 /* signal_pending pointer */ - move v0, a1 /* syscall number */ - move a0, a2 /* syscall arguments */ - move a1, a3 - move a2, a4 - move a3, a5 - move a4, a6 - move a5, a7 -#endif - - /* - * This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* If signal_pending is non-zero, don't do the call */ - lw t1, 0(s0) - bnez t1, 2f - syscall -safe_syscall_end: - - /* code path for having successfully executed the syscall */ - REG_L s0, OFS_S0(sp) - PTR_ADDIU sp, sp, FRAME - .cfi_remember_state - .cfi_adjust_cfa_offset -FRAME - .cfi_restore s0 - bnez a3, 1f - jr ra - .cfi_restore_state - - /* code path when we didn't execute the syscall */ -2: REG_L s0, OFS_S0(sp) - PTR_ADDIU sp, sp, FRAME - .cfi_adjust_cfa_offset -FRAME - .cfi_restore s0 - li v0, QEMU_ERESTARTSYS - - /* code path setting errno */ - /* - * We didn't setup GP on entry, optimistic of the syscall success. - * We must do so now to load the address of the helper, as required - * by the ABI, into t9. - * - * Note that SETUP_GPX and SETUP_GPX64 are themselves conditional, - * so we can simply let the one that's not empty succeed. - */ -1: USE_ALT_CP(t0) - SETUP_GPX(t1) - SETUP_GPX64(t0, t1) - PTR_LA t9, safe_syscall_set_errno_tail - jr t9 - - .cfi_endproc -END(safe_syscall_base) diff --git a/linux-user/host/ppc64/safe-syscall.inc.S b/linux-user/host/ppc64/safe-syscall.inc.S deleted file mode 100644 index 69d3c70094..0000000000 --- a/linux-user/host/ppc64/safe-syscall.inc.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2016 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - - .text - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -#if _CALL_ELF == 2 -safe_syscall_base: - .cfi_startproc - .localentry safe_syscall_base,0 -#else - .section ".opd","aw" - .align 3 -safe_syscall_base: - .quad .L.safe_syscall_base,.TOC.@tocbase,0 - .previous -.L.safe_syscall_base: - .cfi_startproc -#endif - /* We enter with r3 == &signal_pending - * r4 == syscall number - * r5 ... r10 == syscall arguments - * and return the result in r3 - * and the syscall instruction needs - * r0 == syscall number - * r3 ... r8 == syscall arguments - * and returns the result in r3 - * Shuffle everything around appropriately. - */ - std 14, 16(1) /* Preserve r14 in SP+16 */ - .cfi_offset 14, 16 - mr 14, 3 /* signal_pending */ - mr 0, 4 /* syscall number */ - mr 3, 5 /* syscall arguments */ - mr 4, 6 - mr 5, 7 - mr 6, 8 - mr 7, 9 - mr 8, 10 - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - lwz 12, 0(14) - cmpwi 0, 12, 0 - bne- 2f - sc -safe_syscall_end: - /* code path when we did execute the syscall */ - ld 14, 16(1) /* restore r14 */ - bso- 1f - blr - - /* code path when we didn't execute the syscall */ -2: ld 14, 16(1) /* restore r14 */ - addi 3, 0, QEMU_ERESTARTSYS - - /* code path setting errno */ -1: b safe_syscall_set_errno_tail - nop /* per abi, for the linker to modify */ - - .cfi_endproc - -#if _CALL_ELF == 2 - .size safe_syscall_base, .-safe_syscall_base -#else - .size safe_syscall_base, .-.L.safe_syscall_base - .size .L.safe_syscall_base, .-.L.safe_syscall_base -#endif diff --git a/linux-user/host/riscv/safe-syscall.inc.S b/linux-user/host/riscv/safe-syscall.inc.S deleted file mode 100644 index ca456d8a46..0000000000 --- a/linux-user/host/riscv/safe-syscall.inc.S +++ /dev/null @@ -1,79 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2018 Linaro, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - .type safe_syscall_start, @function - .type safe_syscall_end, @function - - /* - * This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - /* - * The syscall calling convention is nearly the same as C: - * we enter with a0 == &signal_pending - * a1 == syscall number - * a2 ... a7 == syscall arguments - * and return the result in a0 - * and the syscall instruction needs - * a7 == syscall number - * a0 ... a5 == syscall arguments - * and returns the result in a0 - * Shuffle everything around appropriately. - */ - mv t0, a0 /* signal_pending pointer */ - mv t1, a1 /* syscall number */ - mv a0, a2 /* syscall arguments */ - mv a1, a3 - mv a2, a4 - mv a3, a5 - mv a4, a6 - mv a5, a7 - mv a7, t1 - - /* - * This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* If signal_pending is non-zero, don't do the call */ - lw t1, 0(t0) - bnez t1, 2f - scall -safe_syscall_end: - /* code path for having successfully executed the syscall */ - li t2, -4096 - bgtu a0, t2, 0f - ret - - /* code path setting errno */ -0: neg a0, a0 - j safe_syscall_set_errno_tail - - /* code path when we didn't execute the syscall */ -2: li a0, QEMU_ERESTARTSYS - j safe_syscall_set_errno_tail - - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/s390x/safe-syscall.inc.S b/linux-user/host/s390x/safe-syscall.inc.S deleted file mode 100644 index 66f84385a2..0000000000 --- a/linux-user/host/s390x/safe-syscall.inc.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2016 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - stmg %r6,%r15,48(%r15) /* save all call-saved registers */ - .cfi_offset %r15,-40 - .cfi_offset %r14,-48 - .cfi_offset %r13,-56 - .cfi_offset %r12,-64 - .cfi_offset %r11,-72 - .cfi_offset %r10,-80 - .cfi_offset %r9,-88 - .cfi_offset %r8,-96 - .cfi_offset %r7,-104 - .cfi_offset %r6,-112 - lgr %r1,%r15 - lg %r0,8(%r15) /* load eos */ - aghi %r15,-160 - .cfi_adjust_cfa_offset 160 - stg %r1,0(%r15) /* store back chain */ - stg %r0,8(%r15) /* store eos */ - - /* - * The syscall calling convention isn't the same as the C one: - * we enter with r2 == &signal_pending - * r3 == syscall number - * r4, r5, r6, (stack) == syscall arguments - * and return the result in r2 - * and the syscall instruction needs - * r1 == syscall number - * r2 ... r7 == syscall arguments - * and returns the result in r2 - * Shuffle everything around appropriately. - */ - lgr %r8,%r2 /* signal_pending pointer */ - lgr %r1,%r3 /* syscall number */ - lgr %r2,%r4 /* syscall args */ - lgr %r3,%r5 - lgr %r4,%r6 - lmg %r5,%r7,320(%r15) - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - icm %r0,15,0(%r8) - jne 2f - svc 0 -safe_syscall_end: - - /* code path for having successfully executed the syscall */ - lg %r15,0(%r15) /* load back chain */ - .cfi_remember_state - .cfi_adjust_cfa_offset -160 - lmg %r6,%r15,48(%r15) /* load saved registers */ - - lghi %r0, -4095 /* check for syscall error */ - clgr %r2, %r0 - blr %r14 /* return on success */ - lcr %r2, %r2 /* create positive errno */ - jg safe_syscall_set_errno_tail - .cfi_restore_state - - /* code path when we didn't execute the syscall */ -2: lg %r15,0(%r15) /* load back chain */ - .cfi_adjust_cfa_offset -160 - lmg %r6,%r15,48(%r15) /* load saved registers */ - lghi %r2, QEMU_ERESTARTSYS - jg safe_syscall_set_errno_tail - - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/sparc64/safe-syscall.inc.S b/linux-user/host/sparc64/safe-syscall.inc.S deleted file mode 100644 index f4b3c0f9ae..0000000000 --- a/linux-user/host/sparc64/safe-syscall.inc.S +++ /dev/null @@ -1,89 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2021 Linaro, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .text - .balign 4 - - .register %g2, #scratch - .register %g3, #scratch - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - .type safe_syscall_start, @function - .type safe_syscall_end, @function - -#define STACK_BIAS 2047 -#define PARAM(N) STACK_BIAS + N*8 - - /* - * This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - /* - * The syscall calling convention isn't the same as the C one: - * we enter with o0 == &signal_pending - * o1 == syscall number - * o2 ... o5, (stack) == syscall arguments - * and return the result in x0 - * and the syscall instruction needs - * g1 == syscall number - * o0 ... o5 == syscall arguments - * and returns the result in o0 - * Shuffle everything around appropriately. - */ - mov %o0, %g2 /* signal_pending pointer */ - mov %o1, %g1 /* syscall number */ - mov %o2, %o0 /* syscall arguments */ - mov %o3, %o1 - mov %o4, %o2 - mov %o5, %o3 - ldx [%sp + PARAM(6)], %o4 - ldx [%sp + PARAM(7)], %o5 - - /* - * This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - lduw [%g2], %g3 - brnz,pn %g3, 2f - nop - ta 0x6d -safe_syscall_end: - /* code path for having successfully executed the syscall */ - bcs,pn %xcc, 1f - nop - ret - nop - - /* code path when we didn't execute the syscall */ -2: set QEMU_ERESTARTSYS, %o0 - - /* code path setting errno */ -1: mov %o7, %g1 - call safe_syscall_set_errno_tail - mov %g1, %o7 - - .cfi_endproc - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/host/x86_64/safe-syscall.inc.S b/linux-user/host/x86_64/safe-syscall.inc.S deleted file mode 100644 index f88cbe1347..0000000000 --- a/linux-user/host/x86_64/safe-syscall.inc.S +++ /dev/null @@ -1,94 +0,0 @@ -/* - * safe-syscall.inc.S : host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * This is intended to be included by linux-user/safe-syscall.S - * - * Copyright (C) 2015 Timothy Edward Baldwin - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - - .global safe_syscall_base - .global safe_syscall_start - .global safe_syscall_end - .type safe_syscall_base, @function - - /* This is the entry point for making a system call. The calling - * convention here is that of a C varargs function with the - * first argument an 'int *' to the signal_pending flag, the - * second one the system call number (as a 'long'), and all further - * arguments being syscall arguments (also 'long'). - */ -safe_syscall_base: - .cfi_startproc - /* This saves a frame pointer and aligns the stack for the syscall. - * (It's unclear if the syscall ABI has the same stack alignment - * requirements as the userspace function call ABI, but better safe than - * sorry. Appendix A2 of http://www.x86-64.org/documentation/abi.pdf - * does not list any ABI differences regarding stack alignment.) - */ - push %rbp - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset rbp, 0 - - /* - * The syscall calling convention isn't the same as the C one: - * we enter with rdi == &signal_pending - * rsi == syscall number - * rdx, rcx, r8, r9, (stack), (stack) == syscall arguments - * and return the result in rax - * and the syscall instruction needs - * rax == syscall number - * rdi, rsi, rdx, r10, r8, r9 == syscall arguments - * and returns the result in rax - * Shuffle everything around appropriately. - * Note that syscall will trash rcx and r11. - */ - mov %rsi, %rax /* syscall number */ - mov %rdi, %rbp /* signal_pending pointer */ - /* and the syscall arguments */ - mov %rdx, %rdi - mov %rcx, %rsi - mov %r8, %rdx - mov %r9, %r10 - mov 16(%rsp), %r8 - mov 24(%rsp), %r9 - - /* This next sequence of code works in conjunction with the - * rewind_if_safe_syscall_function(). If a signal is taken - * and the interrupted PC is anywhere between 'safe_syscall_start' - * and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'. - * The code sequence must therefore be able to cope with this, and - * the syscall instruction must be the final one in the sequence. - */ -safe_syscall_start: - /* if signal_pending is non-zero, don't do the call */ - cmpl $0, (%rbp) - jnz 2f - syscall -safe_syscall_end: - /* code path for having successfully executed the syscall */ - cmp $-4095, %rax - jae 0f - pop %rbp - .cfi_remember_state - .cfi_def_cfa_offset 8 - .cfi_restore rbp - ret - .cfi_restore_state - -0: neg %eax - jmp 1f - - /* code path when we didn't execute the syscall */ -2: mov $QEMU_ERESTARTSYS, %eax - - /* code path setting errno */ -1: pop %rbp - .cfi_def_cfa_offset 8 - .cfi_restore rbp - jmp safe_syscall_set_errno_tail - .cfi_endproc - - .size safe_syscall_base, .-safe_syscall_base diff --git a/linux-user/meson.build b/linux-user/meson.build index 94ac3c58ce..eef1dd68bd 100644 --- a/linux-user/meson.build +++ b/linux-user/meson.build @@ -2,6 +2,9 @@ if not have_linux_user subdir_done() endif +common_user_inc += include_directories('host/' / host_arch) +common_user_inc += include_directories('.') + linux_user_ss.add(files( 'elfload.c', 'exit.c', @@ -9,8 +12,6 @@ linux_user_ss.add(files( 'linuxload.c', 'main.c', 'mmap.c', - 'safe-syscall.S', - 'safe-syscall-error.c', 'signal.c', 'strace.c', 'syscall.c', diff --git a/linux-user/safe-syscall-error.c b/linux-user/safe-syscall-error.c deleted file mode 100644 index 55d95ac39a..0000000000 --- a/linux-user/safe-syscall-error.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * safe-syscall-error.c: errno setting fragment - * This is intended to be invoked by safe-syscall.S - * - * Written by Richard Henderson - * Copyright (C) 2021 Red Hat, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "safe-syscall.h" - -/* - * This is intended to be invoked via tail-call on the error path - * from the assembly in host/arch/safe-syscall.inc.S. This takes - * care of the host specific addressing of errno. - * Return -1 to finalize the return value for safe_syscall_base. - */ -long safe_syscall_set_errno_tail(int value) -{ - errno = value; - return -1; -} diff --git a/linux-user/safe-syscall.S b/linux-user/safe-syscall.S deleted file mode 100644 index 74f7e35694..0000000000 --- a/linux-user/safe-syscall.S +++ /dev/null @@ -1,27 +0,0 @@ -/* - * safe-syscall.S : include the host-specific assembly fragment - * to handle signals occurring at the same time as system calls. - * - * Written by Peter Maydell - * - * Copyright (C) 2016 Linaro Limited - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "special-errno.h" - -/* We have the correct host directory on our include path - * so that this will pull in the right fragment for the architecture. - */ -#include "safe-syscall.inc.S" - -/* We must specifically say that we're happy for the stack to not be - * executable, otherwise the toolchain will default to assuming our - * assembly needs an executable stack and the whole QEMU binary will - * needlessly end up with one. This should be the last thing in this file. - */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack, "", %progbits -#endif diff --git a/linux-user/safe-syscall.h b/linux-user/safe-syscall.h deleted file mode 100644 index 61a04e2b5a..0000000000 --- a/linux-user/safe-syscall.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * safe-syscall.h: prototypes for linux-user signal-race-safe syscalls - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef LINUX_USER_SAFE_SYSCALL_H -#define LINUX_USER_SAFE_SYSCALL_H - -/** - * safe_syscall: - * @int number: number of system call to make - * ...: arguments to the system call - * - * Call a system call if guest signal not pending. - * This has the same API as the libc syscall() function, except that it - * may return -1 with errno == QEMU_ERESTARTSYS if a signal was pending. - * - * Returns: the system call result, or -1 with an error code in errno - * (Errnos are host errnos; we rely on QEMU_ERESTARTSYS not clashing - * with any of the host errno values.) - */ - -/* - * A guide to using safe_syscall() to handle interactions between guest - * syscalls and guest signals: - * - * Guest syscalls come in two flavours: - * - * (1) Non-interruptible syscalls - * - * These are guest syscalls that never get interrupted by signals and - * so never return EINTR. They can be implemented straightforwardly in - * QEMU: just make sure that if the implementation code has to make any - * blocking calls that those calls are retried if they return EINTR. - * It's also OK to implement these with safe_syscall, though it will be - * a little less efficient if a signal is delivered at the 'wrong' moment. - * - * Some non-interruptible syscalls need to be handled using block_signals() - * to block signals for the duration of the syscall. This mainly applies - * to code which needs to modify the data structures used by the - * host_signal_handler() function and the functions it calls, including - * all syscalls which change the thread's signal mask. - * - * (2) Interruptible syscalls - * - * These are guest syscalls that can be interrupted by signals and - * for which we need to either return EINTR or arrange for the guest - * syscall to be restarted. This category includes both syscalls which - * always restart (and in the kernel return -ERESTARTNOINTR), ones - * which only restart if there is no handler (kernel returns -ERESTARTNOHAND - * or -ERESTART_RESTARTBLOCK), and the most common kind which restart - * if the handler was registered with SA_RESTART (kernel returns - * -ERESTARTSYS). System calls which are only interruptible in some - * situations (like 'open') also need to be handled this way. - * - * Here it is important that the host syscall is made - * via this safe_syscall() function, and *not* via the host libc. - * If the host libc is used then the implementation will appear to work - * most of the time, but there will be a race condition where a - * signal could arrive just before we make the host syscall inside libc, - * and then then guest syscall will not correctly be interrupted. - * Instead the implementation of the guest syscall can use the safe_syscall - * function but otherwise just return the result or errno in the usual - * way; the main loop code will take care of restarting the syscall - * if appropriate. - * - * (If the implementation needs to make multiple host syscalls this is - * OK; any which might really block must be via safe_syscall(); for those - * which are only technically blocking (ie which we know in practice won't - * stay in the host kernel indefinitely) it's OK to use libc if necessary. - * You must be able to cope with backing out correctly if some safe_syscall - * you make in the implementation returns either -QEMU_ERESTARTSYS or - * EINTR though.) - * - * block_signals() cannot be used for interruptible syscalls. - * - * - * How and why the safe_syscall implementation works: - * - * The basic setup is that we make the host syscall via a known - * section of host native assembly. If a signal occurs, our signal - * handler checks the interrupted host PC against the addresse of that - * known section. If the PC is before or at the address of the syscall - * instruction then we change the PC to point at a "return - * -QEMU_ERESTARTSYS" code path instead, and then exit the signal handler - * (causing the safe_syscall() call to immediately return that value). - * Then in the main.c loop if we see this magic return value we adjust - * the guest PC to wind it back to before the system call, and invoke - * the guest signal handler as usual. - * - * This winding-back will happen in two cases: - * (1) signal came in just before we took the host syscall (a race); - * in this case we'll take the guest signal and have another go - * at the syscall afterwards, and this is indistinguishable for the - * guest from the timing having been different such that the guest - * signal really did win the race - * (2) signal came in while the host syscall was blocking, and the - * host kernel decided the syscall should be restarted; - * in this case we want to restart the guest syscall also, and so - * rewinding is the right thing. (Note that "restart" semantics mean - * "first call the signal handler, then reattempt the syscall".) - * The other situation to consider is when a signal came in while the - * host syscall was blocking, and the host kernel decided that the syscall - * should not be restarted; in this case QEMU's host signal handler will - * be invoked with the PC pointing just after the syscall instruction, - * with registers indicating an EINTR return; the special code in the - * handler will not kick in, and we will return EINTR to the guest as - * we should. - * - * Notice that we can leave the host kernel to make the decision for - * us about whether to do a restart of the syscall or not; we do not - * need to check SA_RESTART flags in QEMU or distinguish the various - * kinds of restartability. - */ - -/* The core part of this function is implemented in assembly */ -extern long safe_syscall_base(int *pending, long number, ...); -extern long safe_syscall_set_errno_tail(int value); - -/* These are defined by the safe-syscall.inc.S file */ -extern char safe_syscall_start[]; -extern char safe_syscall_end[]; - -#define safe_syscall(...) \ - safe_syscall_base(&((TaskState *)thread_cpu->opaque)->signal_pending, \ - __VA_ARGS__) - -#endif diff --git a/linux-user/signal.c b/linux-user/signal.c index 12b1705287..510db73c34 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -31,7 +31,7 @@ #include "trace.h" #include "signal-common.h" #include "host-signal.h" -#include "safe-syscall.h" +#include "user/safe-syscall.h" static struct target_sigaction sigact_table[TARGET_NSIG]; diff --git a/linux-user/syscall.c b/linux-user/syscall.c index f5bf6d155c..56a3e17183 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -132,7 +132,7 @@ #include "signal-common.h" #include "loader.h" #include "user-mmap.h" -#include "safe-syscall.h" +#include "user/safe-syscall.h" #include "qemu/guest-random.h" #include "qemu/selfmap.h" #include "user/syscall-trace.h" diff --git a/meson.build b/meson.build index 2495360fd0..4e0a8033f7 100644 --- a/meson.build +++ b/meson.build @@ -2378,6 +2378,7 @@ block_ss = ss.source_set() bsd_user_ss = ss.source_set() chardev_ss = ss.source_set() common_ss = ss.source_set() +common_user_ss = ss.source_set() crypto_ss = ss.source_set() hwcore_ss = ss.source_set() io_ss = ss.source_set() @@ -2622,15 +2623,30 @@ subdir('tcg') subdir('fpu') subdir('accel') subdir('plugins') +subdir('ebpf') + +common_user_inc = [] + +subdir('common-user') subdir('bsd-user') subdir('linux-user') -subdir('ebpf') specific_ss.add_all(when: 'CONFIG_BSD_USER', if_true: bsd_user_ss) linux_user_ss.add(files('thunk.c')) specific_ss.add_all(when: 'CONFIG_LINUX_USER', if_true: linux_user_ss) +common_user_ss = common_user_ss.apply(config_all, strict: false) +common_user = static_library('common-user', + sources: common_user_ss.sources(), + dependencies: common_user_ss.dependencies(), + include_directories: common_user_inc, + name_suffix: 'fa', + build_by_default: false) +common_user = declare_dependency(link_with: common_user) + +user_ss.add(common_user) + # needed for fuzzing binaries subdir('tests/qtest/libqos') subdir('tests/qtest/fuzz')