From 8bc7636e6b61e2e75fa83555d75c379355a172f6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 May 2017 07:22:12 -0400 Subject: [PATCH 001/104] make DEFAULT_MMAP_MIN_ADDR match LSM_MMAP_MIN_ADDR Signed-off-by: Daniel Micay --- mm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index ab80933be65ff..5012bf12aab69 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -304,7 +304,8 @@ config KSM config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" depends on MMU - default 4096 + default 32768 if ARM || (ARM64 && COMPAT) + default 65536 help This is the portion of low virtual memory which should be protected from userspace allocation. Keeping a user from writing to low pages From a8488f50a964aecdd4f18057605400d1b7e3a07d Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 06:17:41 -0400 Subject: [PATCH 002/104] enable HARDENED_USERCOPY by default Signed-off-by: Daniel Micay --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 2a1a2d3962281..aa60d6fccf4b6 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -154,6 +154,7 @@ config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" depends on HAVE_HARDENED_USERCOPY_ALLOCATOR imply STRICT_DEVMEM + default y help This option checks for obviously wrong memory regions when copying memory to/from the kernel (via copy_to_user() and From 74df0250994d601cc2445d17a68bea719480a922 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 26 Apr 2018 02:01:26 -0400 Subject: [PATCH 003/104] disable HARDENED_USERCOPY_FALLBACK by default --- security/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index aa60d6fccf4b6..39d5aaaf4ef63 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -167,7 +167,6 @@ config HARDENED_USERCOPY config HARDENED_USERCOPY_FALLBACK bool "Allow usercopy whitelist violations to fallback 
to object size" depends on HARDENED_USERCOPY - default y help This is a temporary option that allows missing usercopy whitelists to be discovered via a WARN() to the kernel log, instead of From 4eb0d58d990f2d3fa6f50eb04e786506a60e82f0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:05:15 -0400 Subject: [PATCH 004/104] enable SECURITY_DMESG_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index 39d5aaaf4ef63..2204195a6d35c 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -9,7 +9,7 @@ source "security/keys/Kconfig" config SECURITY_DMESG_RESTRICT bool "Restrict unprivileged access to the kernel syslog" - default n + default y help This enforces restrictions on unprivileged users reading the kernel syslog via dmesg(8). From c51ecd0d32fec8d5a50ff8338767d6a294b1ed9e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:06:14 -0400 Subject: [PATCH 005/104] set kptr_restrict=2 by default Signed-off-by: Daniel Micay --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7c488a1ce318c..27e16ab859fe1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -810,7 +810,7 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr, return pointer_string(buf, end, (const void *)hashval, spec); } -int kptr_restrict __read_mostly; +int kptr_restrict __read_mostly = 2; static noinline_for_stack char *restricted_pointer(char *buf, char *end, const void *ptr, From d4adb1e275d12699d2c6af74824b5e302c9c7299 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:10:57 -0400 Subject: [PATCH 006/104] enable DEBUG_LIST by default Signed-off-by: Daniel Micay --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 69def4a9df009..dfcdb89e6bee6 100644 --- a/lib/Kconfig.debug +++ 
b/lib/Kconfig.debug @@ -1317,6 +1317,7 @@ menu "Debug kernel data structures" config DEBUG_LIST bool "Debug linked list manipulation" depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION + default y help Enable this to turn on extended checks in the linked-list walking routines. From 1ccc60c7daef4e1e1abe5a6a82189882bd9a3cff Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 12:21:21 -0400 Subject: [PATCH 007/104] enable BUG_ON_DATA_CORRUPTION by default Signed-off-by: Daniel Micay --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dfcdb89e6bee6..3ef42af65a02f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1357,6 +1357,7 @@ config DEBUG_NOTIFIERS config BUG_ON_DATA_CORRUPTION bool "Trigger a BUG when data corruption is detected" select DEBUG_LIST + default y help Select this option if the kernel should BUG when it encounters data corruption in kernel memory structures when they get checked From 1d4df4f6f9536d589135d1fc27cba15219bff83f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 01:39:32 -0500 Subject: [PATCH 008/104] enable ARM64_SW_TTBR0_PAN by default --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b30e884e0889..de22de99cc0a1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1212,6 +1212,7 @@ config RODATA_FULL_DEFAULT_ENABLED config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + default y help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved From 8c8221bab0cad438d60a7a7795d856a78e097cd7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 01:33:48 -0500 Subject: [PATCH 009/104] arm64: enable RANDOMIZE_BASE by default --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 
de22de99cc0a1..a5498cb017acd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1641,6 +1641,7 @@ config RANDOMIZE_BASE bool "Randomize the address of the kernel image" select ARM64_MODULE_PLTS if MODULES select RELOCATABLE + default y help Randomizes the virtual address at which the kernel image is loaded, as a security feature that deters exploit attempts From 522a7a833b296f4cba5b80ab1b32f31eb8f0c52a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 19:43:38 -0400 Subject: [PATCH 010/104] enable SLAB_FREELIST_RANDOM by default Signed-off-by: Daniel Micay --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 24b23d843df15..47d0cc0395010 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1830,9 +1830,9 @@ config SLAB_MERGE_DEFAULT command line. config SLAB_FREELIST_RANDOM - default n depends on SLAB || SLUB bool "SLAB freelist randomization" + default y help Randomizes the freelist order used on creating new pages. This security feature reduces the predictability of the kernel slab From e66a9db309085d25193d6e8ecb44741089f0caac Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 20 Aug 2017 15:39:25 -0400 Subject: [PATCH 011/104] enable SLAB_FREELIST_HARDENED by default --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index 47d0cc0395010..222c637cd8b4a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1841,6 +1841,7 @@ config SLAB_FREELIST_RANDOM config SLAB_FREELIST_HARDENED bool "Harden slab freelist metadata" depends on SLUB + default y help Many kernel heap attacks try to target slab cache metadata and other infrastructure. 
This options makes minor performance From a54f6908803a8069cb68de93ecc076efa6d20fc2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 8 Jul 2017 02:38:54 -0400 Subject: [PATCH 012/104] disable SLAB_MERGE_DEFAULT by default --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 222c637cd8b4a..a7ab59a25da4f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1817,7 +1817,6 @@ endchoice config SLAB_MERGE_DEFAULT bool "Allow slab caches to be merged" - default y help For reduced kernel memory fragmentation, slab caches can be merged when they share the same size and other characteristics. From 6dc663bc41b2b6f2661bce120c76307da9da64ff Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 8 May 2017 12:51:54 -0400 Subject: [PATCH 013/104] enable FORTIFY_SOURCE by default Signed-off-by: Daniel Micay --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 2204195a6d35c..7826255fd85d4 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -191,6 +191,7 @@ config HARDENED_USERCOPY_PAGESPAN config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE + default y help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. From 1b317c4a838668f29bfb2c984855ca76aec6f504 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:09:17 -0400 Subject: [PATCH 014/104] enable PANIC_ON_OOPS by default Signed-off-by: Daniel Micay --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3ef42af65a02f..fdfcfd8d937d0 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -798,6 +798,7 @@ menu "Debug Oops, Lockups and Hangs" config PANIC_ON_OOPS bool "Panic on Oops" + default y help Say Y here to enable the kernel to panic when it oopses. 
This has the same effect as setting oops=panic on the kernel command @@ -807,7 +808,7 @@ config PANIC_ON_OOPS anything erroneous after an oops which could result in data corruption or other issues. - Say N if unsure. + Say Y if unsure. config PANIC_ON_OOPS_VALUE int From d6c6493166674b6edda8076b78fbc7bacaf944a2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 22:39:34 -0400 Subject: [PATCH 015/104] stop hiding SLUB_DEBUG behind EXPERT It can make sense to disable this to reduce attack surface / complexity. --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index a7ab59a25da4f..a6702faaa2663 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1746,7 +1746,7 @@ config VM_EVENT_COUNTERS config SLUB_DEBUG default y - bool "Enable SLUB debugging support" if EXPERT + bool "Enable SLUB debugging support" depends on SLUB && SYSFS help SLUB has extensive debug support features. Disabling these can From 3970d13c0deff91b01abae4658c7aa513f627419 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:11:31 -0400 Subject: [PATCH 016/104] stop hiding X86_16BIT behind EXPERT --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index beea77046f9ba..81ca2a76241f2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1193,7 +1193,7 @@ config VM86 default X86_LEGACY_VM86 config X86_16BIT - bool "Enable support for 16-bit segments" if EXPERT + bool "Enable support for 16-bit segments" default y depends on MODIFY_LDT_SYSCALL ---help--- From 3945314d5bf1a507dd4093474877af9b18043936 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:11:52 -0400 Subject: [PATCH 017/104] disable X86_16BIT by default Signed-off-by: Daniel Micay --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 81ca2a76241f2..c17dd18abf930 100644 --- a/arch/x86/Kconfig +++ 
b/arch/x86/Kconfig @@ -1194,7 +1194,6 @@ config VM86 config X86_16BIT bool "Enable support for 16-bit segments" - default y depends on MODIFY_LDT_SYSCALL ---help--- This option is required by programs like Wine to run 16-bit From 8ffec639af49cca1f670aafce9e4387f83c6c1d8 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:15:52 -0400 Subject: [PATCH 018/104] stop hiding MODIFY_LDT_SYSCALL behind EXPERT --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c17dd18abf930..fd880ca0452b8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2426,7 +2426,7 @@ config CMDLINE_OVERRIDE be set to 'N' under normal conditions. config MODIFY_LDT_SYSCALL - bool "Enable the LDT (local descriptor table)" if EXPERT + bool "Enable the LDT (local descriptor table)" default y ---help--- Linux can allow user programs to install a per-process x86 From 9d44baa1d8c9b39c3e260c61456d0664df42c49f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:16:16 -0400 Subject: [PATCH 019/104] disable MODIFY_LDT_SYSCALL by default Signed-off-by: Daniel Micay --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fd880ca0452b8..0c80cfcb56d23 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2427,7 +2427,6 @@ config CMDLINE_OVERRIDE config MODIFY_LDT_SYSCALL bool "Enable the LDT (local descriptor table)" - default y ---help--- Linux can allow user programs to install a per-process x86 Local Descriptor Table (LDT) using the modify_ldt(2) system From f108bf6ee413fc83345b4cc9f51b5b9b93fcbf4a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 07:08:42 -0400 Subject: [PATCH 020/104] set LEGACY_VSYSCALL_NONE by default --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0c80cfcb56d23..0d6108b38f053 100644 --- a/arch/x86/Kconfig +++ 
b/arch/x86/Kconfig @@ -2330,7 +2330,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_XONLY + default LEGACY_VSYSCALL_NONE help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in From fd40a8086f0650ef58ee8d89e89a9666bb4bc0d1 Mon Sep 17 00:00:00 2001 From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com> Date: Fri, 6 Oct 2017 10:21:50 +0000 Subject: [PATCH 021/104] stop hiding AIO behind EXPERT --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index a6702faaa2663..009febc477dd5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1514,7 +1514,7 @@ config SHMEM which may be appropriate on small systems without swap. config AIO - bool "Enable AIO support" if EXPERT + bool "Enable AIO support" default y help This option enables POSIX asynchronous I/O which may by used From 7b271c4bf517bddbdebf38b5812c0328e7e8895f Mon Sep 17 00:00:00 2001 From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com> Date: Fri, 6 Oct 2017 10:24:10 +0000 Subject: [PATCH 022/104] disable AIO by default --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 009febc477dd5..2d2fcff3b4815 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1515,7 +1515,6 @@ config SHMEM config AIO bool "Enable AIO support" - default y help This option enables POSIX asynchronous I/O which may by used by some high performance threaded applications. 
Disabling From 08bb496db8545204954315f3f08370b2856ef9ca Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:08:49 -0500 Subject: [PATCH 023/104] remove SYSVIPC from arm64/x86_64 defconfigs --- arch/arm64/configs/defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index b2f667307f829..1e71ad9063a25 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 0b9654c7a05c2..4fdb04daf3dcc 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_TASKSTATS=y From c01594f33b7b214e89d3af86026486b48b838a11 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 May 2017 07:28:10 -0400 Subject: [PATCH 024/104] disable DEVPORT by default --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 26956c0069876..149402bc8bb97 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -514,7 +514,6 @@ config TELCLOCK config DEVPORT bool "/dev/port character device" depends on ISA || PCI - default y help Say Y here if you want to support the /dev/port device. The /dev/port device is similar to /dev/mem, but for I/O ports. 
From 13736585818ba3deb4e32c0208ed4c71c0e5b674 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 May 2017 07:29:45 -0400 Subject: [PATCH 025/104] disable PROC_VMCORE by default --- fs/proc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 27ef84d99f59c..fb27f99a5e666 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -41,7 +41,6 @@ config PROC_KCORE config PROC_VMCORE bool "/proc/vmcore support" depends on PROC_FS && CRASH_DUMP - default y help Exports the dump image of crashed kernel in ELF format. From b74b828a986cc52cc8889c4516c926248e5f0f1c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 28 May 2017 03:03:46 -0400 Subject: [PATCH 026/104] disable NFS_DEBUG by default --- fs/nfs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 295a7a21b7744..3aed361bc0f90 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -195,4 +195,3 @@ config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG select CRC32 - default y From 3014c2f8c98b9667983be5e625794485fd57cacf Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 12:11:11 -0400 Subject: [PATCH 027/104] enable DEBUG_WX by default Signed-off-by: Daniel Micay --- arch/arm64/Kconfig.debug | 1 + arch/x86/Kconfig.debug | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1c906d932d6bc..d8b5cdb456822 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -26,6 +26,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET config DEBUG_WX bool "Warn on W+X mappings at boot" select PTDUMP_CORE + default y ---help--- Generate a warning if any W+X mappings are found at boot. 
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2e74690b028a5..87c7294dd172f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -75,6 +75,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" select PTDUMP_CORE + default y ---help--- Generate a warning if any W+X mappings are found at boot. From d2c3ec49daa833ccaf1f6b4a04ac9a57c877bab2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 13:21:16 -0500 Subject: [PATCH 028/104] disable LEGACY_PTYS by default --- drivers/tty/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index a312cb33a99b7..b141c96e955d4 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -122,7 +122,6 @@ config UNIX98_PTYS config LEGACY_PTYS bool "Legacy (BSD) PTY support" - default y ---help--- A pseudo terminal (PTY) is a software device consisting of two halves: a master and a slave. The slave device behaves identical to From a86572447e9e1b9c6137d3e43032742055ee8b56 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 12:41:42 -0500 Subject: [PATCH 029/104] disable DEVMEM by default --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 149402bc8bb97..206edc0b60a9e 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -9,7 +9,6 @@ source "drivers/tty/Kconfig" config DEVMEM bool "/dev/mem virtual device support" - default y help Say Y here if you want to support the /dev/mem device. 
The /dev/mem device is used to access areas of physical From aaa455756e32e90add1a3bea48ae6f13226e8370 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 12:43:49 -0500 Subject: [PATCH 030/104] enable IO_STRICT_DEVMEM by default --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fdfcfd8d937d0..e8f4916204b83 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1514,6 +1514,7 @@ config STRICT_DEVMEM config IO_STRICT_DEVMEM bool "Filter I/O access to /dev/mem" depends on STRICT_DEVMEM + default y help If this option is disabled, you allow userspace (root) access to all io-memory regardless of whether a driver is actively using that From cf7b4652364995a63e98c5cd5d00266a3a033224 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 18:28:33 -0400 Subject: [PATCH 031/104] disable COMPAT_BRK by default --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 2d2fcff3b4815..36b56084a180c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1769,7 +1769,6 @@ config SLUB_MEMCG_SYSFS_ON config COMPAT_BRK bool "Disable heap randomization" - default y help Randomizing heap placement makes heap exploits harder, but it also breaks ancient binaries (including anything libc5 based). 
From 250a7a8e9545066af535518a26c207ef727ab6a0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 16:16:39 -0400 Subject: [PATCH 032/104] use maximum supported mmap rnd entropy by default Signed-off-by: Daniel Micay --- arch/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 98de654b79b31..76be2ec6de237 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -658,7 +658,7 @@ config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT - default ARCH_MMAP_RND_BITS_MIN + default ARCH_MMAP_RND_BITS_MAX depends on HAVE_ARCH_MMAP_RND_BITS help This value can be used to select the number of bits to use to @@ -692,7 +692,7 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT - default ARCH_MMAP_RND_COMPAT_BITS_MIN + default ARCH_MMAP_RND_COMPAT_BITS_MAX depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to From 63ee9bb5128a8e4920a71e682ffac27c3f13916b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 May 2017 10:47:23 -0400 Subject: [PATCH 033/104] enable protected_{symlinks,hardlinks} by default --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index db6565c998259..402d74528f9de 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -949,8 +949,8 @@ static inline void put_link(struct nameidata *nd) path_put(&last->link); } -int sysctl_protected_symlinks __read_mostly = 0; -int sysctl_protected_hardlinks __read_mostly = 0; +int sysctl_protected_symlinks __read_mostly = 1; +int sysctl_protected_hardlinks __read_mostly = 1; int 
sysctl_protected_fifos __read_mostly; int sysctl_protected_regular __read_mostly; From e400d981be61198de9e6e454193173f7efe96411 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:13:48 -0500 Subject: [PATCH 034/104] enable SECURITY by default --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 7826255fd85d4..ecbe2b4127dae 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -23,6 +23,7 @@ config SECURITY bool "Enable different security models" depends on SYSFS depends on MULTIUSER + default y help This allows you to choose different security modules to be configured into your kernel. From b9c3132a4d107ea6a1b7c4d39e840911599ffdff Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 06:17:59 -0400 Subject: [PATCH 035/104] enable SECURITY_YAMA by default --- security/yama/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/yama/Kconfig b/security/yama/Kconfig index a810304123ca5..b809050b25d23 100644 --- a/security/yama/Kconfig +++ b/security/yama/Kconfig @@ -2,7 +2,7 @@ config SECURITY_YAMA bool "Yama support" depends on SECURITY - default n + default y help This selects Yama, which extends DAC support with additional system-wide security settings beyond regular Linux discretionary From e915334707f3998cd6e4bad7d1e4ced0748c42d6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:14:02 -0500 Subject: [PATCH 036/104] enable SECURITY_NETWORK by default --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index ecbe2b4127dae..c64c19ab4fa99 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -49,6 +49,7 @@ config SECURITYFS config SECURITY_NETWORK bool "Socket and Networking Security Hooks" depends on SECURITY + default y help This enables the socket and networking security hooks. 
If enabled, a security module can use these hooks to From 128adb031f5893087b7869343ca410b238ab2d44 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:15:24 -0500 Subject: [PATCH 037/104] enable AUDIT by default --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index 36b56084a180c..28a290b839fec 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -362,6 +362,7 @@ config USELIB config AUDIT bool "Auditing support" depends on NET + default y help Enable auditing infrastructure that can be used with another kernel subsystem, such as SELinux (which requires this for From 9a82a1439a0ef06194af961f3823461bda904254 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:16:49 -0500 Subject: [PATCH 038/104] enable SECURITY_SELINUX by default --- security/selinux/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 1014cb0ee956c..2d48aef6d5ac4 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -3,7 +3,7 @@ config SECURITY_SELINUX bool "NSA SELinux Support" depends on SECURITY_NETWORK && AUDIT && NET && INET select NETWORK_SECMARK - default n + default y help This selects NSA Security-Enhanced Linux (SELinux). You will also need a policy configuration and a labeled filesystem. From 982fabd091f00c8ada08f1be261fe39426cbe7e6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 6 Jan 2018 13:41:11 -0500 Subject: [PATCH 039/104] enable SYN_COOKIES by default --- net/ipv4/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f96bd489b362a..89ac8fd063454 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -267,6 +267,7 @@ config IP_PIMSM_V2 config SYN_COOKIES bool "IP: TCP syncookie support" + default y ---help--- Normal TCP/IP networking is open to an attack known as "SYN flooding". 
This denial-of-service attack prevents legitimate remote From a32a012fe8d7c16d305188650e51bc48c472a56e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 00:28:23 -0400 Subject: [PATCH 040/104] add __read_only for non-init related usage --- include/linux/cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cache.h b/include/linux/cache.h index 750621e41d1c1..e7157c18c62c1 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -31,6 +31,8 @@ #define __ro_after_init __attribute__((__section__(".data..ro_after_init"))) #endif +#define __read_only __ro_after_init + #ifndef ____cacheline_aligned #define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) #endif From 3c64c49ed9d5088276c15be6f026e8431d63c72c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 00:43:03 -0400 Subject: [PATCH 041/104] make sysctl constants read-only Most of this is extracted from the last publicly available version of the PaX patches where it's part of KERNEXEC as __read_only. It has been extended to a few more of these constants. 
--- kernel/sysctl.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d396aaaf19a32..7065e19ce97b1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -121,32 +121,32 @@ extern int sysctl_nr_trim_pages; /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR -static int sixty = 60; +static int sixty __read_only = 60; #endif -static int __maybe_unused neg_one = -1; -static int __maybe_unused two = 2; -static int __maybe_unused four = 4; -static unsigned long zero_ul; -static unsigned long one_ul = 1; -static unsigned long long_max = LONG_MAX; -static int one_hundred = 100; -static int one_thousand = 1000; +static int __maybe_unused neg_one __read_only = -1; +static int __maybe_unused two __read_only = 2; +static int __maybe_unused four __read_only = 4; +static unsigned long zero_ul __read_only; +static unsigned long one_ul __read_only = 1; +static unsigned long long_max __read_only = LONG_MAX; +static int one_hundred __read_only = 100; +static int one_thousand __read_only = 1000; #ifdef CONFIG_PRINTK -static int ten_thousand = 10000; +static int ten_thousand __read_only = 10000; #endif #ifdef CONFIG_PERF_EVENTS -static int six_hundred_forty_kb = 640 * 1024; +static int six_hundred_forty_kb __read_only = 640 * 1024; #endif /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; +static unsigned long dirty_bytes_min __read_only = 2 * PAGE_SIZE; /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ -static int maxolduid = 65535; -static int minolduid; +static int maxolduid __read_only = 65535; +static int minolduid __read_only; -static int ngroups_max = NGROUPS_MAX; +static int ngroups_max __read_only = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; /* @@ -154,7 +154,7 @@ static const int cap_last_cap = CAP_LAST_CAP; * and 
hung_task_check_interval_secs */ #ifdef CONFIG_DETECT_HUNG_TASK -static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); +static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); #endif #ifdef CONFIG_INOTIFY_USER @@ -301,19 +301,19 @@ static struct ctl_table sysctl_base_table[] = { }; #ifdef CONFIG_SCHED_DEBUG -static int min_sched_granularity_ns = 100000; /* 100 usecs */ -static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ -static int min_wakeup_granularity_ns; /* 0 usecs */ -static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */ +static int min_sched_granularity_ns __read_only = 100000; /* 100 usecs */ +static int max_sched_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ +static int min_wakeup_granularity_ns __read_only; /* 0 usecs */ +static int max_wakeup_granularity_ns __read_only = NSEC_PER_SEC; /* 1 second */ #ifdef CONFIG_SMP -static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; -static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; +static int min_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_NONE; +static int max_sched_tunable_scaling __read_only = SCHED_TUNABLESCALING_END-1; #endif /* CONFIG_SMP */ #endif /* CONFIG_SCHED_DEBUG */ #ifdef CONFIG_COMPACTION -static int min_extfrag_threshold; -static int max_extfrag_threshold = 1000; +static int min_extfrag_threshold __read_only; +static int max_extfrag_threshold __read_only = 1000; #endif static struct ctl_table kern_table[] = { From 6394f490d62e56c9302fc3e1ff8cfd2f364894b3 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 12 May 2017 03:22:00 -0400 Subject: [PATCH 042/104] mark kernel_set_to_readonly as __ro_after_init This change was extracted from PaX where it's part of KERNEXEC. 
Signed-off-by: Daniel Micay --- arch/x86/mm/init_32.c | 5 ++--- arch/x86/mm/init_64.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 23df4885bbede..407ace888df22 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -871,7 +871,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, } #endif -int kernel_set_to_readonly __read_mostly; +int kernel_set_to_readonly __ro_after_init; static void mark_nxdata_nx(void) { @@ -895,12 +895,11 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = (unsigned long)__end_rodata - start; + kernel_set_to_readonly = 1; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - kernel_set_to_readonly = 1; - #ifdef CONFIG_CPA_DEBUG pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size); set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index abbdecb75fad8..5996f31e6a064 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1256,7 +1256,7 @@ void __init mem_init(void) mem_init_print_info(NULL); } -int kernel_set_to_readonly; +int kernel_set_to_readonly __ro_after_init; void mark_rodata_ro(void) { @@ -1269,9 +1269,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); - set_memory_ro(start, (end - start) >> PAGE_SHIFT); - kernel_set_to_readonly = 1; + set_memory_ro(start, (end - start) >> PAGE_SHIFT); /* * The rodata/data/bss/brk section (but not the kernel text!) 
From 9682c789c7599985f22290185e9d010c98f6d14f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 19:01:58 -0400 Subject: [PATCH 043/104] mark slub runtime configuration as __ro_after_init Signed-off-by: Daniel Micay --- mm/slub.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 17dc00e33115b..3f0929731f2d8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -493,13 +493,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) * Debug settings: */ #if defined(CONFIG_SLUB_DEBUG_ON) -static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; +static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; #else -static slab_flags_t slub_debug; +static slab_flags_t slub_debug __ro_after_init; #endif -static char *slub_debug_slabs; -static int disable_higher_order_debug; +static char *slub_debug_slabs __ro_after_init; +static int disable_higher_order_debug __ro_after_init; /* * slub is about to manipulate internal object metadata. This memory lies @@ -3234,9 +3234,9 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk); * and increases the number of allocations possible without having to * take the list_lock. */ -static unsigned int slub_min_order; -static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER; -static unsigned int slub_min_objects; +static unsigned int slub_min_order __ro_after_init; +static unsigned int slub_max_order __ro_after_init = PAGE_ALLOC_COSTLY_ORDER; +static unsigned int slub_min_objects __ro_after_init; /* * Calculate the order of allocation given an slab object size. 
@@ -4771,7 +4771,7 @@ enum slab_stat_type { #define SO_TOTAL (1 << SL_TOTAL) #ifdef CONFIG_MEMCG -static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); +static bool memcg_sysfs_enabled __ro_after_init = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON); static int __init setup_slub_memcg_sysfs(char *str) { From 659c8063bbc6fcd0ad973151aecc07d83170f2a7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 11:35:35 -0400 Subject: [PATCH 044/104] add __ro_after_init to slab_nomerge and slab_state This was extracted from the PaX patch where it's part of the KERNEXEC feature as __read_only. Signed-off-by: Daniel Micay --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 1907cb2903c72..912b8d816c24b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -28,7 +28,7 @@ #include "slab.h" -enum slab_state slab_state; +enum slab_state slab_state __ro_after_init; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; @@ -59,7 +59,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, /* * Merge control. If this is set then no merging of slab caches will occur. 
*/ -static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); +static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); static int __init setup_slab_nomerge(char *str) { From c2668ce678cff89c4063a617ded14f952a841d0c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 28 May 2017 18:51:30 -0400 Subject: [PATCH 045/104] mark kmem_cache as __ro_after_init --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 912b8d816c24b..d92bcd8460475 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -31,7 +31,7 @@ enum slab_state slab_state __ro_after_init; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); -struct kmem_cache *kmem_cache; +struct kmem_cache *kmem_cache __ro_after_init; #ifdef CONFIG_HARDENED_USERCOPY bool usercopy_fallback __ro_after_init = From 080a5089477435feac57b4607acfe171a79c1229 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 12 May 2017 00:06:16 -0400 Subject: [PATCH 046/104] mark __supported_pte_mask as __ro_after_init These changes were extracted from PaX where it was part of KERNEXEC as __read_only. 
Signed-off-by: Daniel Micay --- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 407ace888df22..ff3f34d9c21a1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -561,9 +561,9 @@ static void __init pagetable_init(void) #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; +pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; +pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5996f31e6a064..6320b04d752ec 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -97,9 +97,9 @@ DEFINE_ENTRY(pte, pte, init) */ /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly = ~0; +pteval_t __supported_pte_mask __ro_after_init = ~0; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = ~0; +pteval_t __default_kernel_pte_mask __ro_after_init = ~0; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); From 12b1e2acc4dae21ddfe181cc85e37e30e6f93c18 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:24:28 -0400 Subject: [PATCH 047/104] mark kobj_ns_type_register as only used for init This allows kobj_ns_ops_tbl to be __ro_after_init. Extracted from PaX. 
--- include/linux/kobject_ns.h | 2 +- lib/kobject.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 069aa2ebef90a..cb9e3637a6200 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -45,7 +45,7 @@ struct kobj_ns_type_operations { void (*drop_ns)(void *); }; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj); diff --git a/lib/kobject.c b/lib/kobject.c index 83198cb37d8d9..4a053b7aef425 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1009,9 +1009,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); -static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; From 9e3cfd849ab97bce35e23140ab506c28e74eb9df Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:32:30 -0400 Subject: [PATCH 048/104] mark open_softirq as only used for init --- include/linux/interrupt.h | 2 +- kernel/softirq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c5fe60ec6b84e..690fcc545b83f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -567,7 +567,7 @@ static inline void do_softirq_own_stack(void) } #endif -extern void open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void __init open_softirq(int nr, void 
(*action)(struct softirq_action *)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); diff --git a/kernel/softirq.c b/kernel/softirq.c index 0427a86743a46..bf90717694f39 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -452,7 +452,7 @@ void __raise_softirq_irqoff(unsigned int nr) or_softirq_pending(1UL << nr); } -void open_softirq(int nr, void (*action)(struct softirq_action *)) +void __init open_softirq(int nr, void (*action)(struct softirq_action *)) { softirq_vec[nr].action = action; } From 114a0721871ae6d8d80043c33cb303cca0d256a0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:41:11 -0400 Subject: [PATCH 049/104] remove unused softirq_action callback parameter Extracted from PaX. --- block/blk-softirq.c | 2 +- include/linux/interrupt.h | 4 ++-- kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 2 +- kernel/sched/fair.c | 2 +- kernel/softirq.c | 15 +++++++-------- kernel/time/hrtimer.c | 2 +- kernel/time/timer.c | 2 +- lib/irq_poll.c | 2 +- net/core/dev.c | 4 ++-- 10 files changed, 18 insertions(+), 19 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 6e7ec87d49faa..d6ee3f8b3e745 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -20,7 +20,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done); * Softirq action handler - move entries to local list and loop over them * while passing them to the queue registered handler. 
*/ -static __latent_entropy void blk_done_softirq(struct softirq_action *h) +static __latent_entropy void blk_done_softirq(void) { struct list_head *cpu_list, local_list; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 690fcc545b83f..bdfb16b84d231 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -552,7 +552,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; struct softirq_action { - void (*action)(struct softirq_action *); + void (*action)(void); }; asmlinkage void do_softirq(void); @@ -567,7 +567,7 @@ static inline void do_softirq_own_stack(void) } #endif -extern void __init open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void __init open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index dd572ce7c7479..95af139ac6ba9 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -100,7 +100,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head) } /* Invoke the RCU callbacks whose grace period has elapsed. */ -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) +static __latent_entropy void rcu_process_callbacks(void) { struct rcu_head *next, *list; unsigned long flags; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d91c9156fab2e..b1359b592ca5c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2414,7 +2414,7 @@ static __latent_entropy void rcu_core(void) trace_rcu_utilization(TPS("End RCU core")); } -static void rcu_core_si(struct softirq_action *h) +static void rcu_core_si(void) { rcu_core(); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index fe4e0d7753756..c7f3c4223af79 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10220,7 +10220,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf) * run_rebalance_domains is triggered when needed from the scheduler tick. 
* Also triggered for nohz idle balancing (with nohz_balancing_kick set). */ -static __latent_entropy void run_rebalance_domains(struct softirq_action *h) +static __latent_entropy void run_rebalance_domains(void) { struct rq *this_rq = this_rq(); enum cpu_idle_type idle = this_rq->idle_balance ? diff --git a/kernel/softirq.c b/kernel/softirq.c index bf90717694f39..c287440d130d0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -289,7 +289,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) kstat_incr_softirqs_this_cpu(vec_nr); trace_softirq_entry(vec_nr); - h->action(h); + h->action(); trace_softirq_exit(vec_nr); if (unlikely(prev_count != preempt_count())) { pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", @@ -452,7 +452,7 @@ void __raise_softirq_irqoff(unsigned int nr) or_softirq_pending(1UL << nr); } -void __init open_softirq(int nr, void (*action)(struct softirq_action *)) +void __init open_softirq(int nr, void (*action)(void)) { softirq_vec[nr].action = action; } @@ -498,8 +498,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -static void tasklet_action_common(struct softirq_action *a, - struct tasklet_head *tl_head, +static void tasklet_action_common(struct tasklet_head *tl_head, unsigned int softirq_nr) { struct tasklet_struct *list; @@ -536,14 +535,14 @@ static void tasklet_action_common(struct softirq_action *a, } } -static __latent_entropy void tasklet_action(struct softirq_action *a) +static __latent_entropy void tasklet_action(void) { - tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); + tasklet_action_common(this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); } -static __latent_entropy void tasklet_hi_action(struct softirq_action *a) +static __latent_entropy void tasklet_hi_action(void) { - tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); + tasklet_action_common(this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); } void 
tasklet_init(struct tasklet_struct *t, diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 3a609e7344f3d..ee935fc118630 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1583,7 +1583,7 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, } } -static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) +static __latent_entropy void hrtimer_run_softirq(void) { struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); unsigned long flags; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 4820823515e9a..1a61e5aa87ae7 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1779,7 +1779,7 @@ static inline void __run_timers(struct timer_base *base) /* * This function runs timers and the timer-tq in bottom half context. */ -static __latent_entropy void run_timer_softirq(struct softirq_action *h) +static __latent_entropy void run_timer_softirq(void) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2f17b488d58e1..b6e7996a0058f 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -75,7 +75,7 @@ void irq_poll_complete(struct irq_poll *iop) } EXPORT_SYMBOL(irq_poll_complete); -static void __latent_entropy irq_poll_softirq(struct softirq_action *h) +static void __latent_entropy irq_poll_softirq(void) { struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); int rearm = 0, budget = irq_poll_budget; diff --git a/net/core/dev.c b/net/core/dev.c index a69e8bd7ed74f..19f3531154f8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4763,7 +4763,7 @@ int netif_rx_ni(struct sk_buff *skb) } EXPORT_SYMBOL(netif_rx_ni); -static __latent_entropy void net_tx_action(struct softirq_action *h) +static __latent_entropy void net_tx_action(void) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -6624,7 +6624,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) return work; } -static 
__latent_entropy void net_rx_action(struct softirq_action *h) +static __latent_entropy void net_rx_action(void) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + From 983d7afed36577fc59a0b8895bd581fda6fe2d24 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:42:33 -0400 Subject: [PATCH 050/104] mark softirq_vec as __ro_after_init Note: __cacheline_aligned_in_smp conflicts with __ro_after_init on x86. Extracted from PaX. --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index c287440d130d0..5e6a9b4ccb41b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -52,7 +52,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); #endif -static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; +static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); DEFINE_PER_CPU(struct task_struct *, ksoftirqd); From dd112cf740c4edc3b9fcec4c71c0b8cb031e18ea Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 17 Sep 2019 18:00:54 +0200 Subject: [PATCH 051/104] mm: slab: trigger BUG if requested object is not a slab page Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak Signed-off-by: Thibaut Sautereau --- mm/slab.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slab.h b/mm/slab.h index 7e94700aa78c6..92c427a235433 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -470,9 +470,13 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) struct page *page; page = virt_to_head_page(obj); +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(!PageSlab(page)); +#else if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n", __func__)) return NULL; +#endif return page->slab_cache; } From 1d286a38707f03461e218de2de272530c639682a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 11:50:53 -0400 Subject: [PATCH 052/104] bug on 
kmem_cache_free with the wrong cache At least when CONFIG_BUG_ON_DATA_CORRUPTION is enabled. Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak Signed-off-by: Thibaut Sautereau --- mm/slab.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 92c427a235433..5683b3059cd61 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -522,9 +522,14 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) return s; cachep = virt_to_cache(x); - WARN_ONCE(cachep && !slab_equal_or_root(cachep, s), - "%s: Wrong slab cache. %s but object is from %s\n", - __func__, s->name, cachep->name); + if (cachep && !slab_equal_or_root(cachep, s)) { +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG(); +#else + WARN_ONCE(1, "%s: Wrong slab cache. %s but object is from %s\n", + __func__, s->name, cachep->name); +#endif + } return cachep; } From da2673e245663939c637563ae8b9856c63c6c904 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 11:57:35 -0400 Subject: [PATCH 053/104] bug on !PageSlab && !PageCompound in ksize At least when CONFIG_BUG_ON_DATA_CORRUPTION is enabled. 
Signed-off-by: Daniel Micay --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 3f0929731f2d8..d188997cb0ad5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3944,7 +3944,11 @@ size_t __ksize(const void *object) page = virt_to_head_page(object); if (unlikely(!PageSlab(page))) { +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(!PageCompound(page)); +#else WARN_ON(!PageCompound(page)); +#endif return page_size(page); } From bf7150ebc90b18ea58aaf1326dcb84d387e30645 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 21:54:56 -0400 Subject: [PATCH 054/104] mm: add support for verifying page sanitization Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- include/linux/highmem.h | 7 +++++++ kernel/power/snapshot.c | 3 +++ mm/page_alloc.c | 6 ++++++ security/Kconfig.hardening | 7 +++++++ 4 files changed, 23 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index ea5cdbd8c2c32..805b84d6bbca6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -215,6 +215,13 @@ static inline void clear_highpage(struct page *page) kunmap_atomic(kaddr); } +static inline void verify_zero_highpage(struct page *page) +{ + void *kaddr = kmap_atomic(page); + BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); + kunmap_atomic(kaddr); +} + static inline void zero_user_segments(struct page *page, unsigned start1, unsigned end1, unsigned start2, unsigned end2) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ddade80ad2767..08ab868bde8b6 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1150,6 +1150,9 @@ void clear_free_pages(void) struct memory_bitmap *bm = free_pages_map; unsigned long pfn; + if (!IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) && !want_init_on_free()) + return; + if (WARN_ON(!(free_pages_map))) return; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c4eb750a199b..3a98bca89e35e 100644 --- a/mm/page_alloc.c +++ 
b/mm/page_alloc.c @@ -2156,6 +2156,12 @@ static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags { post_alloc_hook(page, order, gfp_flags); + if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) { + int i; + for (i = 0; i < (1 << order); i++) + verify_zero_highpage(page + i); + } + if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags)) kernel_init_free_pages(page, 1 << order); diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index af4c979b38eed..bcf7ce9db40e8 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -196,6 +196,13 @@ config INIT_ON_FREE_DEFAULT_ON touching "cold" memory areas. Most cases see 3-5% impact. Some synthetic workloads have measured as high as 8%. +config PAGE_SANITIZE_VERIFY + bool "Verify sanitized pages" + default y + help + When init_on_free is enabled, verify that newly allocated pages + are zeroed to detect write-after-free bugs. + endmenu endmenu From 2c095f3834a9788beda8e195337f68f13e1ce7ad Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 20 Sep 2019 14:02:42 +0200 Subject: [PATCH 055/104] slub: Extend init_on_free to slab caches with constructors This is the remaining non-upstream part of SLAB_SANITIZE, which was a partial port, from Daniel Micay, of the feature from PaX without the default fast mode based on passing SLAB_NO_SANITIZE in performance-critical cases that are not particularly security sensitive. 
Signed-off-by: Thibaut Sautereau --- mm/slab.h | 12 +++++++++--- mm/slub.c | 14 +++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 5683b3059cd61..b7987d683a8d9 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -694,9 +694,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) static inline bool slab_want_init_on_free(struct kmem_cache *c) { - if (static_branch_unlikely(&init_on_free)) - return !(c->ctor || - (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); + if (static_branch_unlikely(&init_on_free)) { +#ifndef CONFIG_SLUB + if (c->ctor) + return false; +#endif + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return false; + return true; + } return false; } diff --git a/mm/slub.c b/mm/slub.c index d188997cb0ad5..cea6b5ce4fa49 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1471,7 +1471,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - + if (s->ctor) + s->ctor(object); } /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { @@ -1480,6 +1481,17 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *head = object; if (!*tail) *tail = object; + } else if (slab_want_init_on_free(s) && s->ctor) { + /* Objects that are put into quarantine by KASAN will + * still undergo free_consistency_checks() and thus + * need to show a valid freepointer to check_object(). + * + * Note that doing this for all caches (not just ctor + * ones, which have s->offset != NULL)) causes a GPF, + * due to KASAN poisoning and the way set_freepointer() + * eventually dereferences the freepointer. 
+ */ + set_freepointer(s, object, NULL); } } while (object != old_tail); From 9458c89a681c063fcda189bbde3d11b7785e9329 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 15:58:57 -0400 Subject: [PATCH 056/104] slub: Add support for verifying slab sanitization This is an extension to the sanitization feature in PaX for when sacrificing more performance for security is acceptable. The initial version from Daniel Micay was relying on PAGE_SANITIZE. It now relies on upstream's init_on_free. Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau --- mm/slub.c | 36 ++++++++++++++++++++++++++++++++---- security/Kconfig.hardening | 7 +++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index cea6b5ce4fa49..384571d95619c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -123,6 +123,12 @@ static inline int kmem_cache_debug(struct kmem_cache *s) #endif } +static inline bool has_sanitize_verify(struct kmem_cache *s) +{ + return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && + slab_want_init_on_free(s); +} + void *fixup_red_left(struct kmem_cache *s, void *p) { if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) @@ -1471,7 +1477,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - if (s->ctor) + if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) s->ctor(object); } /* If object's reuse doesn't have to be delayed */ @@ -1506,7 +1512,7 @@ static void *setup_object(struct kmem_cache *s, struct page *page, { setup_object_debug(s, page, object); object = kasan_init_slab_obj(s, object); - if (unlikely(s->ctor)) { + if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_object_data(s, object); s->ctor(object); kasan_poison_object_data(s, object); @@ -2784,7 +2790,16 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, maybe_wipe_obj_freeptr(s, object); - if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && 
object) + if (has_sanitize_verify(s) && object) { + /* KASAN hasn't unpoisoned the object yet (this is done in the + * post-alloc hook), so let's do it temporarily. + */ + kasan_unpoison_object_data(s, object); + BUG_ON(memchr_inv(object, 0, s->object_size)); + if (s->ctor) + s->ctor(object); + kasan_poison_object_data(s, object); + } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(object, 0, s->object_size); slab_post_alloc_hook(s, gfpflags, 1, &object); @@ -3208,7 +3223,20 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, local_irq_enable(); /* Clear memory outside IRQ disabled fastpath loop */ - if (unlikely(slab_want_init_on_alloc(flags, s))) { + if (has_sanitize_verify(s)) { + int j; + + for (j = 0; j < i; j++) { + /* KASAN hasn't unpoisoned the object yet (this is done + * in the post-alloc hook), so let's do it temporarily. + */ + kasan_unpoison_object_data(s, p[j]); + BUG_ON(memchr_inv(p[j], 0, s->object_size)); + if (s->ctor) + s->ctor(p[j]); + kasan_poison_object_data(s, p[j]); + } + } else if (unlikely(slab_want_init_on_alloc(flags, s))) { int j; for (j = 0; j < i; j++) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index bcf7ce9db40e8..7c6fa3eb1a295 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -203,6 +203,13 @@ config PAGE_SANITIZE_VERIFY When init_on_free is enabled, verify that newly allocated pages are zeroed to detect write-after-free bugs. +config SLAB_SANITIZE_VERIFY + default y + bool "Verify sanitized SLAB allocations" + help + When init_on_free is enabled, verify that newly allocated slab + objects are zeroed to detect write-after-free bugs. 
+ endmenu endmenu From 2ce19567481d8b4af257b8cbd6a90544d76f9aa9 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 16:16:58 -0400 Subject: [PATCH 057/104] slub: add multi-purpose random canaries From the configuration option: Place canaries at the end of kernel slab allocations, sacrificing some performance and memory usage for security. Canaries can detect some forms of heap corruption when allocations are freed and as part of the HARDENED_USERCOPY feature. It provides basic use-after-free detection for HARDENED_USERCOPY. Canaries absorb small overflows (rendering them harmless), mitigate non-NUL terminated C string overflows on 64-bit via a guaranteed zero byte and provide basic double-free detection. Signed-off-by: Daniel Micay --- include/linux/slub_def.h | 5 +++ init/Kconfig | 17 +++++++++++ mm/slab.h | 2 +- mm/slub.c | 66 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 88 insertions(+), 2 deletions(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index d2153789bd9f9..97da977d60605 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -121,6 +121,11 @@ struct kmem_cache { unsigned long random; #endif +#ifdef CONFIG_SLAB_CANARY + unsigned long random_active; + unsigned long random_inactive; +#endif + #ifdef CONFIG_NUMA /* * Defragmentation by allocating from a remote node. diff --git a/init/Kconfig b/init/Kconfig index 28a290b839fec..52599ea654788 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1846,6 +1846,23 @@ config SLAB_FREELIST_HARDENED sacrifices to harden the kernel slab allocator against common freelist exploit methods. +config SLAB_CANARY + depends on SLUB + depends on !SLAB_MERGE_DEFAULT + bool "SLAB canaries" + default y + help + Place canaries at the end of kernel slab allocations, sacrificing + some performance and memory usage for security. + + Canaries can detect some forms of heap corruption when allocations + are freed and as part of the HARDENED_USERCOPY feature. 
It provides + basic use-after-free detection for HARDENED_USERCOPY. + + Canaries absorb small overflows (rendering them harmless), mitigate + non-NUL terminated C string overflows on 64-bit via a guaranteed zero + byte and provide basic double-free detection. + config SHUFFLE_PAGE_ALLOCATOR bool "Page allocator randomization" default SLAB_FREELIST_RANDOM && ACPI_NUMA diff --git a/mm/slab.h b/mm/slab.h index b7987d683a8d9..d28687a4b45a5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -554,7 +554,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * back there or track user information then we can * only use the space before that information. */ - if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) + if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) return s->inuse; /* * Else we can use all the padding etc for the allocation diff --git a/mm/slub.c b/mm/slub.c index 384571d95619c..cc1741cc7a643 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -313,6 +313,35 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr); } +#ifdef CONFIG_SLAB_CANARY +static inline unsigned long *get_canary(struct kmem_cache *s, void *object) +{ + if (s->offset) + return object + s->offset + sizeof(void *); + return object + s->inuse; +} + +static inline unsigned long get_canary_value(const void *canary, unsigned long value) +{ + return (value ^ (unsigned long)canary) & CANARY_MASK; +} + +static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + *canary = get_canary_value(canary, value); +} + +static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + BUG_ON(*canary != get_canary_value(canary, value)); +} +#else +#define set_canary(s, object, value) +#define check_canary(s, object, value) +#endif 
+ /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ for (__p = fixup_red_left(__s, __addr); \ @@ -566,6 +595,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, else p = object + s->inuse; + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + p = (void *)p + sizeof(void *); + return p + alloc; } @@ -696,6 +728,9 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) else off = s->inuse; + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); @@ -827,6 +862,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) /* Freepointer is placed after the object. */ off += sizeof(void *); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) /* We also have user information there */ off += 2 * sizeof(struct track); @@ -1467,6 +1505,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, object = next; next = get_freepointer(s, object); + check_canary(s, object, s->random_active); + if (slab_want_init_on_free(s)) { /* * Clear the object and the metadata, but don't touch @@ -1480,6 +1520,9 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) s->ctor(object); } + + set_canary(s, object, s->random_inactive); + /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { /* Move object to the new freelist */ @@ -1511,6 +1554,7 @@ static void *setup_object(struct kmem_cache *s, struct page *page, void *object) { setup_object_debug(s, page, object); + set_canary(s, object, s->random_inactive); object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_object_data(s, object); @@ -2802,6 +2846,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, } else if (unlikely(slab_want_init_on_alloc(gfpflags, 
s)) && object) memset(object, 0, s->object_size); + if (object) { + check_canary(s, object, s->random_inactive); + set_canary(s, object, s->random_active); + } + slab_post_alloc_hook(s, gfpflags, 1, &object); return object; @@ -3183,7 +3232,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { struct kmem_cache_cpu *c; - int i; + int i, k; /* memcg and kmem_cache debug support */ s = slab_pre_alloc_hook(s, flags); @@ -3243,6 +3292,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, memset(p[j], 0, s->object_size); } + for (k = 0; k < i; k++) { + check_canary(s, p[k], s->random_inactive); + set_canary(s, p[k], s->random_active); + } + /* memcg and kmem_cache debug support */ slab_post_alloc_hook(s, flags, size, p); return i; @@ -3444,6 +3498,7 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif + set_canary(kmem_cache_node, n, kmem_cache_node->random_active); n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node), GFP_KERNEL); page->freelist = get_freepointer(kmem_cache_node, n); @@ -3604,6 +3659,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) size += sizeof(void *); } + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + size += sizeof(void *); + #ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) /* @@ -3676,6 +3734,10 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); #endif +#ifdef CONFIG_SLAB_CANARY + s->random_active = get_random_long(); + s->random_inactive = get_random_long(); +#endif if (!calculate_sizes(s, -1)) goto error; @@ -3951,6 +4013,8 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, offset -= s->red_left_pad; } + check_canary(s, (void *)ptr - offset, s->random_active); + /* Allow address range falling entirely within usercopy region. 
*/ if (offset >= s->useroffset && offset - s->useroffset <= s->usersize && From 468095d594f028ed25cfd07df5088fb4cfaa1353 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 11 Jan 2016 15:23:55 +0000 Subject: [PATCH 058/104] security,perf: Allow further restriction of perf_event_open When kernel.perf_event_paranoid is set to 3 (or greater), disallow all access to performance events by users without CAP_SYS_ADMIN. Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that makes this value the default. This is based on a similar feature in grsecurity (CONFIG_GRKERNSEC_PERF_HARDEN). This version doesn't include making the variable read-only. It also allows enabling further restriction at run-time regardless of whether the default is changed. Signed-off-by: Ben Hutchings --- include/linux/perf_event.h | 5 +++++ kernel/events/core.c | 8 ++++++++ security/Kconfig | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6d4c22aee3848..3e64a054b42ec 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1283,6 +1283,11 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } +static inline bool perf_paranoid_any(void) +{ + return sysctl_perf_event_paranoid > 2; +} + static inline int perf_allow_kernel(struct perf_event_attr *attr) { if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) diff --git a/kernel/events/core.c b/kernel/events/core.c index dc9c643bce94c..f1c3f2781b36d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -398,8 +398,13 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB
+ 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -11170,6 +11175,9 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; + if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) diff --git a/security/Kconfig b/security/Kconfig index c64c19ab4fa99..f7010d17bfadb 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -19,6 +19,15 @@ config SECURITY_DMESG_RESTRICT If you are unsure how to answer this question, answer N. +config SECURITY_PERF_EVENTS_RESTRICT + bool "Restrict unprivileged use of performance events" + depends on PERF_EVENTS + help + If you say Y here, the kernel.perf_event_paranoid sysctl + will be set to 3 by default, and no unprivileged use of the + perf_event_open syscall will be permitted unless it is + changed. + config SECURITY bool "Enable different security models" depends on SYSFS From 99a33305d83fa77d2a07486dc7fc290e13b00fba Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 14:45:59 -0400 Subject: [PATCH 059/104] enable SECURITY_PERF_EVENTS_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index f7010d17bfadb..7275053b01d04 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -22,6 +22,7 @@ config SECURITY_DMESG_RESTRICT config SECURITY_PERF_EVENTS_RESTRICT bool "Restrict unprivileged use of performance events" depends on PERF_EVENTS + default y help If you say Y here, the kernel.perf_event_paranoid sysctl will be set to 3 by default, and no unprivileged use of the From f5058d98d58a27d98fd3998b0838c540ab44bbc0 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Fri, 31 May 2013 19:12:12 +0100 Subject: [PATCH 060/104] add sysctl to disallow unprivileged CLONE_NEWUSER by default Signed-off-by: 
Serge Hallyn [bwh: Remove unneeded binary sysctl bits] Signed-off-by: Daniel Micay --- kernel/fork.c | 15 +++++++++++++++ kernel/sysctl.c | 12 ++++++++++++ kernel/user_namespace.c | 3 +++ 3 files changed, 30 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 60a1295f43843..709163fea606d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,11 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#else +#define unprivileged_userns_clone 0 +#endif /* * Minimum number of threads to boot the kernel @@ -1844,6 +1849,10 @@ static __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -2933,6 +2942,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7065e19ce97b1..531d9a01deaaf 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -110,6 +110,9 @@ extern int core_uses_pid; extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#endif extern int pid_max; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; @@ -546,6 +549,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + 
.mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8eadadc478f9a..eea0a7694df85 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,9 @@ #include #include +/* sysctl */ +int unprivileged_userns_clone; + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); From ec73ebd0f7af9f87642c1b0da92935466b4326f9 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:02:56 -0400 Subject: [PATCH 061/104] add kmalloc/krealloc alloc_size attributes Note that this is overly strict when combined with ksize users accessing beyond the requested data size. Signed-off-by: Daniel Micay --- include/linux/slab.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 03a389358562a..1c1b4b3a79b03 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -184,7 +184,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *, size_t, gfp_t); +void * __must_check krealloc(const void *, size_t, gfp_t) __attribute((alloc_size(2))); void kfree(const void *); void kzfree(const void *); size_t __ksize(const void *); @@ -389,7 +389,7 @@ static __always_inline unsigned int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc; +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *, void *); @@ -413,7 +413,7 @@ static __always_inline void kfree_bulk(size_t size, void **p) } #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, 
gfp_t flags, int node) __assume_kmalloc_alignment __malloc; +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc __attribute__((alloc_size(1))); void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) @@ -538,7 +538,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * Try really hard to succeed the allocation but fail * eventually. */ -static __always_inline void *kmalloc(size_t size, gfp_t flags) +static __always_inline __attribute__((alloc_size(1))) void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { #ifndef CONFIG_SLOB @@ -560,7 +560,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags) return __kmalloc(size, flags); } -static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __attribute__((alloc_size(1))) void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && From 533c43f36b26d70234ec56a5b097b95c44e6856e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:04:03 -0400 Subject: [PATCH 062/104] add vmalloc alloc_size attributes Signed-off-by: Daniel Micay --- include/linux/vmalloc.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ec38132366992..30ddd5b4c93fa 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -102,20 +102,20 @@ static inline void vmalloc_init(void) static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size); -extern void *vzalloc(unsigned long size); -extern void *vmalloc_user(unsigned long size); -extern void *vmalloc_node(unsigned long size, int node); -extern void *vzalloc_node(unsigned long size, int node); -extern void 
*vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags); -extern void *vmalloc_exec(unsigned long size); -extern void *vmalloc_32(unsigned long size); -extern void *vmalloc_32_user(unsigned long size); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); +extern void *vmalloc(unsigned long size) __attribute__((alloc_size(1))); +extern void *vzalloc(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_user(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); +extern void *vzalloc_node(unsigned long size, int node) __attribute__((alloc_size(1))); +extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags) __attribute__((alloc_size(1))); +extern void *vmalloc_exec(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_32(unsigned long size) __attribute__((alloc_size(1))); +extern void *vmalloc_32_user(unsigned long size) __attribute__((alloc_size(1))); +extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) __attribute__((alloc_size(1))); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, - const void *caller); + const void *caller) __attribute__((alloc_size(1))); #ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, From fe1765f0695d109095be3b7d81b7dfc534fef8f7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 00:51:33 -0400 Subject: [PATCH 063/104] add kvmalloc alloc_size attribute Signed-off-by: Daniel Micay --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 52269e56c514d..04533d3fe0417 100644 --- a/include/linux/mm.h +++ 
b/include/linux/mm.h @@ -639,7 +639,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif -extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __attribute__((alloc_size(1))); static inline void *kvmalloc(size_t size, gfp_t flags) { return kvmalloc_node(size, flags, NUMA_NO_NODE); From eee3fa3bf11a227c6f58c2902ff6d2c75c6668c7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 16:39:36 -0400 Subject: [PATCH 064/104] add percpu alloc_size attributes Signed-off-by: Daniel Micay --- include/linux/percpu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 5e76af742c807..9a6c682ec1273 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); @@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr); extern void __init setup_per_cpu_areas(void); #endif -extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); -extern void __percpu *__alloc_percpu(size_t size, size_t align); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __attribute__((alloc_size(1))); +extern void __percpu *__alloc_percpu(size_t size, size_t align) __attribute__((alloc_size(1))); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); From 694cbc943d76f30de7eb6e2ac13c8e67d59a6c35 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 
16:53:59 -0400 Subject: [PATCH 065/104] add alloc_pages_exact alloc_size attributes Edited-by: Thibaut Sautereau Signed-off-by: Daniel Micay --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5b817cb86e7e..7a266a15dc486 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -553,9 +553,9 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); -void *alloc_pages_exact(size_t size, gfp_t gfp_mask); +void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __attribute__((alloc_size(1))); void free_pages_exact(void *virt, size_t size); -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); +void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __attribute__((alloc_size(2))); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) From 0ca98c2ffe1ee5b4580c4d2aa797bc793164fc94 Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 31 May 2016 01:34:02 +0200 Subject: [PATCH 066/104] Add the extra_latent_entropy kernel parameter When extra_latent_entropy is passed on the kernel command line, entropy will be extracted from up to the first 4GB of RAM while the runtime memory allocator is being initialized. Based on work created by the PaX Team. Signed-off-by: Emese Revfy Signed-off-by: Daniel Micay --- .../admin-guide/kernel-parameters.txt | 5 ++++ mm/page_alloc.c | 25 +++++++++++++++++++ scripts/gcc-plugins/Kconfig | 5 ++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ddc5ccdd4cd15..a90d111805512 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3399,6 +3399,11 @@ the specified number of seconds. 
This is to be used if your oopses keep scrolling off the screen. + extra_latent_entropy + Enable a very simple form of latent entropy extraction + from the first 4GB of memory as the bootmem allocator + passes the memory pages to the buddy allocator. + pcbit= [HW,ISDN] pcd. [PARIDE] diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3a98bca89e35e..e2ac37f126b62 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,15 @@ struct pcpu_drain { DEFINE_MUTEX(pcpu_drain_mutex); DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain); +bool __meminitdata extra_latent_entropy; + +static int __init setup_extra_latent_entropy(char *str) +{ + extra_latent_entropy = true; + return 0; +} +early_param("extra_latent_entropy", setup_extra_latent_entropy); + #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; EXPORT_SYMBOL(latent_entropy); @@ -1442,6 +1452,21 @@ void __free_pages_core(struct page *page, unsigned int order) __ClearPageReserved(p); set_page_count(p, 0); + if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { + unsigned long hash = 0; + size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; + const unsigned long *data = lowmem_page_address(page); + + for (index = 0; index < end; index++) + hash ^= hash + data[index]; +#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY + latent_entropy ^= hash; + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); +#else + add_device_randomness((const void *)&hash, sizeof(hash)); +#endif + } + atomic_long_add(nr_pages, &page_zone(page)->managed_pages); set_page_refcounted(page); __free_pages(page, order); diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index e3569543bdac2..55cc439b3bc6b 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -61,6 +61,11 @@ config GCC_PLUGIN_LATENT_ENTROPY is some slowdown of the boot process 
(about 0.5%) and fork and irq processing. + When extra_latent_entropy is passed on the kernel command line, + entropy will be extracted from up to the first 4GB of RAM while the + runtime memory allocator is being initialized. This costs even more + slowdown of the boot process. + Note that entropy extracted this way is not cryptographically secure! From ecbd8142736d9537515de40204b7a94fd4372f15 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 15 May 2017 23:45:34 -0400 Subject: [PATCH 067/104] ata: avoid null pointer dereference on bug Extracted from PaX. Signed-off-by: Daniel Micay --- drivers/ata/libata-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6f4ab5c5b52dd..05d96b73ec76b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5146,7 +5146,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) struct ata_port *ap; unsigned int tag; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ ap = qc->ap; qc->flags = 0; @@ -5163,7 +5163,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) struct ata_port *ap; struct ata_link *link; - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)); ap = qc->ap; link = qc->dev->link; From 14c10d0bb5dd4a92f419271b9d9568d86110e3fd Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 15 May 2017 23:51:12 -0400 Subject: [PATCH 068/104] sanity check for negative length in nla_memcpy Extracted from PaX. 
Signed-off-by: Daniel Micay --- lib/nlattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/nlattr.c b/lib/nlattr.c index cace9b3077810..39ba1387045da 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -571,6 +571,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); + BUG_ON(minlen < 0); + memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); From 5cc8adbec683971cf4519279f90cf1d072d8ed34 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 15 May 2017 23:59:18 -0400 Subject: [PATCH 069/104] add page destructor sanity check Taken from the public PaX patches. Signed-off-by: Daniel Micay --- mm/swap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/swap.c b/mm/swap.c index cf39d24ada2ac..c7cb787e145ea 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -94,6 +94,13 @@ static void __put_compound_page(struct page *page) if (!PageHuge(page)) __page_cache_release(page); dtor = get_compound_page_dtor(page); + if (!PageHuge(page)) + BUG_ON(dtor != free_compound_page +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + && dtor != free_transhuge_page +#endif + ); + (*dtor)(page); } From 6973f4b65ecfe4b8163ae0181f8487bb1cf30183 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 00:59:48 -0400 Subject: [PATCH 070/104] PaX shadow cr4 sanity check (essentially a revert) Signed-off-by: Daniel Micay --- arch/x86/include/asm/tlbflush.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6f66d841262d9..b786e7cb395d4 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -295,6 +295,7 @@ static inline void cr4_set_bits_irqsoff(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); if ((cr4 | mask) != cr4) __cr4_set(cr4 | mask); } @@ -305,6 +306,7 @@ static inline void cr4_clear_bits_irqsoff(unsigned long mask) 
unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); if ((cr4 & ~mask) != cr4) __cr4_set(cr4 & ~mask); } @@ -334,6 +336,7 @@ static inline void cr4_toggle_bits_irqsoff(unsigned long mask) unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); __cr4_set(cr4 ^ mask); } @@ -440,6 +443,7 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); /* toggle PGE */ native_write_cr4(cr4 ^ X86_CR4_PGE); /* write old PGE again and flush TLBs */ From eaa87f605a9d374d180708365e56f7e4c9a5cc15 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 9 Jul 2017 17:53:23 -0400 Subject: [PATCH 071/104] add writable function pointer detection Taken from the public PaX patches. Signed-off-by: Daniel Micay --- scripts/mod/modpost.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7edfdb2f4497a..460f39370bb1e 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -36,6 +36,7 @@ static int vmlinux_section_warnings = 1; static int warn_unresolved = 0; /* How a symbol is exported */ static int sec_mismatch_count = 0; +static int writable_fptr_count = 0; static int sec_mismatch_fatal = 0; /* ignore missing files */ static int ignore_missing_files; @@ -1012,6 +1013,7 @@ enum mismatch { ANY_EXIT_TO_ANY_INIT, EXPORT_TO_INIT_EXIT, EXTABLE_TO_NON_TEXT, + DATA_TO_TEXT }; /** @@ -1138,6 +1140,12 @@ static const struct sectioncheck sectioncheck[] = { .good_tosec = {ALL_TEXT_SECTIONS , NULL}, .mismatch = EXTABLE_TO_NON_TEXT, .handler = extable_mismatch_handler, +}, +/* Do not reference code from writable data */ +{ + .fromsec = { DATA_SECTIONS, NULL }, + .bad_tosec = { ALL_TEXT_SECTIONS, NULL }, + .mismatch = DATA_TO_TEXT } }; @@ -1325,10 +1333,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, continue; if 
(!is_valid_name(elf, sym)) continue; - if (sym->st_value == addr) - return sym; /* Find a symbol nearby - addr are maybe negative */ d = sym->st_value - addr; + if (d == 0) + return sym; if (d < 0) d = addr - sym->st_value; if (d < distance) { @@ -1463,7 +1471,10 @@ static void report_sec_mismatch(const char *modname, char *prl_from; char *prl_to; - sec_mismatch_count++; + if (mismatch->mismatch == DATA_TO_TEXT) + writable_fptr_count++; + else + sec_mismatch_count++; get_pretty_name(from_is_func, &from, &from_p); get_pretty_name(to_is_func, &to, &to_p); @@ -1585,6 +1596,14 @@ static void report_sec_mismatch(const char *modname, fatal("There's a special handler for this mismatch type, " "we should never get here."); break; + case DATA_TO_TEXT: +#if 0 + fprintf(stderr, + "The %s %s:%s references\n" + "the %s %s:%s%s\n", + from, fromsec, fromsym, to, tosec, tosym, to_p); +#endif + break; } fprintf(stderr, "\n"); } @@ -2674,6 +2693,9 @@ int main(int argc, char **argv) } free(buf.p); + if (writable_fptr_count) + warn("modpost: Found %d writable function pointer(s).\n", + writable_fptr_count); return err; } From 5639af8d8c878a88f3d8510a809d20df51837097 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 9 Jul 2017 17:20:29 -0400 Subject: [PATCH 072/104] support overriding early audit kernel cmdline --- kernel/audit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/audit.c b/kernel/audit.c index 17b0d523afb35..3337d02c6ab6e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1629,6 +1629,9 @@ static int __init audit_enable(char *str) if (audit_default == AUDIT_OFF) audit_initialized = AUDIT_DISABLED; + else if (!audit_ever_enabled) + audit_initialized = AUDIT_UNINITIALIZED; + if (audit_set_enabled(audit_default)) pr_err("audit: error setting audit state (%d)\n", audit_default); From 8cc6f2c0c362f90adcd8d200d1ebd6122c2a429a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 3 Jun 2017 17:34:13 -0400 Subject: [PATCH 073/104] FORTIFY_SOURCE intra-object 
overflow checking This adds support for detecting buffer overflows from inner objects for the fortified string family functions. It's comparable to the _FORTIFY_SOURCE=2 feature in glibc with the additional coverage of intra-object read overflows for supported functions. The mem* family functions are left with only the inter-object overflow checks as is the case with glibc _FORTIFY_SOURCE=2. This feature is currently hidden behind CONFIG_EXPERT because it's a lot more likely to uncover benign / intended issues and will need a lot of runtime testing. It's already useful for finding bugs but it may not yet be a good idea to use it for hardening unless panics for benign issues are seen as a lesser evil than the vulnerabilities it can catch. Signed-off-by: Daniel Micay --- include/linux/string.h | 26 ++++++++++++++++---------- security/Kconfig | 10 ++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 6dfbb2efa8157..c06ca2838932e 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -271,10 +271,16 @@ void __read_overflow2(void) __compiletime_error("detected read beyond size of ob void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); +#ifdef CONFIG_FORTIFY_SOURCE_STRICT_STRING +#define __string_size(p) __builtin_object_size(p, 1) +#else +#define __string_size(p) __builtin_object_size(p, 0) +#endif + #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) __FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); if (__builtin_constant_p(size) && p_size < size) __write_overflow(); if (p_size < size) @@ -284,7 +290,7 @@ __FORTIFY_INLINE char *strncpy(char *p, const char *q,
__kernel_size_t size) __FORTIFY_INLINE char *strcat(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); if (p_size == (size_t)-1) return __builtin_strcat(p, q); if (strlcat(p, q, p_size) >= p_size) @@ -295,7 +301,7 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q) __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); /* Work around gcc excess stack consumption issue */ if (p_size == (size_t)-1 || @@ -310,7 +316,7 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) { - size_t p_size = __builtin_object_size(p, 0); + size_t p_size = __string_size(p); __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) fortify_panic(__func__); @@ -322,8 +328,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) { size_t ret; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __real_strlcpy(p, q, size); ret = strlen(q); @@ -343,8 +349,8 @@ __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) __FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) { size_t p_len, copy_len; - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __builtin_strncat(p, q, count); p_len = strlen(p); @@ -457,8 +463,8 @@ __FORTIFY_INLINE void 
*kmemdup(const void *p, size_t size, gfp_t gfp) /* defined after fortified strlen and memcpy to reuse them */ __FORTIFY_INLINE char *strcpy(char *p, const char *q) { - size_t p_size = __builtin_object_size(p, 0); - size_t q_size = __builtin_object_size(q, 0); + size_t p_size = __string_size(p); + size_t q_size = __string_size(q); if (p_size == (size_t)-1 && q_size == (size_t)-1) return __builtin_strcpy(p, q); memcpy(p, q, strlen(q) + 1); diff --git a/security/Kconfig b/security/Kconfig index 7275053b01d04..b0d20596b9178 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -208,6 +208,16 @@ config FORTIFY_SOURCE Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. +config FORTIFY_SOURCE_STRICT_STRING + bool "Harden common functions against buffer overflows" + depends on FORTIFY_SOURCE + depends on EXPERT + help + Perform stricter overflow checks catching overflows within objects + for common C string functions rather than only between objects. + + This is not yet intended for production use, only bug finding. + config STATIC_USERMODEHELPER bool "Force all usermode helper calls through a single binary" help From 224b56e592c837a73ba0c15eabb986e9deb284ea Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 26 Aug 2017 20:16:03 -0400 Subject: [PATCH 074/104] Revert "mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes" This reverts commit aab425db4279aeb83b7911693f0cccbd3644c9fd. --- arch/arm64/include/asm/elf.h | 8 ++------ arch/x86/include/asm/elf.h | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3f..bef1dfb3bb5f0 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -103,14 +103,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. 
On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#ifdef CONFIG_ARM64_FORCE_52BIT -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) -#else -#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) -#endif /* CONFIG_ARM64_FORCE_52BIT */ +#define ELF_ET_DYN_BASE 0x100000000UL #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 69c0f892e310a..02e14e2e0f5db 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -248,11 +248,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (DEFAULT_MAP_WINDOW / 3 * 2)) + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, From 5bcddc93b3deddd641ff43d0df1f99e866bc4047 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 11 May 2017 16:52:00 -0400 Subject: [PATCH 075/104] x86_64: move vdso to mmap region from stack region This removes the only executable code from the stack region and gives the vdso the same randomized base as other mmap mappings including the linker and other shared objects. It results in a sane amount of entropy being provided and there's little to no advantage in separating this from the existing executable code there. It's sensible for userspace to reserve the initial mmap base as a region for executable code with a random gap for other mmap allocations, along with providing randomization within that region. 
However, there isn't much the kernel can do to help due to how dynamic linkers load the shared objects. This was extracted from the PaX RANDMMAP feature. Signed-off-by: Daniel Micay --- arch/x86/entry/vdso/vma.c | 48 +----------------------------------- arch/x86/include/asm/elf.h | 1 - arch/x86/kernel/sys_x86_64.c | 7 ------ 3 files changed, 1 insertion(+), 55 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index c1b8496b56067..1655c9eb47065 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -314,55 +314,9 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) } #ifdef CONFIG_X86_64 -/* - * Put the vdso above the (randomized) stack with another randomized - * offset. This way there is no hole in the middle of address space. - * To save memory make sure it is still in the same PTE as the stack - * top. This doesn't give that many random bits. - * - * Note that this algorithm is imperfect: the distribution of the vdso - * start address within a PMD is biased toward the end. - * - * Only used for the 64-bit and x32 vdsos. - */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ - unsigned long addr, end; - unsigned offset; - - /* - * Round up the start address. It can start out unaligned as a result - * of stack start randomization. - */ - start = PAGE_ALIGN(start); - - /* Round the lowest possible end address up to a PMD boundary. */ - end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; - end -= len; - - if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); - addr = start + (offset << PAGE_SHIFT); - } else { - addr = start; - } - - /* - * Forcibly align the final address in case we have a hardware - * issue that requires alignment for performance reasons. 
- */ - addr = align_vdso_addr(addr); - - return addr; -} - static int map_vdso_randomized(const struct vdso_image *image) { - unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); - - return map_vdso(image, addr); + return map_vdso(image, 0); } #endif diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 02e14e2e0f5db..bd086b296e388 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -380,5 +380,4 @@ struct va_alignment { } ____cacheline_aligned; extern struct va_alignment va_align; -extern unsigned long align_vdso_addr(unsigned long); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index ca3c11a17b5a1..97d7eded58b8e 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -53,13 +53,6 @@ static unsigned long get_align_bits(void) return va_align.bits & get_align_mask(); } -unsigned long align_vdso_addr(unsigned long addr) -{ - unsigned long align_mask = get_align_mask(); - addr = (addr + align_mask) & ~align_mask; - return addr | get_align_bits(); -} - static int __init control_va_addr_alignment(char *str) { /* guard against enabling this on other CPU families */ From 8b31fc5cb93d051c66bde622912a1dc8d650851a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 21 May 2017 20:30:44 -0400 Subject: [PATCH 076/104] x86: determine stack entropy based on mmap entropy Stack mapping entropy is currently hard-wired to 11 bits of entropy on 32-bit and 22 bits of entropy on 64-bit. The stack itself gains an extra 8 bits of entropy from lower bit randomization within 16 byte alignment constraints. The argument block could have all lower bits randomized but it currently only gets the mapping randomization. 
Rather than hard-wiring values this switches to using the mmap entropy configuration like the mmap base and executable base, resulting in a range of 8 to 16 bits on 32-bit and 28 to 32 bits on 64-bit depending on kernel configuration and overridable via the sysctl entries. It's worth noting that since these kernel configuration options default to the minimum supported entropy value, the entropy on 32-bit will drop from 11 to 8 bits for builds using the defaults. However, following the configuration seems like the right thing to do regardless. At the very least, changing the defaults for COMPAT (32-bit processes on 64-bit) should be considered due to the larger address space compared to real 32-bit. Signed-off-by: Daniel Micay --- arch/x86/include/asm/elf.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index bd086b296e388..f9f7a85bb71e7 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -312,8 +312,8 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #ifdef CONFIG_X86_32 -#define __STACK_RND_MASK(is32bit) (0x7ff) -#define STACK_RND_MASK (0x7ff) +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) #define ARCH_DLINFO ARCH_DLINFO_IA32 @@ -322,7 +322,11 @@ extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #else /* CONFIG_X86_32 */ /* 1GB for 64bit, 8MB for 32bit */ -#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) +#ifdef CONFIG_COMPAT +#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
(1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) +#else +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#endif #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) #define ARCH_DLINFO \ From 01ece9e5831e8d90245d1b7cb3b9c513804142f1 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 22 May 2017 05:06:20 -0400 Subject: [PATCH 077/104] arm64: determine stack entropy based on mmap entropy Stack mapping entropy is currently hard-wired to 11 bits of entropy on 32-bit and 18 bits of entropy on 64-bit. The stack itself gains an extra 8 bits of entropy from lower bit randomization within 16 byte alignment constraints. The argument block could have all lower bits randomized but it currently only gets the mapping randomization. Rather than hard-wiring values this switches to using the mmap entropy configuration like the mmap base and executable base, resulting in a range of 8 to 16 bits on 32-bit and 18 to 24 bits on 64-bit (with 4k pages and 3 level page tables) depending on kernel configuration and overridable via the sysctl entries. It's worth noting that since these kernel configuration options default to the minimum supported entropy value, the entropy on 32-bit will drop from 11 to 8 bits for builds using the defaults. However, following the configuration seems like the right thing to do regardless. At the very least, changing the defaults for COMPAT (32-bit processes on 64-bit) should be considered due to the larger address space compared to real 32-bit. Signed-off-by: Daniel Micay --- arch/arm64/include/asm/elf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index bef1dfb3bb5f0..0a228dbcad656 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -160,10 +160,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, /* 1GB of VA */ #ifdef CONFIG_COMPAT #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? 
\ - 0x7ff >> (PAGE_SHIFT - 12) : \ - 0x3ffff >> (PAGE_SHIFT - 12)) + ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ + ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #else -#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #endif #ifdef __AARCH64EB__ From b596f543ac1e4df8656abd9f5aeb6e0cc2e52687 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 11 May 2017 16:02:49 -0400 Subject: [PATCH 078/104] randomize lower bits of the argument block This was based on the PaX RANDUSTACK feature in grsecurity, where all of the lower bits are randomized. PaX keeps 16-byte alignment. Signed-off-by: Daniel Micay --- fs/exec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index db17be51b1127..1f5c4bc942bdd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -274,6 +275,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) mm->stack_vm = mm->total_vm = 1; up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); + if (randomize_va_space) + bprm->p ^= get_random_int() & ~PAGE_MASK; return 0; err: up_write(&mm->mmap_sem); From 0f012fe8c6f8dc5076535e9c29a73896911e6d0e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 May 2017 07:19:48 -0400 Subject: [PATCH 079/104] x86_64: match arm64 brk randomization entropy Signed-off-by: Daniel Micay --- arch/x86/kernel/process.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 839b5244e3b7e..12875d7e8b3c3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include "process.h" @@ -913,7 +915,10 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { - return randomize_page(mm->brk, 0x02000000); + if (mmap_is_ia32()) + 
return randomize_page(mm->brk, SZ_32M); + else + return randomize_page(mm->brk, SZ_1G); } /* From 4f3d1968798705bb7d0ca3c4e0d66063f629935d Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 May 2017 18:03:30 -0400 Subject: [PATCH 080/104] support randomizing the lower bits of brk This adds support for arch_randomize_brk implementations not performing page alignment in order to randomize the lower bits of the brk heap. This idea is taken from PaX but the approach is different. This reuses the existing code and avoids forcing early creation of the heap mapping, avoiding mapping it if it's not used which is the case with many modern allocators based solely on mmap. The malloc implementation can be relied upon to align this as needed to the requirements it has, so using 16 byte alignment here is unnecessary. Signed-off-by: Daniel Micay --- mm/mmap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 6756b8bb00334..dc866b510e853 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -230,6 +230,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); + /* properly handle unaligned min_brk as an empty heap */ + if (min_brk & ~PAGE_MASK) { + if (brk == min_brk) + newbrk -= PAGE_SIZE; + if (mm->brk == min_brk) + oldbrk -= PAGE_SIZE; + } if (oldbrk == newbrk) { mm->brk = brk; goto success; From 0e35de5c54fe23e71b3d69b0d51f5294fdc28304 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:22:38 -0400 Subject: [PATCH 081/104] mm: randomize lower bits of brk Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 988d11e6c17cb..9529eb371678f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -335,9 +335,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? 
*/ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - return randomize_page(mm->brk, SZ_32M); + return mm->brk + get_random_long() % SZ_32M; - return randomize_page(mm->brk, SZ_1G); + return mm->brk + get_random_long() % SZ_1G; } unsigned long arch_mmap_rnd(void) From e0da9d87115b03184007eef7e4a99dadf7cb73e2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:06 -0400 Subject: [PATCH 082/104] x86: randomize lower bits of brk Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 12875d7e8b3c3..465645fa7756e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -916,9 +916,9 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { if (mmap_is_ia32()) - return randomize_page(mm->brk, SZ_32M); + return mm->brk + get_random_long() % SZ_32M; else - return randomize_page(mm->brk, SZ_1G); + return mm->brk + get_random_long() % SZ_1G; } /* From b55113f992f352bc1ae4d0d648d503c5b31e5aa6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:39 -0400 Subject: [PATCH 083/104] mm: guarantee brk gap is at least one page Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 9529eb371678f..94536089e0e98 100644 --- a/mm/util.c +++ b/mm/util.c @@ -335,9 +335,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? 
*/ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - return mm->brk + get_random_long() % SZ_32M; + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - return mm->brk + get_random_long() % SZ_1G; + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } unsigned long arch_mmap_rnd(void) From 3a4c89837922bb73783061dd80ef5ed236dad972 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:48 -0400 Subject: [PATCH 084/104] x86: guarantee brk gap is at least one page Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 465645fa7756e..221dd3c1b20d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -916,9 +916,9 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { if (mmap_is_ia32()) - return mm->brk + get_random_long() % SZ_32M; + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; else - return mm->brk + get_random_long() % SZ_1G; + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } /* From 293f9847cc5c5639f2ce64a6f79bab4786ce4ea1 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 14:50:54 -0400 Subject: [PATCH 085/104] x86_64: bound mmap between legacy/modern bases --- arch/x86/kernel/sys_x86_64.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 97d7eded58b8e..017ba12762fde 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -114,10 +114,7 @@ static void find_start_end(unsigned long addr, unsigned long flags, } *begin = get_mmap_base(1); - if (in_32bit_syscall()) - *end = task_size_32bit(); - else - *end = task_size_64bit(addr > DEFAULT_MAP_WINDOW); + *end = get_mmap_base(0); } unsigned long @@ -194,7 +191,7 @@ 
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = get_mmap_base(1); info.high_limit = get_mmap_base(0); /* From 030e84d3c7db177830f371aebd0db5b1b09b4053 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 18:26:10 -0400 Subject: [PATCH 086/104] restrict device timing side channels Based on the public grsecurity patches. --- fs/stat.c | 20 +++++++++++++++----- include/linux/capability.h | 5 +++++ include/linux/fs.h | 11 +++++++++++ include/linux/fsnotify.h | 6 ++++++ kernel/capability.c | 6 ++++++ kernel/sysctl.c | 12 ++++++++++++ 6 files changed, 55 insertions(+), 5 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index 030008796479a..b1c2c0d5b874a 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -42,8 +42,13 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->gid = inode->i_gid; stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; + if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { + stat->atime = inode->i_ctime; + stat->mtime = inode->i_ctime; + } else { + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + } stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; @@ -79,9 +84,14 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; - if (inode->i_op->getattr) - return inode->i_op->getattr(path, stat, request_mask, - query_flags); + if (inode->i_op->getattr) { + int retval = inode->i_op->getattr(path, stat, request_mask, query_flags); + if (!retval && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { + stat->atime = stat->ctime; + stat->mtime = stat->ctime; + } + return retval; + } generic_fillattr(inode, stat); return 0; diff --git a/include/linux/capability.h 
b/include/linux/capability.h index ecce0f43c73ac..e46306dd44010 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -208,6 +208,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); +extern bool capable_noaudit(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); @@ -234,6 +235,10 @@ static inline bool capable(int cap) { return true; } +static inline bool capable_noaudit(int cap) +{ + return true; +} static inline bool ns_capable(struct user_namespace *ns, int cap) { return true; diff --git a/include/linux/fs.h b/include/linux/fs.h index 6eae91c0668f9..da2cb2601bbd1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3644,4 +3644,15 @@ static inline int inode_drain_writes(struct inode *inode) return filemap_write_and_wait(inode->i_mapping); } +extern int device_sidechannel_restrict; + +static inline bool is_sidechannel_device(const struct inode *inode) +{ + umode_t mode; + if (!device_sidechannel_restrict) + return false; + mode = inode->i_mode; + return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & (S_IROTH | S_IWOTH))); +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a2d5d175d3c15..e91ab06119b0f 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -233,6 +233,9 @@ static inline void fsnotify_access(struct file *file) struct inode *inode = file_inode(file); __u32 mask = FS_ACCESS; + if (is_sidechannel_device(inode)) + return; + if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; @@ -249,6 +252,9 @@ static inline void fsnotify_modify(struct file *file) struct inode *inode = file_inode(file); __u32 mask = FS_MODIFY; + if (is_sidechannel_device(inode)) + return; + if 
(S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; diff --git a/kernel/capability.c b/kernel/capability.c index 1444f3954d750..8cc9dd7992f2f 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -449,6 +449,12 @@ bool capable(int cap) return ns_capable(&init_user_ns, cap); } EXPORT_SYMBOL(capable); + +bool capable_noaudit(int cap) +{ + return ns_capable_noaudit(&init_user_ns, cap); +} +EXPORT_SYMBOL(capable_noaudit); #endif /* CONFIG_MULTIUSER */ /** diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 531d9a01deaaf..a8fa85410e43d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -160,6 +160,9 @@ static const int cap_last_cap = CAP_LAST_CAP; static unsigned long hung_task_timeout_max __read_only = (LONG_MAX/HZ); #endif +int device_sidechannel_restrict __read_mostly = 1; +EXPORT_SYMBOL(device_sidechannel_restrict); + #ifdef CONFIG_INOTIFY_USER #include #endif @@ -914,6 +917,15 @@ static struct ctl_table kern_table[] = { .extra2 = &two, }, #endif + { + .procname = "device_sidechannel_restrict", + .data = &device_sidechannel_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { .procname = "ngroups_max", .data = &ngroups_max, From b36b517b216c09c7b54ef92510fea09e66732a83 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 17:51:48 -0400 Subject: [PATCH 087/104] add toggle for disabling newly added USB devices Based on the public grsecurity patches. Note: not for submission upstream and may be removed once CopperheadOS is migrated to another mechanism for this. 
--- drivers/usb/core/hub.c | 8 ++++++++ kernel/sysctl.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3405b146edc94..57e8c5fac2576 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -43,6 +43,8 @@ #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ +extern int deny_new_usb; + /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ @@ -5077,6 +5079,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, goto done; return; } + + if (deny_new_usb) { + dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); + goto done; + } + if (hub_is_superspeed(hub->hdev)) unit_load = 150; else diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a8fa85410e43d..6a5ecdb5d5382 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -104,6 +104,10 @@ #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ +#if IS_ENABLED(CONFIG_USB) +int deny_new_usb __read_mostly = 0; +EXPORT_SYMBOL(deny_new_usb); +#endif extern int suid_dumpable; #ifdef CONFIG_COREDUMP extern int core_uses_pid; @@ -926,6 +930,17 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#if IS_ENABLED(CONFIG_USB) + { + .procname = "deny_new_usb", + .data = &deny_new_usb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif { .procname = "ngroups_max", .data = &ngroups_max, From cc190379cf2bd52872c28faa2c3277c91f5e25f1 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 03:26:45 -0500 Subject: [PATCH 088/104] hard-wire legacy checkreqprot option to 0 The userspace API is left intact for compatibility. 
Edited-by: Levente Polyak --- .../admin-guide/kernel-parameters.txt | 10 ---------- security/selinux/Kconfig | 20 ------------------- security/selinux/hooks.c | 13 +----------- security/selinux/selinuxfs.c | 4 +--- 4 files changed, 2 insertions(+), 45 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a90d111805512..111a1317c068b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -503,16 +503,6 @@ nosocket -- Disable socket memory accounting. nokmem -- Disable kernel memory accounting. - checkreqprot [SELINUX] Set initial checkreqprot flag value. - Format: { "0" | "1" } - See security/selinux/Kconfig help text. - 0 -- check protection applied by kernel (includes - any implied execute protection). - 1 -- check protection requested by application. - Default value is set via a kernel config option. - Value can be changed at runtime via - /sys/fs/selinux/checkreqprot. - cio_ignore= [S390] See Documentation/s390/common_io.rst for details. clk_ignore_unused diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 2d48aef6d5ac4..ae851a826c26c 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -70,26 +70,6 @@ config SECURITY_SELINUX_AVC_STATS /sys/fs/selinux/avc/cache_stats, which may be monitored via tools such as avcstat. -config SECURITY_SELINUX_CHECKREQPROT_VALUE - int "NSA SELinux checkreqprot default value" - depends on SECURITY_SELINUX - range 0 1 - default 0 - help - This option sets the default value for the 'checkreqprot' flag - that determines whether SELinux checks the protection requested - by the application or the protection that will be applied by the - kernel (including any implied execute for read-implies-exec) for - mmap and mprotect calls. If this option is set to 0 (zero), - SELinux will default to checking the protection that will be applied - by the kernel. 
If this option is set to 1 (one), SELinux will - default to checking the protection requested by the application. - The checkreqprot flag may be changed from the default via the - 'checkreqprot=' boot parameter. It may also be changed at runtime - via /sys/fs/selinux/checkreqprot if authorized by policy. - - If you are unsure how to answer this question, answer 0. - config SECURITY_SELINUX_SIDTAB_HASH_BITS int "NSA SELinux sidtab hashtable size" depends on SECURITY_SELINUX diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d9e8b2131a650..8fa343c697009 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -135,18 +135,7 @@ static int __init selinux_enabled_setup(char *str) __setup("selinux=", selinux_enabled_setup); #endif -static unsigned int selinux_checkreqprot_boot = - CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; - -static int __init checkreqprot_setup(char *str) -{ - unsigned long checkreqprot; - - if (!kstrtoul(str, 0, &checkreqprot)) - selinux_checkreqprot_boot = checkreqprot ? 
1 : 0; - return 1; -} -__setup("checkreqprot=", checkreqprot_setup); +static const unsigned int selinux_checkreqprot_boot; /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 79c710911a3c9..09b7b543fc135 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -641,7 +641,6 @@ static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; unsigned int new_value; @@ -665,10 +664,9 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, return PTR_ERR(page); length = -EINVAL; - if (sscanf(page, "%u", &new_value) != 1) + if (sscanf(page, "%u", &new_value) != 1 || new_value) goto out; - fsi->state->checkreqprot = new_value ? 1 : 0; length = count; out: kfree(page); From 941ac11a0299765ed08d8000db3ec08b9fbf7930 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Mon, 29 May 2017 17:37:59 -0400 Subject: [PATCH 089/104] security: tty: Add owner user namespace to tty_struct This patch adds struct user_namespace *owner_user_ns to the tty_struct. Then it is set to current_user_ns() in the alloc_tty_struct function. This is done to facilitate capability checks against the original user namespace that allocated the tty. E.g. ns_capable(tty->owner_user_ns,CAP_SYS_ADMIN) This combined with the use of user namespace's will allow hardening protections to be built to mitigate container escapes that utilize TTY ioctls such as TIOCSTI. 
See: https://bugzilla.redhat.com/show_bug.cgi?id=1411256 Acked-by: Serge Hallyn Reviewed-by: Kees Cook Signed-off-by: Matt Brown --- drivers/tty/tty_io.c | 2 ++ include/linux/tty.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1453fe108621..94d111c8f6928 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -174,6 +174,7 @@ static void free_tty_struct(struct tty_struct *tty) put_device(tty->dev); kfree(tty->write_buf); tty->magic = 0xDEADDEAD; + put_user_ns(tty->owner_user_ns); kfree(tty); } @@ -3011,6 +3012,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); + tty->owner_user_ns = get_user_ns(current_user_ns()); return tty; } diff --git a/include/linux/tty.h b/include/linux/tty.h index bfa4e2ee94a9d..f71e3223dd2ca 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -14,6 +14,7 @@ #include #include #include +#include /* @@ -336,6 +337,7 @@ struct tty_struct { /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; + struct user_namespace *owner_user_ns; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ From 3134273b2ef45df99d8c5bb294c18625527bd7a9 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Mon, 29 May 2017 17:38:00 -0400 Subject: [PATCH 090/104] security: tty: make TIOCSTI ioctl require CAP_SYS_ADMIN This introduces the tiocsti_restrict sysctl, whose default is controlled via CONFIG_SECURITY_TIOCSTI_RESTRICT. When activated, this control restricts all TIOCSTI ioctl calls from non CAP_SYS_ADMIN users. This patch depends on patch 1/2 This patch was inspired from GRKERNSEC_HARDEN_TTY. 
This patch would have prevented https://bugzilla.redhat.com/show_bug.cgi?id=1411256 under the following conditions: * non-privileged container * container run inside new user namespace Possible effects on userland: There could be a few user programs that would be affected by this change. See: notable programs are: agetty, csh, xemacs and tcsh However, I still believe that this change is worth it given that the Kconfig defaults to n. This will be a feature that is turned on for the same reason that people activate it when using grsecurity. Users of this opt-in feature will realize that they are choosing security over some OS features like unprivileged TIOCSTI ioctls, as should be clear in the Kconfig help message. Threat Model/Patch Rationale: >From grsecurity's config for GRKERNSEC_HARDEN_TTY. | There are very few legitimate uses for this functionality and it | has made vulnerabilities in several 'su'-like programs possible in | the past. Even without these vulnerabilities, it provides an | attacker with an easy mechanism to move laterally among other | processes within the same user's compromised session. So if one process within a tty session becomes compromised it can follow that additional processes, that are thought to be in different security boundaries, can be compromised as a result. When using a program like su or sudo, these additional processes could be in a tty session where TTY file descriptors are indeed shared over privilege boundaries. This is also an excellent writeup about the issue: When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is done against the user namespace that originally opened the tty.
Acked-by: Serge Hallyn Reviewed-by: Kees Cook Signed-off-by: Matt Brown --- Documentation/admin-guide/sysctl/kernel.rst | 20 ++++++++++++++++++++ drivers/tty/tty_io.c | 8 ++++++++ include/linux/tty.h | 2 ++ kernel/sysctl.c | 12 ++++++++++++ security/Kconfig | 13 +++++++++++++ 5 files changed, 55 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index def074807cee9..8770b4bc20f24 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -102,6 +102,7 @@ show up in /proc/sys/kernel: - sysctl_writes_strict - tainted ==> Documentation/admin-guide/tainted-kernels.rst - threads-max +- tiocsti_restrict - unknown_nmi_panic - watchdog - watchdog_thresh @@ -1112,6 +1113,25 @@ If a value outside of this range is written to threads-max an error EINVAL occurs. +tiocsti_restrict: +================= + +This toggle indicates whether unprivileged users are prevented from using the +TIOCSTI ioctl to inject commands into other processes which share a tty +session. + +When tiocsti_restrict is set to (0) there are no restrictions (except the +default restriction of only being able to inject commands into one's own +tty). When tiocsti_restrict is set to (1), users must have CAP_SYS_ADMIN to +use the TIOCSTI ioctl. + +When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is +done against the user namespace that originally opened the tty. + +The kernel config option CONFIG_SECURITY_TIOCSTI_RESTRICT sets the default +value of tiocsti_restrict.
+ + unknown_nmi_panic: ================== diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 94d111c8f6928..9c535b09e6ff2 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2184,11 +2184,19 @@ static int tty_fasync(int fd, struct file *filp, int on) * FIXME: may race normal receive processing */ +int tiocsti_restrict = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); + static int tiocsti(struct tty_struct *tty, char __user *p) { char ch, mbz = 0; struct tty_ldisc *ld; + if (tiocsti_restrict && + !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { + dev_warn_ratelimited(tty->dev, + "Denied TIOCSTI ioctl for non-privileged process\n"); + return -EPERM; + } if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) diff --git a/include/linux/tty.h b/include/linux/tty.h index f71e3223dd2ca..3e18d583fc8de 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -347,6 +347,8 @@ struct tty_file_private { struct list_head list; }; +extern int tiocsti_restrict; + /* tty magic number */ #define TTY_MAGIC 0x5401 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6a5ecdb5d5382..8cf067fcf8c77 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "../lib/kstrtox.h" @@ -920,6 +921,17 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, +#endif +#if defined CONFIG_TTY + { + .procname = "tiocsti_restrict", + .data = &tiocsti_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { .procname = "device_sidechannel_restrict", diff --git a/security/Kconfig b/security/Kconfig index b0d20596b9178..65260bbce2530 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -29,6 +29,19 @@ config SECURITY_PERF_EVENTS_RESTRICT perf_event_open syscall will be permitted unless it is changed. 
+config SECURITY_TIOCSTI_RESTRICT + bool "Restrict unprivileged use of tiocsti command injection" + default n + help + This enforces restrictions on unprivileged users injecting commands + into other processes which share a tty session using the TIOCSTI + ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN. + + If this option is not selected, no restrictions will be enforced + unless the tiocsti_restrict sysctl is explicitly set to (1). + + If you are unsure how to answer this question, answer N. + config SECURITY bool "Enable different security models" depends on SYSFS From b3029bdda65ed56c6e7656b48bb1214a57ce5431 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 23:36:14 -0400 Subject: [PATCH 091/104] enable SECURITY_TIOCSTI_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index 65260bbce2530..3b7a71410f885 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -31,7 +31,7 @@ config SECURITY_PERF_EVENTS_RESTRICT config SECURITY_TIOCSTI_RESTRICT bool "Restrict unprivileged use of tiocsti command injection" - default n + default y help This enforces restrictions on unprivileged users injecting commands into other processes which share a tty session using the TIOCSTI From 69b5671916a473fa6301725b329a9bf786d099d1 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Mon, 7 May 2018 20:37:07 +0200 Subject: [PATCH 092/104] disable unprivileged eBPF access by default --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a91ad518c0503..b5ad38d2b1b6d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); static DEFINE_SPINLOCK(map_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = 1; static 
const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) From 453f6355144b6eb7c868e974928e7aef716022d5 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Mon, 7 May 2018 20:37:55 +0200 Subject: [PATCH 093/104] enable BPF JIT hardening by default (if available) --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 973a20d497498..1405760c90d2f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -520,7 +520,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); -int bpf_jit_harden __read_mostly; +int bpf_jit_harden __read_mostly = 2; long bpf_jit_limit __read_mostly; static __always_inline void From 95d0499c10a7899119c507de82fedec383325acc Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 4 Nov 2018 18:48:53 +0100 Subject: [PATCH 094/104] enable protected_{fifos,regular} by default --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 402d74528f9de..e012140a971a4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -951,8 +951,8 @@ static inline void put_link(struct nameidata *nd) int sysctl_protected_symlinks __read_mostly = 1; int sysctl_protected_hardlinks __read_mostly = 1; -int sysctl_protected_fifos __read_mostly; -int sysctl_protected_regular __read_mostly; +int sysctl_protected_fifos __read_mostly = 2; +int sysctl_protected_regular __read_mostly = 2; /** * may_follow_link - Check symlink following for unsafe situations From 51bb62525d89a8d872a92db85eaa42c0e966b853 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 13 Jan 2019 21:42:45 +0100 Subject: [PATCH 095/104] Revert "mark kernel_set_to_readonly as __ro_after_init" This reverts commit 
296db046b8688b96332d54e273cac265644a1e47. CPA conflicts detected when marking kernel_set_to_readonly as __ro_after_init #4 --- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index ff3f34d9c21a1..6906855aff6c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -871,7 +871,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, } #endif -int kernel_set_to_readonly __ro_after_init; +int kernel_set_to_readonly __read_mostly; static void mark_nxdata_nx(void) { @@ -895,11 +895,12 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = (unsigned long)__end_rodata - start; - kernel_set_to_readonly = 1; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); + kernel_set_to_readonly = 1; + #ifdef CONFIG_CPA_DEBUG pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size); set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6320b04d752ec..a5c16ac722fb6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1256,7 +1256,7 @@ void __init mem_init(void) mem_init_print_info(NULL); } -int kernel_set_to_readonly __ro_after_init; +int kernel_set_to_readonly; void mark_rodata_ro(void) { @@ -1269,9 +1269,10 @@ void mark_rodata_ro(void) printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); - kernel_set_to_readonly = 1; set_memory_ro(start, (end - start) >> PAGE_SHIFT); + kernel_set_to_readonly = 1; + /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. 
From 1336a9e9c94414ee449664b0e1b60c72c75d07f0 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Mon, 6 May 2019 17:07:11 +0200 Subject: [PATCH 096/104] modpost: Add CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE With 46c7dd56d541 ("modpost: always show verbose warning for section mismatch"), sec_mismatch_verbose was removed which would have printed errors for all writable function pointers during compilation if it hadn't been "#if 0"ed out for quite some time now. Let's introduce a new DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE Kconfig option to cleanly control this linux-hardened functionality. Signed-off-by: Thibaut Sautereau --- lib/Kconfig.debug | 3 +++ scripts/Makefile.modpost | 1 + scripts/mod/modpost.c | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e8f4916204b83..12b8dff8329a3 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -347,6 +347,9 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. 
+config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE + bool "Enable verbose reporting of writable function pointers" + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index b4d3f2d122ac2..fc2e8ae733159 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -53,6 +53,7 @@ MODPOST = scripts/mod/modpost \ $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ $(if $(KBUILD_MODPOST_WARN),-w) ifdef MODPOST_VMLINUX diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 460f39370bb1e..3520f499a6ff1 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -36,8 +36,9 @@ static int vmlinux_section_warnings = 1; static int warn_unresolved = 0; /* How a symbol is exported */ static int sec_mismatch_count = 0; -static int writable_fptr_count = 0; static int sec_mismatch_fatal = 0; +static int writable_fptr_count = 0; +static int writable_fptr_verbose = 0; /* ignore missing files */ static int ignore_missing_files; @@ -1471,10 +1472,13 @@ static void report_sec_mismatch(const char *modname, char *prl_from; char *prl_to; - if (mismatch->mismatch == DATA_TO_TEXT) + if (mismatch->mismatch == DATA_TO_TEXT) { writable_fptr_count++; - else + if (!writable_fptr_verbose) + return; + } else { sec_mismatch_count++; + } get_pretty_name(from_is_func, &from, &from_p); get_pretty_name(to_is_func, &to, &to_p); @@ -1597,12 +1601,10 @@ static void report_sec_mismatch(const char *modname, "we should never get here."); break; case DATA_TO_TEXT: -#if 0 fprintf(stderr, "The %s %s:%s references\n" "the %s %s:%s%s\n", from, fromsec, fromsym, to, tosec, tosym, to_p); -#endif break; } fprintf(stderr, "\n"); @@ -2579,7 +2581,7 @@ int main(int argc, char **argv) 
struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:e:mnsT:o:awEd:")) != -1) { + while ((opt = getopt(argc, argv, "i:e:fmnsT:o:awEd:")) != -1) { switch (opt) { case 'i': kernel_read = optarg; @@ -2593,6 +2595,9 @@ int main(int argc, char **argv) extsym_iter->file = optarg; extsym_start = extsym_iter; break; + case 'f': + writable_fptr_verbose = 1; + break; case 'm': modversions = 1; break; @@ -2693,9 +2698,11 @@ int main(int argc, char **argv) } free(buf.p); - if (writable_fptr_count) - warn("modpost: Found %d writable function pointer(s).\n", - writable_fptr_count); + if (writable_fptr_count && !writable_fptr_verbose) + warn("modpost: Found %d writable function pointer%s.\n" + "To see full details build your kernel with:\n" + "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n", + writable_fptr_count, (writable_fptr_count == 1 ? "" : "s")); return err; } From d80365007b6edfe1112ccd7944e722dbb00aa15a Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Tue, 7 May 2019 11:46:21 +0200 Subject: [PATCH 097/104] mm: Fix extra_latent_entropy Commit a9cd410a3d29 ("mm/page_alloc.c: memory hotplug: free pages as higher order") changed `static void __init __free_pages_boot_core()` into `void __free_pages_core()`, causing the following section mismatch warning at compile time: WARNING: vmlinux.o(.text+0x180fe4): Section mismatch in reference from the function __free_pages_core() to the variable .meminit.data:extra_latent_entropy The function __free_pages_core() references the variable __meminitdata extra_latent_entropy. This is often because __free_pages_core lacks a __meminitdata annotation or the annotation of extra_latent_entropy is wrong. This commit is an attempt at fixing this issue. I'm not sure it's OK as we are accessing pages that are still managed by the bootmem allocator. The prefetching part is not an issue as it only affects struct pages. 
Signed-off-by: Thibaut Sautereau --- mm/page_alloc.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e2ac37f126b62..03e7e786d3a79 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1437,21 +1437,9 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } -void __free_pages_core(struct page *page, unsigned int order) +static void __init __gather_extra_latent_entropy(struct page *page, + unsigned int nr_pages) { - unsigned int nr_pages = 1 << order; - struct page *p = page; - unsigned int loop; - - prefetchw(p); - for (loop = 0; loop < (nr_pages - 1); loop++, p++) { - prefetchw(p + 1); - __ClearPageReserved(p); - set_page_count(p, 0); - } - __ClearPageReserved(p); - set_page_count(p, 0); - if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { unsigned long hash = 0; size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; @@ -1466,7 +1454,22 @@ void __free_pages_core(struct page *page, unsigned int order) add_device_randomness((const void *)&hash, sizeof(hash)); #endif } +} +void __free_pages_core(struct page *page, unsigned int order) +{ + unsigned int nr_pages = 1 << order; + struct page *p = page; + unsigned int loop; + + prefetchw(p); + for (loop = 0; loop < (nr_pages - 1); loop++, p++) { + prefetchw(p + 1); + __ClearPageReserved(p); + set_page_count(p, 0); + } + __ClearPageReserved(p); + set_page_count(p, 0); atomic_long_add(nr_pages, &page_zone(page)->managed_pages); set_page_refcounted(page); __free_pages(page, order); @@ -1517,6 +1520,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, { if (early_page_uninitialised(pfn)) return; + __gather_extra_latent_entropy(page, 1 << order); __free_pages_core(page, order); } @@ -1607,6 +1611,7 @@ static void __init deferred_free_range(unsigned long pfn, if (nr_pages == pageblock_nr_pages && (pfn & (pageblock_nr_pages - 1)) == 0) { 
set_pageblock_migratetype(page, MIGRATE_MOVABLE); + __gather_extra_latent_entropy(page, 1 << pageblock_order); __free_pages_core(page, pageblock_order); return; } @@ -1614,6 +1619,7 @@ static void __init deferred_free_range(unsigned long pfn, for (i = 0; i < nr_pages; i++, page++, pfn++) { if ((pfn & (pageblock_nr_pages - 1)) == 0) set_pageblock_migratetype(page, MIGRATE_MOVABLE); + __gather_extra_latent_entropy(page, 1); __free_pages_core(page, 0); } } From 86bcf07a3f22b71b979199498e0bb3d620a14a95 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 31 Jul 2019 20:50:48 +0100 Subject: [PATCH 098/104] add CONFIG for unprivileged_userns_clone When disabled, unprivileged users will not be able to create new namespaces. Allowing users to create their own namespaces has been part of several recent local privilege escalation exploits, so if you need user namespaces but are paranoid^Wsecurity-conscious you want to disable this. By default unprivileged user namespaces are disabled. Authored-by: Jan Alexander Steffens (heftig) Edited-by: Levente Polyak (anthraxx) --- init/Kconfig | 16 ++++++++++++++++ kernel/user_namespace.c | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 52599ea654788..0353269eb81c9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1112,6 +1112,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + depends on USER_NS + default n + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say N. 
+ config PID_NS bool "PID Namespaces" default y diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index eea0a7694df85..c36ecd19562c2 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -22,7 +22,11 @@ #include /* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else int unprivileged_userns_clone; +#endif static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); From b9b5b844078b479cb2963d17e5d8989875fc250c Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Thu, 19 Sep 2019 19:02:23 +0200 Subject: [PATCH 099/104] enable INIT_ON_ALLOC_DEFAULT_ON by default --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 7c6fa3eb1a295..689d3819ec36d 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -169,6 +169,7 @@ config STACKLEAK_RUNTIME_DISABLE config INIT_ON_ALLOC_DEFAULT_ON bool "Enable heap memory zeroing on allocation by default" + default y help This has the effect of setting "init_on_alloc=1" on the kernel command line. This can be disabled with "init_on_alloc=0". From a746e05937d9535cb4798d75981697f09d88cbd1 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Thu, 19 Sep 2019 19:03:01 +0200 Subject: [PATCH 100/104] enable INIT_ON_FREE_DEFAULT_ON by default --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 689d3819ec36d..473e40bb8537d 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -182,6 +182,7 @@ config INIT_ON_ALLOC_DEFAULT_ON config INIT_ON_FREE_DEFAULT_ON bool "Enable heap memory zeroing on free by default" + default y help This has the effect of setting "init_on_free=1" on the kernel command line. This can be disabled with "init_on_free=0". 
From 922fadca071f426bc67032cba57bfb3e69b8cdec Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 2 Oct 2019 01:22:17 +0200 Subject: [PATCH 101/104] add CONFIG for unprivileged_userfaultfd When disabled, unprivileged users will not be able to use the userfaultfd syscall. Userfaultfd provide attackers with a way to stall a kernel thread in the middle of memory accesses from userspace by initiating an access on an unmapped page. To avoid various heap grooming and heap spraying techniques for exploiting use-after-free flaws this should be disabled by default. This setting can be overridden at runtime via the vm.unprivileged_userfaultfd sysctl. Signed-off-by: Levente Polyak --- fs/userfaultfd.c | 4 ++++ init/Kconfig | 17 +++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 37df7c9eedb15..97e21b2c26708 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -28,7 +28,11 @@ #include #include +#ifdef CONFIG_USERFAULTFD_UNPRIVILEGED int sysctl_unprivileged_userfaultfd __read_mostly = 1; +#else +int sysctl_unprivileged_userfaultfd __read_mostly; +#endif static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; diff --git a/init/Kconfig b/init/Kconfig index 0353269eb81c9..82a7a171adebb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1650,6 +1650,23 @@ config USERFAULTFD Enable the userfaultfd() system call that allows to intercept and handle page faults in userland. +config USERFAULTFD_UNPRIVILEGED + bool "Allow unprivileged users to use the userfaultfd syscall" + depends on USERFAULTFD + default n + help + When disabled, unprivileged users will not be able to use the userfaultfd + syscall. Userfaultfd provide attackers with a way to stall a kernel + thread in the middle of memory accesses from userspace by initiating an + access on an unmapped page. To avoid various heap grooming and heap + spraying techniques for exploiting use-after-free flaws this should be + disabled by default. 
+ + This setting can be overridden at runtime via the + vm.unprivileged_userfaultfd sysctl. + + If unsure, say N. + config ARCH_HAS_MEMBARRIER_CALLBACKS bool From 932258a589a324b14607f84fcc350c3856360bc7 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 29 Nov 2019 16:27:14 +0100 Subject: [PATCH 102/104] slub: Extend init_on_alloc to slab caches with constructors Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- mm/slab.h | 2 ++ mm/slub.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index d28687a4b45a5..975a75b7230c5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -683,8 +683,10 @@ static inline void cache_random_seq_destroy(struct kmem_cache *cachep) { } static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) { if (static_branch_unlikely(&init_on_alloc)) { +#ifndef CONFIG_SLUB if (c->ctor) return false; +#endif if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) return flags & __GFP_ZERO; return true; diff --git a/mm/slub.c b/mm/slub.c index cc1741cc7a643..58a9a9bcb10f7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2843,8 +2843,14 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, if (s->ctor) s->ctor(object); kasan_poison_object_data(s, object); - } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) + } else if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) { memset(object, 0, s->object_size); + if (s->ctor) { + kasan_unpoison_object_data(s, object); + s->ctor(object); + kasan_poison_object_data(s, object); + } + } if (object) { check_canary(s, object, s->random_inactive); @@ -3288,8 +3294,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } else if (unlikely(slab_want_init_on_alloc(flags, s))) { int j; - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { memset(p[j], 0, s->object_size); + if (s->ctor) { + kasan_unpoison_object_data(s, p[j]); + s->ctor(p[j]); + 
kasan_poison_object_data(s, p[j]); + } + } } for (k = 0; k < i; k++) { From 7e22e8c34c2c7787cd503b6f22f7ddd2399a4267 Mon Sep 17 00:00:00 2001 From: HacKurx Date: Tue, 31 Mar 2020 12:45:04 +0200 Subject: [PATCH 103/104] log the access to SUID Log the access to SUID. The display format is as follows: [ 9.799423] linux-hardened: exim4 executed by the uid/euid:0/0 just used a SUID [ 78.596654] linux-hardened: bash executed by the uid/euid:1000/1000 just used a SUID --- fs/exec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/exec.c b/fs/exec.c index 1f5c4bc942bdd..c7b923b09c2b4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1558,6 +1558,8 @@ static void bprm_fill_uid(struct linux_binprm *bprm) !kgid_has_mapping(bprm->cred->user_ns, gid)) return; + printk(KERN_ERR "linux-hardened: %s executed by the uid/euid:%u/%u just used a SUID\n", current->comm, from_kuid(&init_user_ns, current_uid()), from_kuid(&init_user_ns, current_euid())); + if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = uid; From 13ef903b63a160fe1308bb8313d1cb9b74861854 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc?= <4661917+HacKurx@users.noreply.github.com> Date: Tue, 31 Mar 2020 14:23:19 +0200 Subject: [PATCH 104/104] Use KERN_WARNING to log the access to SUID @anthraxx prefers to use KERN_WARNING. --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exec.c b/fs/exec.c index c7b923b09c2b4..a274360d63de5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1558,7 +1558,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) !kgid_has_mapping(bprm->cred->user_ns, gid)) return; - printk(KERN_ERR "linux-hardened: %s executed by the uid/euid:%u/%u just used a SUID\n", current->comm, from_kuid(&init_user_ns, current_uid()), from_kuid(&init_user_ns, current_euid())); + printk(KERN_WARNING "linux-hardened: %s executed by the uid/euid:%u/%u just used a SUID\n", current->comm, from_kuid(&init_user_ns, current_uid()), from_kuid(&init_user_ns, current_euid())); if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID;