Commit c8dce6f

Fix bounds-check elision with 4 GiB guard pages. (#11973)
* Fix bounds-check elision with 4 GiB guard pages.

  First, this fixes the `can_elide_bounds_check` function to consider the guard size when determining whether all 32 bits of a 32-bit index space will be at least reserved. This allows configurations that use 4 GiB guard regions with smaller memory reservations to benefit from bounds-check elision.

  Second, this fixes a subtract overflow in `bounds_check_field_access` when the offset is greater than the memory reservation size. In this case, optimization isn't important because the code can only ever trap, so the change here is just to avoid panicking in the compiler.

* Fix winch's bounds-check elimination logic.

* Winch now uses more efficient bounds checks for statically-sized memory.

* prtest:full
1 parent 1308d0a commit c8dce6f

6 files changed: +156 −108 lines changed


crates/cranelift/src/bounds_checks.rs

Lines changed: 1 addition & 0 deletions
@@ -402,6 +402,7 @@ fn bounds_check_field_access(
     if can_use_virtual_memory
         && heap.memory.minimum_byte_size().unwrap_or(u64::MAX) <= memory_reservation
         && !heap.memory.memory_may_move(env.tunables())
+        && memory_reservation >= offset_and_size
     {
         let adjusted_bound = memory_reservation.checked_sub(offset_and_size).unwrap();
         let adjusted_bound_value = builder

crates/environ/src/types.rs

Lines changed: 1 addition & 1 deletion
@@ -2152,7 +2152,7 @@ impl Memory {
     pub fn can_elide_bounds_check(&self, tunables: &Tunables, host_page_size_log2: u8) -> bool {
         self.can_use_virtual_memory(tunables, host_page_size_log2)
             && self.idx_type == IndexType::I32
-            && tunables.memory_reservation >= (1 << 32)
+            && tunables.memory_reservation + tunables.memory_guard_size >= (1 << 32)
     }

     /// Returns the static size of this heap in bytes at runtime, if available.

tests/disas/bounds-check.wat

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+;;! test = "optimize"
+;;! target = "x86_64"
+;;! flags = ["-Omemory-reservation=0x8000000", "-Omemory-guard-size=0x100000000", "-Omemory-may-move=n"]
+
+(module
+  (memory 16)
+  (func $store (param i32)
+    ;; No offset. But because we have a 4 GiB guard, this needs no bounds check.
+    local.get 0
+    i32.const 0
+    i32.store8 0
+
+    ;; The greatest possible offset that can ever be in bounds. Again, no
+    ;; bounds check.
+    local.get 0
+    i32.const 0
+    i32.store8 0 offset=134217727
+
+    ;; The greatest encodable offset. This will never be in bounds, given
+    ;; our memory reservation size, so optimization isn't a concern.
+    local.get 0
+    i32.const 0
+    i32.store8 0 offset=4294967295
+  )
+  (export "store" (func $store))
+)
+;; function u0:0(i64 vmctx, i64, i32) tail {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned readonly gv0+8
+;;     gv2 = load.i64 notrap aligned gv1+16
+;;     gv3 = vmctx
+;;     gv4 = load.i64 notrap aligned gv3+64
+;;     gv5 = load.i64 notrap aligned readonly can_move checked gv3+56
+;;     stack_limit = gv2
+;;
+;; block0(v0: i64, v1: i64, v2: i32):
+;; @002a v3 = iconst.i32 0
+;; @002c v5 = load.i64 notrap aligned readonly can_move checked v0+56
+;; @002c v4 = uextend.i64 v2
+;; @002c v6 = iadd v5, v4
+;; @002c istore8 little heap v3, v6 ; v3 = 0
+;; @0033 v11 = iconst.i64 0x07ff_ffff
+;; @0033 v12 = iadd v6, v11 ; v11 = 0x07ff_ffff
+;; @0033 istore8 little heap v3, v12 ; v3 = 0
+;; @003d v15 = load.i64 notrap aligned v0+64
+;; @003d v16 = icmp ugt v4, v15
+;; @003d v21 = iconst.i64 0
+;; @003d v19 = iconst.i64 0xffff_ffff
+;; @003d v20 = iadd v6, v19 ; v19 = 0xffff_ffff
+;; @003d v22 = select_spectre_guard v16, v21, v20 ; v21 = 0
+;; @003d istore8 little heap v3, v22 ; v3 = 0
+;; @0044 jump block1
+;;
+;; block1:
+;; @0044 return
+;; }

tests/disas/winch/x64/load/v128_load32x2_s_oob_avx.wat

Lines changed: 11 additions & 16 deletions
@@ -14,27 +14,22 @@
 ;; movq 0x10(%r11), %r11
 ;; addq $0x10, %r11
 ;; cmpq %rsp, %r11
-;; ja 0x72
+;; ja 0x64
 ;; 1c: movq %rdi, %r14
 ;; subq $0x10, %rsp
 ;; movq %rdi, 8(%rsp)
 ;; movq %rsi, (%rsp)
 ;; movl $0xfff9, %eax
-;; movl $0x10000, %ecx
-;; movl %eax, %edx
-;; addq $8, %rdx
-;; jb 0x74
-;; 48: cmpq %rcx, %rdx
-;; ja 0x76
-;; 51: movq 0x38(%r14), %rbx
-;; addq %rax, %rbx
-;; movl $0, %esi
-;; cmpq %rcx, %rdx
-;; cmovaq %rsi, %rbx
-;; vpmovsxdq (%rbx), %xmm0
+;; cmpq $0xfff8, %rax
+;; ja 0x66
+;; 40: movq 0x38(%r14), %rcx
+;; addq %rax, %rcx
+;; movl $0, %edx
+;; cmpq $0xfff8, %rax
+;; cmovaq %rdx, %rcx
+;; vpmovsxdq (%rcx), %xmm0
 ;; addq $0x10, %rsp
 ;; popq %rbp
 ;; retq
-;; 72: ud2
-;; 74: ud2
-;; 76: ud2
+;; 64: ud2
+;; 66: ud2

tests/disas/winch/x64/load/v128_load32x2_u_oob_avx.wat

Lines changed: 11 additions & 16 deletions
@@ -14,27 +14,22 @@
 ;; movq 0x10(%r11), %r11
 ;; addq $0x10, %r11
 ;; cmpq %rsp, %r11
-;; ja 0x72
+;; ja 0x64
 ;; 1c: movq %rdi, %r14
 ;; subq $0x10, %rsp
 ;; movq %rdi, 8(%rsp)
 ;; movq %rsi, (%rsp)
 ;; movl $0xfff9, %eax
-;; movl $0x10000, %ecx
-;; movl %eax, %edx
-;; addq $8, %rdx
-;; jb 0x74
-;; 48: cmpq %rcx, %rdx
-;; ja 0x76
-;; 51: movq 0x38(%r14), %rbx
-;; addq %rax, %rbx
-;; movl $0, %esi
-;; cmpq %rcx, %rdx
-;; cmovaq %rsi, %rbx
-;; vpmovzxdq (%rbx), %xmm0
+;; cmpq $0xfff8, %rax
+;; ja 0x66
+;; 40: movq 0x38(%r14), %rcx
+;; addq %rax, %rcx
+;; movl $0, %edx
+;; cmpq $0xfff8, %rax
+;; cmovaq %rdx, %rcx
+;; vpmovzxdq (%rcx), %xmm0
 ;; addq $0x10, %rsp
 ;; popq %rbp
 ;; retq
-;; 72: ud2
-;; 74: ud2
-;; 76: ud2
+;; 64: ud2
+;; 66: ud2

winch/codegen/src/codegen/mod.rs

Lines changed: 76 additions & 75 deletions
@@ -695,6 +695,8 @@ where
             .can_elide_bounds_check(self.tunables, self.env.page_size_log2);

         let addr = if offset_with_access_size > heap.memory.maximum_byte_size().unwrap_or(u64::MAX)
+            || (!self.tunables.memory_may_move
+                && offset_with_access_size > self.tunables.memory_reservation)
         {
             // Detect at compile time if the access is out of bounds.
             // Doing so will put the compiler in an unreachable code state,
@@ -706,7 +708,80 @@ where
             self.masm.trap(TrapCode::HEAP_OUT_OF_BOUNDS)?;
             self.context.reachable = false;
             None
-        } else if !can_elide_bounds_check {
+
+            // Account for the case in which we can completely elide the bounds
+            // checks.
+            //
+            // This case, makes use of the fact that if a memory access uses
+            // a 32-bit index, then we be certain that
+            //
+            //     index <= u32::MAX
+            //
+            // Therefore if any 32-bit index access occurs in the region
+            // represented by
+            //
+            //     bound + guard_size - (offset + access_size)
+            //
+            // We are certain that it's in bounds or that the underlying virtual
+            // memory subsystem will report an illegal access at runtime.
+            //
+            // Note:
+            //
+            // * bound - (offset + access_size) cannot wrap, because it's checked
+            //   in the condition above.
+            // * bound + heap.offset_guard_size is guaranteed to not overflow if
+            //   the heap configuration is correct, given that it's address must
+            //   fit in 64-bits.
+            // * If the heap type is 32-bits, the offset is at most u32::MAX, so
+            //   no adjustment is needed as part of
+            //   [bounds::ensure_index_and_offset].
+        } else if can_elide_bounds_check
+            && u64::from(u32::MAX)
+                <= self.tunables.memory_reservation + self.tunables.memory_guard_size
+                    - offset_with_access_size
+        {
+            assert!(can_elide_bounds_check);
+            assert!(heap.index_type() == WasmValType::I32);
+            let addr = self.context.any_gpr(self.masm)?;
+            bounds::load_heap_addr_unchecked(self.masm, &heap, index, offset, addr, ptr_size)?;
+            Some(addr)
+
+            // Account for the case of a static memory size. The access is out
+            // of bounds if:
+            //
+            //     index > bound - (offset + access_size)
+            //
+            // bound - (offset + access_size) cannot wrap, because we already
+            // checked that (offset + access_size) > bound, above.
+        } else if let Some(static_size) = heap.memory.static_heap_size() {
+            let bounds = Bounds::from_u64(static_size);
+            let addr = bounds::load_heap_addr_checked(
+                self.masm,
+                &mut self.context,
+                ptr_size,
+                &heap,
+                enable_spectre_mitigation,
+                bounds,
+                index,
+                offset,
+                |masm, bounds, index| {
+                    let adjusted_bounds = bounds.as_u64() - offset_with_access_size;
+                    let index_reg = index.as_typed_reg().reg;
+                    masm.cmp(
+                        index_reg,
+                        RegImm::i64(adjusted_bounds as i64),
+                        // Similar to the dynamic heap case, even though the
+                        // offset and access size are bound through the heap
+                        // type, when added they can overflow, resulting in
+                        // an erroneous comparison, therefore we rely on the
+                        // target pointer size.
+                        ptr_size,
+                    )?;
+                    Ok(IntCmpKind::GtU)
+                },
+            )?;
+            Some(addr)
+        } else {
             // Account for the general case for bounds-checked memories. The
             // access is out of bounds if:
             // * index + offset + access_size overflows
@@ -784,80 +859,6 @@ where
             self.context.free_reg(bounds.as_typed_reg().reg);
             self.context.free_reg(index_offset_and_access_size);
             Some(addr)
-
-            // Account for the case in which we can completely elide the bounds
-            // checks.
-            //
-            // This case, makes use of the fact that if a memory access uses
-            // a 32-bit index, then we be certain that
-            //
-            //     index <= u32::MAX
-            //
-            // Therefore if any 32-bit index access occurs in the region
-            // represented by
-            //
-            //     bound + guard_size - (offset + access_size)
-            //
-            // We are certain that it's in bounds or that the underlying virtual
-            // memory subsystem will report an illegal access at runtime.
-            //
-            // Note:
-            //
-            // * bound - (offset + access_size) cannot wrap, because it's checked
-            //   in the condition above.
-            // * bound + heap.offset_guard_size is guaranteed to not overflow if
-            //   the heap configuration is correct, given that it's address must
-            //   fit in 64-bits.
-            // * If the heap type is 32-bits, the offset is at most u32::MAX, so
-            //   no adjustment is needed as part of
-            //   [bounds::ensure_index_and_offset].
-        } else if u64::from(u32::MAX)
-            <= self.tunables.memory_reservation + self.tunables.memory_guard_size
-                - offset_with_access_size
-        {
-            assert!(can_elide_bounds_check);
-            assert!(heap.index_type() == WasmValType::I32);
-            let addr = self.context.any_gpr(self.masm)?;
-            bounds::load_heap_addr_unchecked(self.masm, &heap, index, offset, addr, ptr_size)?;
-            Some(addr)
-
-            // Account for the all remaining cases, aka. The access is out
-            // of bounds if:
-            //
-            //     index > bound - (offset + access_size)
-            //
-            // bound - (offset + access_size) cannot wrap, because we already
-            // checked that (offset + access_size) > bound, above.
-        } else {
-            assert!(can_elide_bounds_check);
-            assert!(heap.index_type() == WasmValType::I32);
-            let bounds = Bounds::from_u64(self.tunables.memory_reservation);
-            let addr = bounds::load_heap_addr_checked(
-                self.masm,
-                &mut self.context,
-                ptr_size,
-                &heap,
-                enable_spectre_mitigation,
-                bounds,
-                index,
-                offset,
-                |masm, bounds, index| {
-                    let adjusted_bounds = bounds.as_u64() - offset_with_access_size;
-                    let index_reg = index.as_typed_reg().reg;
-                    masm.cmp(
-                        index_reg,
-                        RegImm::i64(adjusted_bounds as i64),
-                        // Similar to the dynamic heap case, even though the
-                        // offset and access size are bound through the heap
-                        // type, when added they can overflow, resulting in
-                        // an erroneous comparison, therefore we rely on the
-                        // target pointer size.
-                        ptr_size,
-                    )?;
-                    Ok(IntCmpKind::GtU)
-                },
-            )?;
-            Some(addr)
         };

         self.context.free_reg(index.as_typed_reg().reg);
