Commit c23aae1

librasan: Use hand-optimized assembly implementations from musl

1 parent 03e6370

14 files changed (+1317, -3 lines)

libafl_qemu/librasan/asan/Cargo.toml (+3)
@@ -21,6 +21,7 @@ default = [
     "mimalloc",
     "test",
     "tracking",
+    "optimized-assembly",
 ]
 ## Enable support for the `dlmalloc` allocator backend
 dlmalloc = ["dep:dlmalloc"]
@@ -46,6 +47,8 @@ mimalloc = ["dep:baby-mimalloc"]
 test = []
 ## Enable support for memory tracking
 tracking = []
+## Enable the optimized assembly implementations of various libc functions
+optimized-assembly = []

 [dependencies]
 baby-mimalloc = { version = "0.2.1", default-features = false, features = [
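Since `optimized-assembly` joins the default feature set, it can be gated on like any other Cargo feature. A minimal sketch of the cfg mechanism, not taken from this commit (the function name is hypothetical; the real consumer is the `cfg!(...)` check in build.rs below):

    // Hypothetical illustration of branching on the new feature.
    #[cfg(feature = "optimized-assembly")]
    pub fn mem_backend() -> &'static str {
        "musl hand-optimized assembly"
    }

    #[cfg(not(feature = "optimized-assembly"))]
    pub fn mem_backend() -> &'static str {
        "portable C implementations"
    }

Downstream users who prefer the portable C routines everywhere can build with `--no-default-features` and re-enable the other default features individually.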

libafl_qemu/librasan/asan/build.rs (+36, -3)

@@ -1,26 +1,59 @@
+use std::env;
+
 fn compile(file: &str, output: &str) {
     cc::Build::new()
         .define("_GNU_SOURCE", None)
         .opt_level(3)
         .flag("-Werror")
         .flag("-fno-stack-protector")
         .flag("-ffunction-sections")
+        .flag("-Wa,--noexecstack")
         .include("libc/include/")
         .file(file)
         .compile(output);
 }

 fn main() {
     println!("cargo:rerun-if-changed=libc");
+    let target = env::var("CARGO_CFG_TARGET_ARCH").unwrap();

     compile("libc/src/asprintf.c", "asprintf");
     compile("libc/src/log.c", "log");
     compile("libc/src/printf.c", "printf");
     compile("libc/src/vasprintf.c", "vasprintf");

     compile("libc/src/memcmp.c", "memcmp");
-    compile("libc/src/memcpy.c", "memcpy");
-    compile("libc/src/memmove.c", "memmove");
-    compile("libc/src/memset.c", "memset");
+
+    let mut memcpy = "libc/src/memcpy.c";
+    let mut memmove = "libc/src/memmove.c";
+    let mut memset = "libc/src/memset.c";
+
+    if cfg!(feature = "optimized-assembly") {
+        match target.as_str() {
+            "aarch64" => {
+                memcpy = "libc/src/aarch64/memcpy.S";
+                memset = "libc/src/aarch64/memset.S";
+            }
+            "arm" => {
+                memcpy = "libc/src/arm/memcpy.S";
+            }
+            "x86" => {
+                memcpy = "libc/src/i386/memcpy.s";
+                memmove = "libc/src/i386/memmove.s";
+                memset = "libc/src/i386/memset.s";
+            }
+            "x86_64" => {
+                memcpy = "libc/src/x86_64/memcpy.s";
+                memmove = "libc/src/x86_64/memmove.s";
+                memset = "libc/src/x86_64/memset.s";
+            }
+            _ => {}
+        }
+    }
+
+    compile(memcpy, "memcpy");
+    compile(memmove, "memmove");
+    compile(memset, "memset");
+
     compile("libc/src/strlen.c", "strlen");
 }
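The routines compiled here end up as ordinary C symbols in the resulting static library, so the Rust side can reach them through `extern "C"` declarations. A hedged sketch of what such bindings could look like (librasan's actual hook layer is not part of this excerpt; `fill_and_copy` is a made-up example):

    use core::ffi::c_void;

    extern "C" {
        // Standard C signatures; the symbols come from the objects
        // that build.rs compiles above.
        fn memcpy(dest: *mut c_void, src: *const c_void, n: usize) -> *mut c_void;
        fn memset(dest: *mut c_void, c: i32, n: usize) -> *mut c_void;
    }

    fn fill_and_copy(buf: &mut [u8; 32]) {
        let mid = buf.len() / 2;
        unsafe {
            // Fill the first half, then copy it over the (non-overlapping)
            // second half.
            memset(buf.as_mut_ptr().cast(), 0x41, mid);
            memcpy(buf.as_mut_ptr().add(mid).cast(), buf.as_ptr().cast(), mid);
        }
    }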
libafl_qemu/librasan/asan/libc/src/aarch64/memcpy.S (new file, +214)
/*
 * https://git.musl-libc.org/cgit/musl/tree/src/string/aarch64/memcpy.S?h=v1.2.5
 *
 * This file has been copied from musl v1.2.5, which is licensed under the
 * following license:
 *
 * Copyright © 2005-2020 Rich Felker, et al.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * memcpy - copy memory area
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define dstin   x0
#define src     x1
#define count   x2
#define dst     x3
#define srcend  x4
#define dstend  x5
#define A_l     x6
#define A_lw    w6
#define A_h     x7
#define B_l     x8
#define B_lw    w8
#define B_h     x9
#define C_l     x10
#define C_lw    w10
#define C_h     x11
#define D_l     x12
#define D_h     x13
#define E_l     x14
#define E_h     x15
#define F_l     x16
#define F_h     x17
#define G_l     count
#define G_h     dst
#define H_l     src
#define H_h     srcend
#define tmp1    x14

/* This implementation of memcpy uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies. The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per
   iteration. The destination pointer is 16-byte aligned to minimize
   unaligned accesses. The loop tail is handled by always copying 64 bytes
   from the end.
*/

.global memcpy
.type memcpy,%function
memcpy:
    add     srcend, src, count
    add     dstend, dstin, count
    cmp     count, 128
    b.hi    .Lcopy_long
    cmp     count, 32
    b.hi    .Lcopy32_128

    /* Small copies: 0..32 bytes. */
    cmp     count, 16
    b.lo    .Lcopy16
    ldp     A_l, A_h, [src]
    ldp     D_l, D_h, [srcend, -16]
    stp     A_l, A_h, [dstin]
    stp     D_l, D_h, [dstend, -16]
    ret

    /* Copy 8-15 bytes. */
.Lcopy16:
    tbz     count, 3, .Lcopy8
    ldr     A_l, [src]
    ldr     A_h, [srcend, -8]
    str     A_l, [dstin]
    str     A_h, [dstend, -8]
    ret

    .p2align 3
    /* Copy 4-7 bytes. */
.Lcopy8:
    tbz     count, 2, .Lcopy4
    ldr     A_lw, [src]
    ldr     B_lw, [srcend, -4]
    str     A_lw, [dstin]
    str     B_lw, [dstend, -4]
    ret

    /* Copy 0..3 bytes using a branchless sequence. */
.Lcopy4:
    cbz     count, .Lcopy0
    lsr     tmp1, count, 1
    ldrb    A_lw, [src]
    ldrb    C_lw, [srcend, -1]
    ldrb    B_lw, [src, tmp1]
    strb    A_lw, [dstin]
    strb    B_lw, [dstin, tmp1]
    strb    C_lw, [dstend, -1]
.Lcopy0:
    ret

    .p2align 4
    /* Medium copies: 33..128 bytes. */
.Lcopy32_128:
    ldp     A_l, A_h, [src]
    ldp     B_l, B_h, [src, 16]
    ldp     C_l, C_h, [srcend, -32]
    ldp     D_l, D_h, [srcend, -16]
    cmp     count, 64
    b.hi    .Lcopy128
    stp     A_l, A_h, [dstin]
    stp     B_l, B_h, [dstin, 16]
    stp     C_l, C_h, [dstend, -32]
    stp     D_l, D_h, [dstend, -16]
    ret

    .p2align 4
    /* Copy 65..128 bytes. */
.Lcopy128:
    ldp     E_l, E_h, [src, 32]
    ldp     F_l, F_h, [src, 48]
    cmp     count, 96
    b.ls    .Lcopy96
    ldp     G_l, G_h, [srcend, -64]
    ldp     H_l, H_h, [srcend, -48]
    stp     G_l, G_h, [dstend, -64]
    stp     H_l, H_h, [dstend, -48]
.Lcopy96:
    stp     A_l, A_h, [dstin]
    stp     B_l, B_h, [dstin, 16]
    stp     E_l, E_h, [dstin, 32]
    stp     F_l, F_h, [dstin, 48]
    stp     C_l, C_h, [dstend, -32]
    stp     D_l, D_h, [dstend, -16]
    ret

    .p2align 4
    /* Copy more than 128 bytes. */
.Lcopy_long:
    /* Copy 16 bytes and then align dst to 16-byte alignment. */
    ldp     D_l, D_h, [src]
    and     tmp1, dstin, 15
    bic     dst, dstin, 15
    sub     src, src, tmp1
    add     count, count, tmp1    /* Count is now 16 too large. */
    ldp     A_l, A_h, [src, 16]
    stp     D_l, D_h, [dstin]
    ldp     B_l, B_h, [src, 32]
    ldp     C_l, C_h, [src, 48]
    ldp     D_l, D_h, [src, 64]!
    subs    count, count, 128 + 16    /* Test and readjust count. */
    b.ls    .Lcopy64_from_end

.Lloop64:
    stp     A_l, A_h, [dst, 16]
    ldp     A_l, A_h, [src, 16]
    stp     B_l, B_h, [dst, 32]
    ldp     B_l, B_h, [src, 32]
    stp     C_l, C_h, [dst, 48]
    ldp     C_l, C_h, [src, 48]
    stp     D_l, D_h, [dst, 64]!
    ldp     D_l, D_h, [src, 64]!
    subs    count, count, 64
    b.hi    .Lloop64

    /* Write the last iteration and copy 64 bytes from the end. */
.Lcopy64_from_end:
    ldp     E_l, E_h, [srcend, -64]
    stp     A_l, A_h, [dst, 16]
    ldp     A_l, A_h, [srcend, -48]
    stp     B_l, B_h, [dst, 32]
    ldp     B_l, B_h, [srcend, -32]
    stp     C_l, C_h, [dst, 48]
    ldp     C_l, C_h, [srcend, -16]
    stp     D_l, D_h, [dst, 64]
    stp     E_l, E_h, [dstend, -64]
    stp     A_l, A_h, [dstend, -48]
    stp     B_l, B_h, [dstend, -32]
    stp     C_l, C_h, [dstend, -16]
    ret

.size memcpy,.-memcpy
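The header comment above describes the small-copy strategy: load from both ends of the buffer, then issue two possibly overlapping stores that together cover every byte, with no branch on the exact length. A rough Rust rendering of the 16..=32-byte case (illustration only, not part of the commit):

    // Hedged sketch: the "load both ends, store both ends" trick the
    // assembly uses for 16..=32-byte copies, in safe Rust.
    fn copy_small(dst: &mut [u8], src: &[u8]) {
        let n = src.len();
        assert!(dst.len() == n && (16..=32).contains(&n));
        let mut head = [0u8; 16];
        let mut tail = [0u8; 16];
        head.copy_from_slice(&src[..16]);     // ldp A_l, A_h, [src]
        tail.copy_from_slice(&src[n - 16..]); // ldp D_l, D_h, [srcend, -16]
        dst[..16].copy_from_slice(&head);     // stp A_l, A_h, [dstin]
        dst[n - 16..].copy_from_slice(&tail); // stp D_l, D_h, [dstend, -16]
    }

The two 16-byte windows overlap whenever n < 32, yet together they cover all n bytes. The same idea handles the large-copy tail: .Lcopy64_from_end always copies the final 64 bytes relative to srcend/dstend, so the main loop needs no remainder handling.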
