Closed
Description
I'm testing #1076 with clang/test/CodeGen/tbaa.cpp, and I have noticed an inconsistency in the alignment of store instructions between the LLVM IR generated by Clang and the LLVM IR produced by ClangIR.
Here is a simplified example that demonstrates the difference:
// demo.cc
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef struct {
uint16_t f16;
uint32_t f32;
uint16_t f16_2;
uint32_t f32_2;
} StructA;
uint32_t g2(uint32_t *s, StructA *A, uint64_t count) {
*s = 1;
A->f16 = 4;
return *s;
}
LLVM IR Generated by Clang
./bin/clang++ -c demo.cc -Xclang -emit-llvm -o demo.orig.ll
The following is the LLVM IR output when compiled with clang:
// demo.orig.ll
define dso_local noundef i32 @_Z2g2PjP7StructAy(ptr noundef %s, ptr noundef %A, i64 noundef %count) #0 {
entry:
%s.addr = alloca ptr, align 8
%A.addr = alloca ptr, align 8
%count.addr = alloca i64, align 8
store ptr %s, ptr %s.addr, align 8
store ptr %A, ptr %A.addr, align 8
store i64 %count, ptr %count.addr, align 8
%0 = load ptr, ptr %s.addr, align 8
store i32 1, ptr %0, align 4
%1 = load ptr, ptr %A.addr, align 8
%f16 = getelementptr inbounds nuw %struct.StructA, ptr %1, i32 0, i32 0
// highlight align 4
store i16 4, ptr %f16, align 4
%2 = load ptr, ptr %s.addr, align 8
%3 = load i32, ptr %2, align 4
ret i32 %3
}
LLVM IR Generated by ClangIR
./bin/clang++ -c demo.cc -Xclang -emit-llvm -o demo.ll -fclangir
In contrast, the LLVM IR produced by ClangIR is as follows:
// demo.ll
define dso_local i32 @_Z2g2PjP7StructAy(ptr %0, ptr %1, i64 %2) #0 {
%4 = alloca ptr, i64 1, align 8
%5 = alloca ptr, i64 1, align 8
%6 = alloca i64, i64 1, align 8
%7 = alloca i32, i64 1, align 4
store ptr %0, ptr %4, align 8
store ptr %1, ptr %5, align 8
store i64 %2, ptr %6, align 8
%8 = load ptr, ptr %4, align 8
store i32 1, ptr %8, align 4
%9 = load ptr, ptr %5, align 8
%10 = getelementptr %struct.StructA, ptr %9, i32 0, i32 0
// highlight align 2
store i16 4, ptr %10, align 2
%11 = load ptr, ptr %4, align 8
%12 = load i32, ptr %11, align 4
store i32 %12, ptr %7, align 4
%13 = load i32, ptr %7, align 4
ret i32 %13
}
Comparison
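The significant difference lies in the alignment of the store to `A->f16`. Since `f16` sits at offset 0 of `StructA`, whose alignment is 4 because of its `uint32_t` members, Clang's `align 4` appears to be derived from the struct's alignment, whereas ClangIR appears to use only the natural 2-byte alignment of `uint16_t`: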
- In the Clang-generated LLVM IR:
store i16 4, ptr %f16, align 4
- In the ClangIR-generated LLVM IR:
store i16 4, ptr %10, align 2