Skip to content

Commit dfc5514

Browse files
committed
Add f16 and f128 inline ASM support for x86 and x86-64
1 parent 9fdbfe1 commit dfc5514

File tree

6 files changed

+350
-42
lines changed

6 files changed

+350
-42
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+100
Original file line number | Diff line number | Diff line change
@@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>(
959959
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
960960
Abi::Vector { .. },
961961
) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
962+
(
963+
InlineAsmRegClass::X86(
964+
X86InlineAsmRegClass::xmm_reg
965+
| X86InlineAsmRegClass::ymm_reg
966+
| X86InlineAsmRegClass::zmm_reg,
967+
),
968+
Abi::Scalar(s),
969+
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
970+
&& s.primitive() == Primitive::Float(Float::F128) =>
971+
{
972+
bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))
973+
}
974+
(
975+
InlineAsmRegClass::X86(
976+
X86InlineAsmRegClass::xmm_reg
977+
| X86InlineAsmRegClass::ymm_reg
978+
| X86InlineAsmRegClass::zmm_reg,
979+
),
980+
Abi::Scalar(s),
981+
) if s.primitive() == Primitive::Float(Float::F16) => {
982+
let value = bx.insert_element(
983+
bx.const_undef(bx.type_vector(bx.type_f16(), 8)),
984+
value,
985+
bx.const_usize(0),
986+
);
987+
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
988+
}
989+
(
990+
InlineAsmRegClass::X86(
991+
X86InlineAsmRegClass::xmm_reg
992+
| X86InlineAsmRegClass::ymm_reg
993+
| X86InlineAsmRegClass::zmm_reg,
994+
),
995+
Abi::Vector { element, count: count @ (8 | 16) },
996+
) if element.primitive() == Primitive::Float(Float::F16) => {
997+
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
998+
}
962999
(
9631000
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
9641001
Abi::Scalar(s),
@@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>(
10361073
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
10371074
Abi::Vector { .. },
10381075
) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
1076+
(
1077+
InlineAsmRegClass::X86(
1078+
X86InlineAsmRegClass::xmm_reg
1079+
| X86InlineAsmRegClass::ymm_reg
1080+
| X86InlineAsmRegClass::zmm_reg,
1081+
),
1082+
Abi::Scalar(s),
1083+
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
1084+
&& s.primitive() == Primitive::Float(Float::F128) =>
1085+
{
1086+
bx.bitcast(value, bx.type_f128())
1087+
}
1088+
(
1089+
InlineAsmRegClass::X86(
1090+
X86InlineAsmRegClass::xmm_reg
1091+
| X86InlineAsmRegClass::ymm_reg
1092+
| X86InlineAsmRegClass::zmm_reg,
1093+
),
1094+
Abi::Scalar(s),
1095+
) if s.primitive() == Primitive::Float(Float::F16) => {
1096+
let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));
1097+
bx.extract_element(value, bx.const_usize(0))
1098+
}
1099+
(
1100+
InlineAsmRegClass::X86(
1101+
X86InlineAsmRegClass::xmm_reg
1102+
| X86InlineAsmRegClass::ymm_reg
1103+
| X86InlineAsmRegClass::zmm_reg,
1104+
),
1105+
Abi::Vector { element, count: count @ (8 | 16) },
1106+
) if element.primitive() == Primitive::Float(Float::F16) => {
1107+
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
1108+
}
10391109
(
10401110
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
10411111
Abi::Scalar(s),
@@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
11091179
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
11101180
Abi::Vector { .. },
11111181
) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
1182+
(
1183+
InlineAsmRegClass::X86(
1184+
X86InlineAsmRegClass::xmm_reg
1185+
| X86InlineAsmRegClass::ymm_reg
1186+
| X86InlineAsmRegClass::zmm_reg,
1187+
),
1188+
Abi::Scalar(s),
1189+
) if cx.sess().asm_arch == Some(InlineAsmArch::X86)
1190+
&& s.primitive() == Primitive::Float(Float::F128) =>
1191+
{
1192+
cx.type_vector(cx.type_i32(), 4)
1193+
}
1194+
(
1195+
InlineAsmRegClass::X86(
1196+
X86InlineAsmRegClass::xmm_reg
1197+
| X86InlineAsmRegClass::ymm_reg
1198+
| X86InlineAsmRegClass::zmm_reg,
1199+
),
1200+
Abi::Scalar(s),
1201+
) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),
1202+
(
1203+
InlineAsmRegClass::X86(
1204+
X86InlineAsmRegClass::xmm_reg
1205+
| X86InlineAsmRegClass::ymm_reg
1206+
| X86InlineAsmRegClass::zmm_reg,
1207+
),
1208+
Abi::Vector { element, count: count @ (8 | 16) },
1209+
) if element.primitive() == Primitive::Float(Float::F16) => {
1210+
cx.type_vector(cx.type_i16(), count)
1211+
}
11121212
(
11131213
InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
11141214
Abi::Scalar(s),

compiler/rustc_hir_analysis/src/check/intrinsicck.rs

+4
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> {
6262
ty::Int(IntTy::I64) | ty::Uint(UintTy::U64) => Some(InlineAsmType::I64),
6363
ty::Int(IntTy::I128) | ty::Uint(UintTy::U128) => Some(InlineAsmType::I128),
6464
ty::Int(IntTy::Isize) | ty::Uint(UintTy::Usize) => Some(asm_ty_isize),
65+
ty::Float(FloatTy::F16) => Some(InlineAsmType::F16),
6566
ty::Float(FloatTy::F32) => Some(InlineAsmType::F32),
6667
ty::Float(FloatTy::F64) => Some(InlineAsmType::F64),
68+
ty::Float(FloatTy::F128) => Some(InlineAsmType::F128),
6769
ty::FnPtr(_) => Some(asm_ty_isize),
6870
ty::RawPtr(ty, _) if self.is_thin_ptr_ty(ty) => Some(asm_ty_isize),
6971
ty::Adt(adt, args) if adt.repr().simd() => {
@@ -105,8 +107,10 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> {
105107
width => bug!("unsupported pointer width: {width}"),
106108
})
107109
}
110+
ty::Float(FloatTy::F16) => Some(InlineAsmType::VecF16(size)),
108111
ty::Float(FloatTy::F32) => Some(InlineAsmType::VecF32(size)),
109112
ty::Float(FloatTy::F64) => Some(InlineAsmType::VecF64(size)),
113+
ty::Float(FloatTy::F128) => Some(InlineAsmType::VecF128(size)),
110114
_ => None,
111115
}
112116
}

compiler/rustc_target/src/asm/mod.rs

+12
Original file line number | Diff line number | Diff line change
@@ -707,15 +707,19 @@ pub enum InlineAsmType {
707707
I32,
708708
I64,
709709
I128,
710+
F16,
710711
F32,
711712
F64,
713+
F128,
712714
VecI8(u64),
713715
VecI16(u64),
714716
VecI32(u64),
715717
VecI64(u64),
716718
VecI128(u64),
719+
VecF16(u64),
717720
VecF32(u64),
718721
VecF64(u64),
722+
VecF128(u64),
719723
}
720724

721725
impl InlineAsmType {
@@ -730,15 +734,19 @@ impl InlineAsmType {
730734
Self::I32 => 4,
731735
Self::I64 => 8,
732736
Self::I128 => 16,
737+
Self::F16 => 2,
733738
Self::F32 => 4,
734739
Self::F64 => 8,
740+
Self::F128 => 16,
735741
Self::VecI8(n) => n * 1,
736742
Self::VecI16(n) => n * 2,
737743
Self::VecI32(n) => n * 4,
738744
Self::VecI64(n) => n * 8,
739745
Self::VecI128(n) => n * 16,
746+
Self::VecF16(n) => n * 2,
740747
Self::VecF32(n) => n * 4,
741748
Self::VecF64(n) => n * 8,
749+
Self::VecF128(n) => n * 16,
742750
})
743751
}
744752
}
@@ -751,15 +759,19 @@ impl fmt::Display for InlineAsmType {
751759
Self::I32 => f.write_str("i32"),
752760
Self::I64 => f.write_str("i64"),
753761
Self::I128 => f.write_str("i128"),
762+
Self::F16 => f.write_str("f16"),
754763
Self::F32 => f.write_str("f32"),
755764
Self::F64 => f.write_str("f64"),
765+
Self::F128 => f.write_str("f128"),
756766
Self::VecI8(n) => write!(f, "i8x{n}"),
757767
Self::VecI16(n) => write!(f, "i16x{n}"),
758768
Self::VecI32(n) => write!(f, "i32x{n}"),
759769
Self::VecI64(n) => write!(f, "i64x{n}"),
760770
Self::VecI128(n) => write!(f, "i128x{n}"),
771+
Self::VecF16(n) => write!(f, "f16x{n}"),
761772
Self::VecF32(n) => write!(f, "f32x{n}"),
762773
Self::VecF64(n) => write!(f, "f64x{n}"),
774+
Self::VecF128(n) => write!(f, "f128x{n}"),
763775
}
764776
}
765777
}

compiler/rustc_target/src/asm/x86.rs

+11 -11
Original file line number | Diff line number | Diff line change
@@ -107,26 +107,26 @@ impl X86InlineAsmRegClass {
107107
match self {
108108
Self::reg | Self::reg_abcd => {
109109
if arch == InlineAsmArch::X86_64 {
110-
types! { _: I16, I32, I64, F32, F64; }
110+
types! { _: I16, I32, I64, F16, F32, F64; }
111111
} else {
112-
types! { _: I16, I32, F32; }
112+
types! { _: I16, I32, F16, F32; }
113113
}
114114
}
115115
Self::reg_byte => types! { _: I8; },
116116
Self::xmm_reg => types! {
117-
sse: I32, I64, F32, F64,
118-
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
117+
sse: I32, I64, F16, F32, F64, F128,
118+
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
119119
},
120120
Self::ymm_reg => types! {
121-
avx: I32, I64, F32, F64,
122-
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
123-
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4);
121+
avx: I32, I64, F16, F32, F64, F128,
122+
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
123+
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4);
124124
},
125125
Self::zmm_reg => types! {
126-
avx512f: I32, I64, F32, F64,
127-
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
128-
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4),
129-
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF32(16), VecF64(8);
126+
avx512f: I32, I64, F16, F32, F64, F128,
127+
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
128+
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4),
129+
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF16(32), VecF32(16), VecF64(8);
130130
},
131131
Self::kreg => types! {
132132
avx512f: I8, I16;

0 commit comments

Comments
 (0)