diff --git a/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S b/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S new file mode 100644 index 0000000000000..7aa48662dc84d --- /dev/null +++ b/src/hotspot/os_cpu/windows_aarch64/sve_helper_windows_aarch64.S @@ -0,0 +1,37 @@ +; Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. +; DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +; +; This code is free software; you can redistribute it and/or modify it +; under the terms of the GNU General Public License version 2 only, as +; published by the Free Software Foundation. +; +; This code is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; version 2 for more details (a copy is included in the LICENSE file that +; accompanied this code). +; +; You should have received a copy of the GNU General Public License version +; 2 along with this work; if not, write to the Free Software Foundation, +; Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +; +; Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +; or visit www.oracle.com if you need additional information or have any +; questions. +; + + ; int get_sve_vector_length_impl(void); + ; + ; Returns the current SVE vector length in bytes. + ; Uses the RDVL instruction which returns imm * VL in bytes. + ; With imm=1 this gives the SVE vector length directly. 
+
+    ALIGN  4
+    EXPORT get_sve_vector_length_impl
+    AREA   sve_helper_text, CODE, READONLY
+
+get_sve_vector_length_impl
+    rdvl   x0, 1        ; x0 = 1 * VL, i.e. the SVE vector length in bytes
+    ret                 ; the value left in x0 is the function result
+
+    END
diff --git a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
index 93beb549366be..dad262a9321da 100644
--- a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
+++ b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
@@ -26,16 +26,42 @@
 #include "runtime/os.hpp"
 #include "runtime/vm_version.hpp"
 
+// Since PF_ARM_SVE_INSTRUCTIONS_AVAILABLE and related constants were added in
+// Windows 11 (version 24H2) and in Windows Server 2025, we define them here for
+// compatibility with older SDK versions.
+#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
+#endif
+
+#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
+#endif
+
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
+#endif
+
+#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
+#endif
+
+// Assembly helper implemented in sve_helper_windows_aarch64.S. Executes the
+// RDVL instruction to return the vector length in bytes.
+extern "C" int get_sve_vector_length_impl(void);
+
 int VM_Version::get_current_sve_vector_length() {
   assert(VM_Version::supports_sve(), "should not call this");
-  ShouldNotReachHere();
-  return 0;
+  return get_sve_vector_length_impl();  // vector length in bytes, read via RDVL
 }
 
 int VM_Version::set_and_get_current_sve_vector_length(int length) {
   assert(VM_Version::supports_sve(), "should not call this");
-  ShouldNotReachHere();
-  return 0;
+
+  // Unlike Linux, Windows does not present a way to modify the VL (the
+  // rationale is that the OS expects the application to use the maximum vector
+  // length supported by the hardware), so we simply return the current VL. The
+  // caller (`VM_Version::initialize()`) will print a warning and move on.
+  return get_sve_vector_length_impl();  // 'length' is deliberately not used
 }
 
 void VM_Version::get_os_cpu_info() {
@@ -43,15 +69,33 @@ void VM_Version::get_os_cpu_info() {
   if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) {
     set_feature(CPU_CRC32);
   }
+
   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
     set_feature(CPU_AES);
     set_feature(CPU_SHA1);
     set_feature(CPU_SHA2);
   }
+
   if (IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE)) {
     set_feature(CPU_ASIMD);
   }
-  // No check for CPU_PMULL, CPU_SVE, CPU_SVE2
+
+  if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) {
+    set_feature(CPU_SVE);
+  }
+
+  if (IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE)) {
+    set_feature(CPU_SVE2);
+  }
+
+  if (IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE)) {
+    set_feature(CPU_SVEBITPERM);
+  }
+
+  if (IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE)) {
+    set_feature(CPU_FPHP);     // the single Windows FP16 flag enables both
+    set_feature(CPU_ASIMDHP);  // half-precision features tracked by HotSpot
+  }
 
   __int64 dczid_el0 = _ReadStatusReg(0x5807 /* ARM64_DCZID_EL0 */);
 
diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java
new file mode 100644
index 0000000000000..a9512f78c1845
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEFeatureDetection.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @summary Verify SVE/SVE2 feature detection for both Windows and Linux.
+ *
+ * @requires os.arch == "aarch64" & vm.compiler2.enabled
+ * @library /test/lib /
+ * @build jdk.test.whitebox.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller
+ *      jdk.test.whitebox.WhiteBox
+ *
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
+ *      -XX:+WhiteBoxAPI compiler.c2.aarch64.TestSVEFeatureDetection
+ */
+
+package compiler.c2.aarch64;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import jdk.test.lib.Asserts;
+import jdk.test.lib.process.ProcessTools;
+import jdk.test.lib.process.OutputAnalyzer;
+import jdk.test.whitebox.WhiteBox;
+
+public class TestSVEFeatureDetection {
+    private static final WhiteBox WB = WhiteBox.getWhiteBox();
+    private static final String KEY_USE_SVE    = "UseSVE=";
+    private static final String KEY_MAX_VECTOR = "MaxVectorSize=";
+    private static final String KEY_HAS_SVE    = "has_sve=";
+    private static final String KEY_HAS_SVE2   = "has_sve2=";
+
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0 && args[0].equals("flagCheck")) {
+            printFlags();   // child JVM: report what this VM detected
+        } else {
+            runDriver();    // parent JVM: check defaults, then spawn children
+        }
+    }
+
+    // Prints UseSVE, MaxVectorSize and the sve/sve2 CPU features of this JVM.
+    private static void printFlags() {
+        int sveLevel = WB.getUintVMFlag("UseSVE").intValue();
+        long maxVectorSize = WB.getIntxVMFlag("MaxVectorSize");
+        List<String> features = Arrays.asList(WB.getCPUFeatures().split(", "));
+        boolean hasSve = features.contains("sve");
+        boolean hasSve2 = features.contains("sve2");
+
+        System.out.println(KEY_USE_SVE + sveLevel);
+        System.out.println(KEY_MAX_VECTOR + maxVectorSize);
+        System.out.println(KEY_HAS_SVE + hasSve);
+        System.out.println(KEY_HAS_SVE2 + hasSve2);
+    }
+
+    private static void runDriver() throws Exception {
+        int sveLevel = WB.getUintVMFlag("UseSVE").intValue();
+        long maxVectorSize = WB.getIntxVMFlag("MaxVectorSize");
+        List<String> features = Arrays.asList(WB.getCPUFeatures().split(", "));
+        boolean hasSve = features.contains("sve");
+        boolean hasSve2 = features.contains("sve2");
+
+        // If SVE is not present, just verify a consistent disabled state.
+        if (!hasSve) {
+            Asserts.assertEquals(sveLevel, 0,
+                "UseSVE must be 0 when hardware lacks SVE");
+            Asserts.assertFalse(hasSve2,
+                "sve2 must be absent when sve is absent");
+            return;
+        }
+
+        Asserts.assertTrue(sveLevel > 0,
+            "UseSVE should be auto-set to > 0 when SVE hardware is present");
+        Asserts.assertTrue(maxVectorSize >= 16,
+            "MaxVectorSize must be >= 16 for SVE, got " + maxVectorSize);
+        Asserts.assertTrue(Long.bitCount(maxVectorSize) == 1,
+            "MaxVectorSize must be a power of two, got " + maxVectorSize);
+        Asserts.assertTrue(maxVectorSize % 16 == 0,
+            "MaxVectorSize must be a multiple of 16, got " + maxVectorSize);
+        Asserts.assertTrue(maxVectorSize <= 256,
+            "MaxVectorSize must be <= 256 (2048 bits), got " + maxVectorSize);
+
+        if (hasSve2) {
+            Asserts.assertEquals(sveLevel, 2,
+                "UseSVE should be 2 when hardware supports SVE2");
+        } else {
+            Asserts.assertEquals(sveLevel, 1,
+                "UseSVE should be 1 when hardware supports SVE but not SVE2");
+        }
+
+        OutputAnalyzer out = spawnFlagCheck("-XX:UseSVE=0");
+        out.shouldHaveExitValue(0);
+        out.shouldContain(KEY_USE_SVE + "0");
+        out.shouldContain(KEY_HAS_SVE + "false");
+        out.shouldContain(KEY_HAS_SVE2 + "false");
+
+        out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=512");
+        out.shouldHaveExitValue(0);
+        out.shouldContain("warning");
+
+        boolean isWindows = System.getProperty("os.name").toLowerCase().contains("windows");
+        out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=16");
+        out.shouldHaveExitValue(0);
+        if (isWindows && maxVectorSize > 16) {  // Windows: warning expected, detected size kept
+            out.shouldContain("warning");
+            out.shouldContain(KEY_MAX_VECTOR + maxVectorSize);
+        } else {
+            out.shouldContain(KEY_MAX_VECTOR + "16");
+        }
+
+        if (hasSve2) {
+            out = spawnFlagCheck("-XX:UseSVE=2");
+            out.shouldHaveExitValue(0);
+            out.shouldContain(KEY_USE_SVE + "2");
+            out.shouldContain(KEY_HAS_SVE + "true");
+            out.shouldContain(KEY_HAS_SVE2 + "true");
+
+            out = spawnFlagCheck("-XX:UseSVE=1");
+            out.shouldHaveExitValue(0);
+            out.shouldContain(KEY_USE_SVE + "1");
+            out.shouldContain(KEY_HAS_SVE + "true");
+            out.shouldContain(KEY_HAS_SVE2 + "false");
+        } else {
+            out = spawnFlagCheck("-XX:UseSVE=2");
+            out.shouldHaveExitValue(0);
+            out.shouldContain("SVE2 specified, but not supported on current CPU");
+            out.shouldContain(KEY_USE_SVE + "1");
+            out.shouldContain(KEY_HAS_SVE + "true");
+            out.shouldContain(KEY_HAS_SVE2 + "false");
+        }
+
+        out = spawnFlagCheck("-XX:UseSVE=1");
+        out.shouldHaveExitValue(0);
+        out.shouldContain(KEY_USE_SVE + "1");
+        out.shouldContain(KEY_HAS_SVE + "true");
+        out.shouldMatch("MaxVectorSize=\\d+");
+
+        if (maxVectorSize >= 32) {
+            out = spawnFlagCheck("-XX:UseSVE=1", "-XX:MaxVectorSize=32");
+            out.shouldHaveExitValue(0);
+            if (isWindows && maxVectorSize > 32) {  // Windows: warning expected, detected size kept
+                out.shouldContain("warning");
+                out.shouldContain(KEY_MAX_VECTOR + maxVectorSize);
+            } else {
+                out.shouldContain(KEY_MAX_VECTOR + "32");
+            }
+        }
+    }
+
+    // Launches this class in "flagCheck" mode in a child JVM with extraFlags.
+    private static OutputAnalyzer spawnFlagCheck(String... extraFlags)
+            throws Exception {
+        List<String> args = new ArrayList<>();
+        args.add("-Xbootclasspath/a:.");
+        args.add("-XX:+UnlockDiagnosticVMOptions");
+        args.add("-XX:+WhiteBoxAPI");
+        args.addAll(Arrays.asList(extraFlags));
+        args.add(TestSVEFeatureDetection.class.getName());
+        args.add("flagCheck");
+
+        ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder(
+            args.toArray(new String[0]));
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        output.reportDiagnosticSummary();
+        return output;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
index 445fef5e55a58..0b9c4fb02c903 100644
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestFloat16ScalarOperations.java
@@ -714,9 +714,13 @@ public void testSqrtConstantFolding() {
 
     @Test
     @IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"})
+        applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"},
+        // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded.
+        applyIfPlatform = {"windows", "false"})
     @IR(counts = {IRNode.FMA_HF, " 0 ", IRNode.REINTERPRET_S2HF, " 0 ", IRNode.REINTERPRET_HF2S, " 0 "},
-        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"})
+        applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"},
+        // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded.
+        applyIfPlatform = {"windows", "false"})
     @Warmup(10000)
     public void testFMAConstantFolding() {
         // If any argument is NaN, the result is NaN.
@@ -752,9 +756,13 @@ public void testFMAConstantFolding() { @Test @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zfh", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. + applyIfPlatform = {"windows", "false"}) @IR(failOn = {IRNode.ADD_HF, IRNode.SUB_HF, IRNode.MUL_HF, IRNode.DIV_HF, IRNode.SQRT_HF, IRNode.FMA_HF}, - applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) + applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}, + // On Windows, both GCC and MSVC don't set __STDC_IEC_559__, so FMAs on constants are not folded. + applyIfPlatform = {"windows", "false"}) @Warmup(10000) public void testRounding1() { dst[0] = float16ToRawShortBits(add(RANDOM1, RANDOM2)); diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java index 5c085e6a3a343..fa4d2a1beec01 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestReductions.java @@ -1768,7 +1768,7 @@ private static long longAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, // vector accumulator - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"}) @@ -1776,7 +1776,7 @@ private static long longAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "= 0"}, // Reduction NOT moved out of loop - applyIfCPUFeatureOr = {"asimd", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}, applyIf 
= {"AutoVectorizationOverrideProfitability", "> 0"}) // Note: NEON does not support MulVL for auto vectorization. There is // a scalarized implementation, but that is not profitable for @@ -1840,10 +1840,10 @@ private static long longMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_REDUCTION_V, "> 0", IRNode.AND_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1861,10 +1861,10 @@ private static long longAndDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.OR_REDUCTION_V, "> 0", IRNode.OR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1882,10 +1882,10 @@ private static long longOrDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.XOR_REDUCTION_V, "> 0", IRNode.XOR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1903,10 +1903,10 @@ private static long longXorDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_REDUCTION_VL, "> 0", IRNode.ADD_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1924,13 +1924,13 @@ private static long longAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512dq", "false", "sse4.1", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370673 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // MulVL is not implemented on NEON, so we also not have the reduction. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1948,13 +1948,13 @@ private static long longMulDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MIN_REDUCTION_V, "> 0", IRNode.MIN_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. 
Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1972,13 +1972,13 @@ private static long longMinDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MAX_REDUCTION_V, "> 0", IRNode.MAX_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -1997,10 +1997,10 @@ private static long longMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.AND_REDUCTION_V, "> 0", IRNode.AND_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While AndReductionV is implemented in NEON (see longAndSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2018,10 +2018,10 @@ private static long longAndBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.OR_REDUCTION_V, "> 0", IRNode.OR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While OrReductionV is implemented in NEON (see longOrSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2039,10 +2039,10 @@ private static long longOrBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.XOR_REDUCTION_V, "> 0", IRNode.XOR_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longXorSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2060,10 +2060,10 @@ private static long longXorBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_REDUCTION_VL, "> 0", IRNode.ADD_VL, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longAddSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2081,7 +2081,7 @@ private static long longAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MUL_REDUCTION_VL, "> 0", IRNode.MUL_VL, "> 0"}, - applyIfCPUFeature = {"avx512dq", "true"}, + applyIfCPUFeatureOr = {"avx512dq", "true", "sve", "true"}, applyIfAnd = {"AutoVectorizationOverrideProfitability", "> 0", "LoopUnrollLimit", ">= 1000"}) @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2092,7 +2092,7 @@ private static long longAddBig() { // If you can eliminate this exception for LoopUnrollLimit, please remove // the flag completely from the test, also the "addFlags" at the top. @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // MulVL is not implemented on NEON, so we also not have the reduction. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2110,13 +2110,13 @@ private static long longMulBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MIN_REDUCTION_V, "> 0", IRNode.MIN_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMinSimple), MulVL is not. 
// Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2134,13 +2134,13 @@ private static long longMinBig() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.MAX_REDUCTION_V, "> 0", IRNode.MAX_VL, "> 0"}, - applyIfCPUFeature = {"avx512", "true"}, + applyIfCPUFeatureOr = {"avx512", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_L, applyIfCPUFeatureAnd = {"avx512", "false", "avx2", "true"}) // I think this could vectorize, but currently does not. Filed: JDK-8370671 @IR(failOn = IRNode.LOAD_VECTOR_L, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // While MaxReductionV is implemented in NEON (see longMaxSimple), MulVL is not. // Filed: JDK-8370686 @IR(failOn = IRNode.LOAD_VECTOR_L, @@ -2159,10 +2159,10 @@ private static long longMaxBig() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2183,10 +2183,10 @@ private static float floatAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. 
Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2242,10 +2242,10 @@ private static float floatMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2263,10 +2263,10 @@ private static float floatAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2319,10 +2319,10 @@ private static float floatMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. 
Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2340,10 +2340,10 @@ private static float floatAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_REDUCTION_VF, "> 0", IRNode.MUL_VF, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_F, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_F, @@ -2396,10 +2396,10 @@ private static float floatMaxBig() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_VD, "> 0", IRNode.ADD_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2420,10 +2420,10 @@ private static double doubleAddSimple() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "= 2"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. 
Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2479,10 +2479,10 @@ private static double doubleMaxSimple() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VD, "= 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2500,10 +2500,10 @@ private static double doubleAddDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2556,10 +2556,10 @@ private static double doubleMaxDotProduct() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.ADD_REDUCTION_V, "> 0", IRNode.ADD_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. 
Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, @@ -2577,10 +2577,10 @@ private static double doubleAddBig() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_REDUCTION_VD, "> 0", IRNode.MUL_VD, "> 0"}, - applyIfCPUFeature = {"sse4.1", "true"}, + applyIfCPUFeatureOr = {"sse4.1", "true", "sve", "true"}, applyIf = {"AutoVectorizationOverrideProfitability", "> 0"}) @IR(failOn = IRNode.LOAD_VECTOR_D, - applyIfCPUFeatureAnd = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // I think this could vectorize, but currently does not. Filed: JDK-8370677 // But: it is not clear that it would be profitable, given the sequential reduction. @IR(failOn = IRNode.LOAD_VECTOR_D, diff --git a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java index f3c27c4d278a5..076c496d581f2 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestFloat16VectorOperations.java @@ -92,7 +92,7 @@ public TestFloat16VectorOperations() { @Test @Warmup(50) @IR(counts = {IRNode.ADD_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.ADD_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorAddFloat16() { @@ -113,7 +113,7 @@ public void checkResultAdd() { @Test @Warmup(50) @IR(counts = {IRNode.SUB_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.SUB_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorSubFloat16() { @@ -134,7 +134,7 @@ public void checkResultSub() { @Test 
@Warmup(50) @IR(counts = {IRNode.MUL_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MUL_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMulFloat16() { @@ -154,7 +154,7 @@ public void checkResultMul() { @Test @Warmup(50) @IR(counts = {IRNode.DIV_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.DIV_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorDivFloat16() { @@ -174,7 +174,7 @@ public void checkResultDiv() { @Test @Warmup(50) @IR(counts = {IRNode.MIN_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MIN_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMinFloat16() { @@ -194,7 +194,7 @@ public void checkResultMin() { @Test @Warmup(50) @IR(counts = {IRNode.MAX_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.MAX_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorMaxFloat16() { @@ -214,7 +214,7 @@ public void checkResultMax() { @Test @Warmup(50) @IR(counts = {IRNode.SQRT_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.SQRT_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorSqrtFloat16() { @@ -234,7 +234,7 @@ public void checkResultSqrt() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " >0 "}, - 
applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16() { @@ -256,7 +256,7 @@ public void checkResultFma() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16ScalarMixedConstants() { @@ -279,7 +279,7 @@ public void checkResultFmaScalarMixedConstants() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16MixedConstants() { @@ -302,7 +302,7 @@ public void checkResultFmaMixedConstants() { @Test @Warmup(50) @IR(counts = {IRNode.FMA_VHF, " 0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.FMA_VHF, " 0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorFmaFloat16AllConstants() { @@ -329,7 +329,7 @@ public void checkResultFmaAllConstants() { @Test @Warmup(50) @IR(counts = {IRNode.ADD_VHF, " >0 "}, - applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true", "sve", "true"}) + applyIfCPUFeatureOr = {"avx512_fp16", "true", "zvfh", "true"}) @IR(counts = {IRNode.ADD_VHF, " >0 "}, applyIfCPUFeatureAnd = {"fphp", "true", "asimdhp", "true"}) public void vectorAddConstInputFloat16() {