From 2cf4fb14f1c61aab5a85459a206808b4d20dd36d Mon Sep 17 00:00:00 2001 From: Mathieu Lamarre Date: Fri, 1 May 2020 00:05:10 -0400 Subject: [PATCH 1/2] Use a recent asmjit version Compiled against 9057aa3 --- ext/asmjit | 2 +- src/libcore/struct.cpp | 128 ++++++++++++++++++++-------------------- src/mitsuba/mitsuba.cpp | 2 + 3 files changed, 67 insertions(+), 65 deletions(-) diff --git a/ext/asmjit b/ext/asmjit index 7daa583de..9057aa30b 160000 --- a/ext/asmjit +++ b/ext/asmjit @@ -1 +1 @@ -Subproject commit 7daa583dec454604d32f0d1887cdef928f51807a +Subproject commit 9057aa30b620f0662ff51e2230c126a345063064 diff --git a/src/libcore/struct.cpp b/src/libcore/struct.cpp index bfb640353..589d46e8e 100644 --- a/src/libcore/struct.cpp +++ b/src/libcore/struct.cpp @@ -49,11 +49,11 @@ class StructCompiler { // .. and it is either stored in a general purpose or a vector register struct Value { - X86Gp gp; - X86Xmm xmm; + x86::Gp gp; + x86::Xmm xmm; }; - StructCompiler(X86Compiler &cc, X86Gp x, X86Gp y, bool dither, Label &err_label) + StructCompiler(x86::Compiler &cc, x86::Gp x, x86::Gp y, bool dither, Label &err_label) : cc(cc), xp(x), yp(y), dither(dither), err_label(err_label) { } @@ -63,11 +63,11 @@ class StructCompiler { /* --------------------------------------------------------- */ template - X86Mem const_(T value) { + x86::Mem const_(T value) { #if !defined(DOUBLE_PRECISION) - return cc.newFloatConst(asmjit::kConstScopeGlobal, (float) value); + return cc.newFloatConst(asmjit::ConstPool::kScopeGlobal, (float) value); #else - return cc.newDoubleConst(asmjit::kConstScopeGlobal, (double) value); + return cc.newDoubleConst(asmjit::ConstPool::kScopeGlobal, (double) value); #endif } @@ -106,7 +106,7 @@ class StructCompiler { #endif } - void movs(const X86Xmm &x, const X86Xmm &y) { + void movs(const x86::Xmm &x, const x86::Xmm &y) { #if defined(ENOKI_X86_AVX) #if !defined(DOUBLE_PRECISION) cc.vmovss(x, x, y); @@ -404,13 +404,13 @@ class StructCompiler { } /// Forward/inverse gamma correction using the sRGB profile - X86Xmm gamma(X86Xmm x, bool to_srgb) { + x86::Xmm gamma(x86::Xmm x, bool to_srgb) { #if MTS_JIT_LOG_ASSEMBLY == 1 cc.comment(to_srgb ? "# Linear -> sRGB conversion" : "# sRGB -> linear conversion"); #endif - X86Xmm a = cc.newXmm(), + x86::Xmm a = cc.newXmm(), b = cc.newXmm(); movs(a, const_(to_srgb ? 12.92 : (1.0 / 12.92))); @@ -419,7 +419,7 @@ class StructCompiler { Label low_value = cc.newLabel(); cc.jb(low_value); - X86Xmm y; + x86::Xmm y; if (to_srgb) { y = cc.newXmm(); sqrts(y, x); @@ -475,8 +475,8 @@ class StructCompiler { for (size_t i = 0; i < ncoeffs; ++i) { for (int j = 0; j < 2; ++j) { - X86Xmm &v = (j == 0) ? a : b; - X86Mem coeff = const_(to_srgb ? to_srgb_coeffs[j][i] + x86::Xmm &v = (j == 0) ? a : b; + x86::Mem coeff = const_(to_srgb ? to_srgb_coeffs[j][i] : from_srgb_coeffs[j][i]); if (i == 0) movs(v, coeff); @@ -493,7 +493,7 @@ class StructCompiler { } /// Load a variable from the given structure (or return from the cache if it was already loaded) - std::pair load(const Struct* struct_, const X86Gp &input, const std::string &name) { + std::pair load(const Struct* struct_, const x86::Gp &input, const std::string &name) { Struct::Field field = struct_->field(name); Key key { field.name, field.type, field.flags }; @@ -510,11 +510,11 @@ class StructCompiler { uint32_t op; if (field.is_signed()) - op = field.size < 4 ? X86Inst::kIdMovsx : X86Inst::kIdMovsxd; + op = field.size < 4 ? x86::Inst::kIdMovsx : x86::Inst::kIdMovsxd; else - op = field.size < 4 ? X86Inst::kIdMovzx : X86Inst::kIdMov; + op = field.size < 4 ? x86::Inst::kIdMovzx : x86::Inst::kIdMov; if (field.size == 8) - op = X86Inst::kIdMov; + op = x86::Inst::kIdMov; // Will we need to swap the byte order of the source records? bool bswap = struct_->byte_order() == Struct::ByteOrder::BigEndian; @@ -573,7 +573,7 @@ class StructCompiler { case Struct::Type::Float32: if (bswap) { - X86Gp temp = cc.newUInt32(); + x86::Gp temp = cc.newUInt32(); cc.mov(temp.r32(), x86::dword_ptr(input, offset)); cc.bswap(temp.r32()); #if defined(ENOKI_X86_AVX) @@ -592,7 +592,7 @@ class StructCompiler { case Struct::Type::Float64: if (bswap) { - X86Gp temp = cc.newUInt64(); + x86::Gp temp = cc.newUInt64(); cc.mov(temp.r64(), x86::qword_ptr(input, offset)); cc.bswap(temp.r64()); #if defined(ENOKI_X86_AVX) @@ -615,34 +615,34 @@ class StructCompiler { if (has_flag(field.flags, Struct::Flags::Assert)) { if (field.type == Struct::Type::Float16) { auto ref = cc.newUInt16Const( - asmjit::kConstScopeGlobal, + asmjit::ConstPool::kScopeGlobal, enoki::half::float32_to_float16((float) field.default_)); cc.cmp(value.gp.r16(), ref); } else if (field.type == Struct::Type::Float32) { - auto ref = cc.newFloatConst(asmjit::kConstScopeGlobal, (float) field.default_); + auto ref = cc.newFloatConst(asmjit::ConstPool::kScopeGlobal, (float) field.default_); #if defined(ENOKI_X86_AVX) cc.vucomiss(value.xmm, ref); #else cc.ucomiss(value.xmm, ref); #endif } else if (field.type == Struct::Type::Float64) { - auto ref = cc.newDoubleConst(asmjit::kConstScopeGlobal, (double) field.default_); + auto ref = cc.newDoubleConst(asmjit::ConstPool::kScopeGlobal, (double) field.default_); #if defined(ENOKI_X86_AVX) cc.vucomisd(value.xmm, ref); #else cc.ucomisd(value.xmm, ref); #endif } else if (field.type == Struct::Type::Int8 || field.type == Struct::Type::UInt8) { - auto ref = cc.newByteConst(asmjit::kConstScopeGlobal, (int8_t) field.default_); + auto ref = cc.newByteConst(asmjit::ConstPool::kScopeGlobal, (int8_t) field.default_); cc.cmp(value.gp.r8(), ref); } else if (field.type == Struct::Type::Int16 || field.type == Struct::Type::UInt16) { - auto ref = cc.newInt16Const(asmjit::kConstScopeGlobal, (int16_t) field.default_); + auto ref = cc.newInt16Const(asmjit::ConstPool::kScopeGlobal, (int16_t) field.default_); cc.cmp(value.gp.r16(), ref); } else if (field.type == Struct::Type::Int32 || field.type == Struct::Type::UInt32) { - auto ref = cc.newInt32Const(asmjit::kConstScopeGlobal, (int32_t) field.default_); + auto ref = cc.newInt32Const(asmjit::ConstPool::kScopeGlobal, (int32_t) field.default_); cc.cmp(value.gp.r32(), ref); } else if (field.type == Struct::Type::Int64 || field.type == Struct::Type::UInt64) { - auto ref = cc.newInt64Const(asmjit::kConstScopeGlobal, (int64_t) field.default_); + auto ref = cc.newInt64Const(asmjit::ConstPool::kScopeGlobal, (int64_t) field.default_); cc.cmp(value.gp.r64(), ref); } else { Throw("Internal error!"); @@ -702,7 +702,7 @@ class StructCompiler { } else if (input.first.type == Struct::Type::UInt64) { auto tmp = cc.newUInt64(); cc.mov(tmp, vr.gp.r64()); - auto tmp2 = cc.newUInt64Const(asmjit::kConstScopeGlobal, 0x7fffffffffffffffull); + auto tmp2 = cc.newUInt64Const(asmjit::ConstPool::kScopeGlobal, 0x7fffffffffffffffull); cc.and_(tmp, tmp2); cvtsi2s(vr.xmm, tmp.r64()); cc.test(vr.gp.r64(), vr.gp.r64()); @@ -736,7 +736,7 @@ class StructCompiler { } if (kr.type == Struct::Type::Float32 && kr.type != struct_type_v) { - X86Xmm source = vr.xmm; + x86::Xmm source = vr.xmm; vr.xmm = cc.newXmm(); #if defined(ENOKI_X86_AVX) cc.vcvtss2sd(vr.xmm, vr.xmm, source); @@ -747,7 +747,7 @@ class StructCompiler { } if (kr.type == Struct::Type::Float64 && kr.type != struct_type_v) { - X86Xmm source = vr.xmm; + x86::Xmm source = vr.xmm; vr.xmm = cc.newXmm(); #if defined(ENOKI_X86_AVX) cc.vcvtsd2ss(vr.xmm, vr.xmm, source); @@ -767,7 +767,7 @@ class StructCompiler { } /// Write a variable to memory - void save(const Struct *struct_, const X86Gp &output, + void save(const Struct *struct_, const x86::Gp &output, Struct::Field field, const std::pair &kv) { Key key = kv.first; Value value = kv.second; @@ -800,16 +800,16 @@ class StructCompiler { if (dither) { if (!dither_ready) { - X86Gp index = cc.newUInt64(); + x86::Gp index = cc.newUInt64(); cc.movzx(index.r64(), xp.r8Lo()); cc.mov(index.r8Hi(), yp.r8Lo()); - X86Gp base = cc.newUInt64(); + x86::Gp base = cc.newUInt64(); cc.mov(base.r64(), Imm((uintptr_t) dither_matrix256)); dither_value = cc.newXmm(); #if defined(ENOKI_X86_AVX) - cc.movss(dither_value, X86Mem(base, index, 2, 0, (uint32_t) sizeof(float))); + cc.movss(dither_value, x86::Mem(base, index, 2, 0, (uint32_t) sizeof(float))); #else - cc.vmovss(dither_value, X86Mem(base, index, 2, 0, (uint32_t) sizeof(float))); + cc.vmovss(dither_value, x86::Mem(base, index, 2, 0, (uint32_t) sizeof(float))); #endif #if defined(DOUBLE_PRECISION) #if defined(ENOKI_X86_AVX) @@ -834,16 +834,16 @@ class StructCompiler { } else if (field.type == Struct::Type::UInt64) { cvts2si(value.gp.r64(), value.xmm); - X86Xmm large_thresh = cc.newXmm(); + x86::Xmm large_thresh = cc.newXmm(); movs(large_thresh, const_(9.223372036854776e18 /* 2^63 - 1 */)); - X86Xmm tmp = cc.newXmm(); + x86::Xmm tmp = cc.newXmm(); subs(tmp, value.xmm, large_thresh); - X86Gp tmp2 = cc.newInt64(); + x86::Gp tmp2 = cc.newInt64(); cvts2si(tmp2, tmp); - X86Gp large_result = cc.newInt64(); + x86::Gp large_result = cc.newInt64(); cc.mov(large_result, Imm(0x7fffffffffffffffull)); cc.add(large_result, tmp2); @@ -866,7 +866,7 @@ class StructCompiler { case Struct::Type::Int16: case Struct::Type::UInt16: if (bswap) { - X86Gp temp = cc.newUInt16(); + x86::Gp temp = cc.newUInt16(); cc.mov(temp, value.gp.r16()); cc.xchg(temp.r8Lo(), temp.r8Hi()); value.gp = temp; @@ -877,7 +877,7 @@ class StructCompiler { case Struct::Type::Int32: case Struct::Type::UInt32: if (bswap) { - X86Gp temp = cc.newUInt32(); + x86::Gp temp = cc.newUInt32(); cc.mov(temp, value.gp.r32()); cc.bswap(temp); value.gp = temp; @@ -888,7 +888,7 @@ class StructCompiler { case Struct::Type::Int64: case Struct::Type::UInt64: if (bswap) { - X86Gp temp = cc.newUInt64(); + x86::Gp temp = cc.newUInt64(); cc.mov(temp, value.gp.r64()); cc.bswap(temp); value.gp = temp; @@ -898,7 +898,7 @@ class StructCompiler { case Struct::Type::Float16: if (key.type == Struct::Type::Float64) { - X86Xmm temp = cc.newXmm(); + x86::Xmm temp = cc.newXmm(); #if defined(ENOKI_X86_AVX) cc.vcvtsd2ss(temp, temp, value.xmm); #else @@ -911,12 +911,12 @@ class StructCompiler { value.gp = cc.newUInt32(); #if defined(__F16C__) - X86Xmm temp = cc.newXmm(); + x86::Xmm temp = cc.newXmm(); cc.vcvtps2ph(temp, value.xmm, 0); cc.vmovd(value.gp.r32(), temp); #else - auto call = cc.call(imm_ptr((void *) enoki::half::float32_to_float16), - FuncSignature1(asmjit::CallConv::kIdHost)); + auto call = cc.call(imm((void *) enoki::half::float32_to_float16), + FuncSignatureT(asmjit::CallConv::kIdHost)); call->setArg(0, value.xmm); call->setRet(0, value.gp); #endif @@ -925,7 +925,7 @@ class StructCompiler { } if (bswap) { - X86Gp temp = cc.newUInt16(); + x86::Gp temp = cc.newUInt16(); cc.mov(temp, value.gp.r16()); cc.xchg(temp.r8Lo(), temp.r8Hi()); value.gp = temp; @@ -937,7 +937,7 @@ class StructCompiler { case Struct::Type::Float32: if (key.type == Struct::Type::Float64) { - X86Xmm temp = cc.newXmm(); + x86::Xmm temp = cc.newXmm(); #if defined(ENOKI_X86_AVX) cc.vcvtsd2ss(temp, temp, value.xmm); #else @@ -946,7 +946,7 @@ class StructCompiler { value.xmm = temp; } if (bswap) { - X86Gp temp = cc.newUInt32(); + x86::Gp temp = cc.newUInt32(); #if defined(ENOKI_X86_AVX) cc.vmovd(temp, value.xmm); #else @@ -965,7 +965,7 @@ class StructCompiler { case Struct::Type::Float64: if (key.type == Struct::Type::Float32) { - X86Xmm temp = cc.newXmm(); + x86::Xmm temp = cc.newXmm(); #if defined(ENOKI_X86_AVX) cc.vcvtss2sd(temp, temp, value.xmm); #else @@ -974,7 +974,7 @@ class StructCompiler { value.xmm = temp; } if (bswap) { - X86Gp temp = cc.newUInt64(); + x86::Gp temp = cc.newUInt64(); #if defined(ENOKI_X86_AVX) cc.vmovq(temp, value.xmm); #else @@ -996,11 +996,11 @@ class StructCompiler { } private: // Cache of all currently loaded/converted variables - X86Compiler &cc; - X86Gp xp, yp; + x86::Compiler &cc; + x86::Gp xp, yp; bool dither; Label err_label; - X86Xmm dither_value; + x86::Xmm dither_value; bool dither_ready = false; std::map cache; }; @@ -1238,7 +1238,7 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo } CodeHolder code; - code.init(jit->runtime.getCodeInfo()); + code.init(jit->runtime.codeInfo()); #if MTS_JIT_LOG_ASSEMBLY == 1 Log(Info, "Converting from %s to %s", source->to_string(), target->to_string()); StringLogger logger; @@ -1246,9 +1246,9 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo code.setLogger(&logger); #endif - X86Compiler cc(&code); + x86::Compiler cc(&code); - cc.addFunc(FuncSignature4(asmjit::CallConv::kIdHost)); + cc.addFunc(FuncSignatureT(asmjit::CallConv::kIdHost)); auto width = cc.newInt64("width"); auto height = cc.newInt64("height"); auto input = cc.newIntPtr("input"); @@ -1312,10 +1312,10 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo Throw("Internal error: source and target weights have mismatched names!"); } - X86Xmm scale_factor; + x86::Xmm scale_factor; if (source_weight != nullptr && target_weight == nullptr) { scale_factor = cc.newXmm(); - X86Xmm value = sc.linearize(sc.load(source, input, source_weight->name)).second.xmm; + x86::Xmm value = sc.linearize(sc.load(source, input, source_weight->name)).second.xmm; sc.movs(scale_factor, sc.const_(1.0)); sc.divs(scale_factor, value); } @@ -1345,20 +1345,20 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo Throw("Internal error: source and target alpha have mismatched names!"); } - X86Xmm alpha, inv_alpha; + x86::Xmm alpha, inv_alpha; if (source_alpha != nullptr && target_alpha != nullptr) { alpha = cc.newXmm(); inv_alpha = cc.newXmm(); - X86Xmm value = sc.linearize(sc.load(source, input, source_alpha->name)).second.xmm; + x86::Xmm value = sc.linearize(sc.load(source, input, source_alpha->name)).second.xmm; sc.movs(alpha, value); sc.movs(inv_alpha, sc.const_(1.0)); sc.divs(inv_alpha, value); // Check if alpha is zero and set inv_alpha to zero if that is the case - X86Xmm zero = cc.newXmm(); + x86::Xmm zero = cc.newXmm(); sc.movs(zero, sc.const_(0.0)); - X86Xmm mask = cc.newXmm(); + x86::Xmm mask = cc.newXmm(); sc.movs(mask, value); sc.cmps(mask, zero, 2); sc.blend(inv_alpha, zero, mask); @@ -1376,7 +1376,7 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo Throw("Unable to find field \"%s\"!", f.name); } } else { - X86Xmm accum = cc.newXmm(); + x86::Xmm accum = cc.newXmm(); for (size_t i = 0; i) kv = sc.linearize(kv); sc.muls(result, kv.second.xmm, scale_factor); @@ -1411,7 +1411,7 @@ StructConverter::StructConverter(const Struct *source, const Struct *target, boo source_premult != target_premult) { if (has_multiple_alpha_channels) Throw("Found multiple alpha channels: Alpha (un)premultiplication expects a single alpha channel"); - X86Xmm result = cc.newXmm(); + x86::Xmm result = cc.newXmm(); if (kv.first.type != struct_type_v) kv = sc.linearize(kv); if (target_premult && !source_premult) { diff --git a/src/mitsuba/mitsuba.cpp b/src/mitsuba/mitsuba.cpp index a7184ee2f..368a81fd8 100644 --- a/src/mitsuba/mitsuba.cpp +++ b/src/mitsuba/mitsuba.cpp @@ -17,6 +17,8 @@ #if !defined(__WINDOWS__) # include +#else +# include #endif using namespace mitsuba; From 01d1bbfcad6226c13f0e973a8e2494d3ca77d2e3 Mon Sep 17 00:00:00 2001 From: Mathieu Lamarre Date: Fri, 1 May 2020 00:16:41 -0400 Subject: [PATCH 2/2] Missed one --- src/libcore/struct.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libcore/struct.cpp b/src/libcore/struct.cpp index 589d46e8e..2be78058f 100644 --- a/src/libcore/struct.cpp +++ b/src/libcore/struct.cpp @@ -727,8 +727,8 @@ class StructCompiler { cc.vmovd(vr.xmm, vr.gp.r32()); cc.vcvtph2ps(vr.xmm, vr.xmm); #else - auto call = cc.call(imm_ptr((void *) enoki::half::float16_to_float32), - FuncSignature1(CallConv::kIdHostCDecl)); + auto call = cc.call(imm((void *) enoki::half::float16_to_float32), + FuncSignatureT(CallConv::kIdHostCDecl)); call->setArg(0, vr.gp); call->setRet(0, vr.xmm); #endif