/*
Copyright 2023 Adobe. All rights reserved.
This file is licensed to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance with the License. You
may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
REPRESENTATIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
*/
#include "neuralAssetsHelper.h"

#include <algorithm>
#include <cmath>   // std::abs overloads for float/double used below
#include <cstdint> // std::uint8_t / std::uint16_t / std::uint32_t
#include <cstdlib>
#include <limits>
#include <zlib.h>

namespace adobe::usd {

namespace {
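// Fp32 lets us view a float's raw IEEE-754 bit pattern (and vice versa).
// Note: reading the union member that was not most recently written is
// technically undefined behavior in standard C++, but it is supported as an
// extension by all major compilers and is a common idiom for bit twiddling.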
union Fp32 {
  std::uint32_t u = 0;
  float f;
};

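// Bit-pattern constants for the float16 <-> float32 conversions below:
//   f32Infty  - float32 +infinity (all exponent bits set)
//   f16Infty  - float16 +infinity expressed in the rebiased float32 domain
//   magic     - 2^-112, rescales the exponent from bias 127 to bias 15
//   magic2    - 2^112, the inverse rescale used when widening to float32
//   wasInfNan - 2^16, the smallest value a float16 Inf/NaN maps to after the
//               rescale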
constexpr Fp32 f32Infty = {255 << 23};
constexpr Fp32 f16Infty = {31 << 23};
constexpr Fp32 magic = {15 << 23};

constexpr Fp32 magic2 = {(254 - 15) << 23};
constexpr Fp32 wasInfNan = {(127 + 16) << 23};

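// Scalar float32 -> float16 conversion using the "magic number" rebiasing
// trick: the low mantissa bits are masked off and a multiply by `magic`
// (2^-112) moves the exponent into the half-precision range, so the result
// can simply be taken from the high bits. Inf/NaN and overflow are handled
// explicitly; denormals and zero fall out of the same arithmetic path.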
inline std::uint16_t float32ToFloat16(const float fl) {
  constexpr unsigned int signMask = 0x80000000u;
  constexpr unsigned int roundMask = ~0xfffu;

  std::uint16_t o = 0;

  Fp32 f;
  f.f = fl;

  const unsigned int sign = f.u & signMask;
  f.u ^= sign;

  // NOTE all the integer compares in this function can be safely
  // compiled into signed compares since all operands are below
  // 0x80000000. Important if you want fast straight SSE2 code
  // (since there's no unsigned PCMPGTD).

  if (f.u >= f32Infty.u) // Inf or NaN (all exponent bits set)
    o = (f.u > f32Infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
  else // (de)normalized number or zero
  {
    f.u &= roundMask;
    f.f *= magic.f;
    f.u -= roundMask;
    if (f.u > f16Infty.u) // clamp to signed infinity if overflowed
      f.u = f16Infty.u;

    o = static_cast<std::uint16_t>(f.u >> 13); // take the bits!
  }

  o |= sign >> 16;
  return o;
}

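// Scalar float16 -> float32 conversion: the inverse of the above. The half's
// exponent/mantissa bits are shifted up, rescaled by `magic2` (2^112), and
// Inf/NaN inputs are restored by forcing the full float32 exponent.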
inline float float16ToFloat32(const std::uint16_t h) {
  Fp32 o;
  o.u = (h & 0x7fff) << 13; // exponent/mantissa bits
  o.f *= magic2.f;          // exponent adjust
  if (o.f >= wasInfNan.f)   // make sure Inf/NaN survive
    o.u |= 255 << 23;
  // Cast to unsigned before shifting so the sign bit is not shifted into the
  // sign position of a signed int (undefined behavior before C++20).
  o.u |= static_cast<std::uint32_t>(h & 0x8000) << 16; // sign bit

  return o.f;
}
} // namespace

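// Inflate a gzip-wrapped (zlib windowBits = 16 + MAX_WBITS) byte stream into
// `decompressedData`. Returns false on empty input or any zlib error.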
bool decompress(const std::uint8_t *inputData, std::size_t inLen,
                std::vector<std::uint8_t> &decompressedData) {
  if (!inLen) {
    return false;
  }
  decompressedData.clear();

  z_stream strm = {};
  strm.next_in =
      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(inputData));
  strm.avail_in = static_cast<uInt>(inLen);

  // Initialize the zlib decompression stream; 16 + MAX_WBITS selects
  // gzip-wrapped input.
  if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) {
    return false;
  }

  int ret;
  const std::size_t bufferSize = 4096; // Temporary buffer size
  std::vector<std::uint8_t> buffer(bufferSize);

  // Decompress the data.
  do {
    strm.avail_out = bufferSize;
    strm.next_out = buffer.data();

    ret = inflate(&strm, Z_NO_FLUSH);

    switch (ret) {
    case Z_NEED_DICT:
    case Z_DATA_ERROR:
    case Z_MEM_ERROR:
    case Z_STREAM_ERROR:
    // Z_BUF_ERROR means no further progress is possible. Since the output
    // buffer was just reset, this indicates truncated input; bail out here
    // instead of looping forever.
    case Z_BUF_ERROR:
      inflateEnd(&strm);
      return false;
    }

    std::size_t have = bufferSize - strm.avail_out;
    decompressedData.insert(decompressedData.end(), buffer.begin(),
                            buffer.begin() + have);
  } while (ret != Z_STREAM_END);

  // Clean up and return.
  inflateEnd(&strm);
  return true;
}

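// Deflate `inputData` into a gzip-wrapped byte stream in `outputData`, using
// the default compression level. Returns false on empty input or any zlib
// error.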
bool compress(const std::uint8_t *inputData, std::size_t inLen,
              std::vector<std::uint8_t> &outputData) {
  if (!inLen) {
    return false;
  }
  outputData.clear();

  z_stream strm = {};
  strm.zalloc = Z_NULL;
  strm.zfree = Z_NULL;
  strm.opaque = Z_NULL;
  strm.next_in =
      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(inputData));
  strm.avail_in = static_cast<uInt>(inLen);

  // Initialize zlib compression stream.
  if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 16 + MAX_WBITS, 8,
                   Z_DEFAULT_STRATEGY) != Z_OK) {
    return false;
  }

  const std::size_t bufferSize = 4096;
  std::vector<std::uint8_t> buffer(bufferSize);

  int ret;
  do {
    strm.avail_out = bufferSize;
    strm.next_out = buffer.data();

    ret = deflate(&strm, Z_FINISH);

    switch (ret) {
    case Z_NEED_DICT:
    case Z_DATA_ERROR:
    case Z_MEM_ERROR:
    case Z_STREAM_ERROR:
      deflateEnd(&strm);
      return false;
    }

    std::size_t have = bufferSize - strm.avail_out;
    outputData.insert(outputData.end(), buffer.begin(), buffer.begin() + have);
  } while (strm.avail_out == 0);

  // Clean up and return.
  deflateEnd(&strm);

  return true;
}
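
// Example round trip using the two helpers above (illustrative caller code,
// not part of this file):
//
//   std::vector<std::uint8_t> raw = ...; // some payload
//   std::vector<std::uint8_t> packed, restored;
//   if (compress(raw.data(), raw.size(), packed) &&
//       decompress(packed.data(), packed.size(), restored)) {
//     // `restored` now matches the original bytes in `raw`.
//   }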

void float16ToFloat32(const std::uint16_t *inputData, float *outputData,
                      std::size_t numElements) {
  for (std::size_t i = 0; i < numElements; ++i)
    outputData[i] = float16ToFloat32(inputData[i]);
}

void float32ToFloat16(const float *inputData, std::uint16_t *outputData,
                      std::size_t numElements) {
  for (std::size_t i = 0; i < numElements; ++i)
    outputData[i] = float32ToFloat16(inputData[i]);
}

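// Largest element of `inputData` (not the largest magnitude).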
template <typename T>
T maxOfFloatArray(const T *inputData, std::size_t numElements) {
  T fMax = -std::numeric_limits<T>::max();
  for (std::size_t i = 0; i < numElements; ++i)
    fMax = std::max(fMax, inputData[i]);
  return fMax;
}

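// Infinity norm: largest absolute value in `inputData`.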
template <typename T>
T infNormOfFloatArray(const T *inputData, std::size_t numElements) {
  T fMax = static_cast<T>(0.0);
  for (std::size_t i = 0; i < numElements; ++i)
    fMax = std::max(fMax, std::abs(inputData[i]));
  return fMax;
}

// Unpack NGP MLP weights from a blocked layout of 4x4 tiles into a dense
// row-major (d1 x d2) matrix. d1 and d2 must be multiples of 4.
void unpackMLPWeight(const float *in, float *out, const std::size_t d1,
                     const std::size_t d2) {
  std::size_t numColMat = d1 / 4;
  std::size_t numRowMat = d2 / 4;
  for (std::size_t i = 0; i < numColMat; i++) {
    for (std::size_t j = 0; j < numRowMat; j++) {
      for (std::size_t k = 0; k < 4; k++) {
        for (std::size_t l = 0; l < 4; l++) {
          // Source: tile (i, j), element (k, l); destination: row i*4+k,
          // column j*4+l of the dense matrix.
          const std::size_t in_idx = (((i * numRowMat + j) * 4) + k) * 4 + l;
          const std::size_t out_idx = ((i * 4 + k) * numRowMat + j) * 4 + l;

          out[out_idx] = in[in_idx];
        }
      }
    }
  }
}

// Pack a dense row-major (d1 x d2) matrix of NGP MLP weights into the blocked
// 4x4 tile layout (the inverse of unpackMLPWeight above).
void packMLPWeight(const float *in, float *out, const std::size_t d1,
                   const std::size_t d2) {
  std::size_t numColMat = d1 / 4;
  std::size_t numRowMat = d2 / 4;
  for (std::size_t i = 0; i < numColMat; i++) {
    for (std::size_t k = 0; k < 4; k++) {
      for (std::size_t j = 0; j < numRowMat; j++) {
        for (std::size_t l = 0; l < 4; l++) {
          // Source: row i*4+k, column j*4+l of the dense matrix;
          // destination: tile (i, j), element (k, l).
          const std::size_t in_idx = ((i * 4 + k) * numRowMat + j) * 4 + l;
          const std::size_t out_idx = (((i * numRowMat + j) * 4) + k) * 4 + l;

          out[out_idx] = in[in_idx];
        }
      }
    }
  }
}

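// Identifier used to tag Adobe's NeRF asset extension data.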
const char *getNerfExtString() { return "ADOBE_nerf_asset"; }

template USDFFUTILS_API float maxOfFloatArray<float>(const float *inputData,
                                                     std::size_t numElements);
template USDFFUTILS_API double maxOfFloatArray<double>(const double *inputData,
                                                       std::size_t numElements);
template USDFFUTILS_API float
infNormOfFloatArray<float>(const float *inputData, std::size_t numElements);
template USDFFUTILS_API double
infNormOfFloatArray<double>(const double *inputData, std::size_t numElements);
} // namespace adobe::usd