| 
 | 1 | +/*  | 
 | 2 | +Copyright 2023 Adobe. All rights reserved.  | 
 | 3 | +This file is licensed to you under the Apache License, Version 2.0 (the  | 
 | 4 | +"License"); you may not use this file except in compliance with the License. You  | 
 | 5 | +may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0  | 
 | 6 | +
  | 
 | 7 | +Unless required by applicable law or agreed to in writing, software distributed  | 
 | 8 | +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR  | 
 | 9 | +REPRESENTATIONS OF ANY KIND, either express or implied. See the License for the  | 
 | 10 | +specific language governing permissions and limitations under the License.  | 
 | 11 | +*/  | 
#include "neuralAssetsHelper.h"

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>
#include <zlib.h>
 | 18 | + | 
 | 19 | +namespace adobe::usd {  | 
 | 20 | + | 
 | 21 | +namespace {  | 
 | 22 | +union Fp32 {  | 
 | 23 | +  std::uint32_t u = 0;  | 
 | 24 | +  float f;  | 
 | 25 | +};  | 
 | 26 | + | 
 | 27 | +constexpr Fp32 f32Infty = {255 << 23};  | 
 | 28 | +constexpr Fp32 f16Infty = {31 << 23};  | 
 | 29 | +constexpr Fp32 magic = {15 << 23};  | 
 | 30 | + | 
 | 31 | +constexpr Fp32 magic2 = {(254 - 15) << 23};  | 
 | 32 | +constexpr Fp32 wasInfNan = {(127 + 16) << 23};  | 
 | 33 | + | 
 | 34 | +inline std::uint16_t float32ToFloat16(const float fl) {  | 
 | 35 | +  constexpr unsigned int signMask = 0x80000000u;  | 
 | 36 | +  constexpr unsigned int roundMask = ~0xfffu;  | 
 | 37 | + | 
 | 38 | +  std::uint16_t o = 0;  | 
 | 39 | + | 
 | 40 | +  Fp32 f;  | 
 | 41 | +  f.f = fl;  | 
 | 42 | + | 
 | 43 | +  const unsigned int sign = f.u & signMask;  | 
 | 44 | +  f.u ^= sign;  | 
 | 45 | + | 
 | 46 | +  // NOTE all the integer compares in this function can be safely  | 
 | 47 | +  // compiled into signed compares since all operands are below  | 
 | 48 | +  // 0x80000000. Important if you want fast straight SSE2 code  | 
 | 49 | +  // (since there's no unsigned PCMPGTD).  | 
 | 50 | + | 
 | 51 | +  if (f.u >= f32Infty.u) // Inf or NaN (all exponent bits set)  | 
 | 52 | +    o = (f.u > f32Infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf  | 
 | 53 | +  else                                        // (de)normalized number or zero  | 
 | 54 | +  {  | 
 | 55 | +    f.u &= roundMask;  | 
 | 56 | +    f.f *= magic.f;  | 
 | 57 | +    f.u -= roundMask;  | 
 | 58 | +    if (f.u > f16Infty.u) // clamp to signed infinity if overflowed  | 
 | 59 | +      f.u = f16Infty.u;  | 
 | 60 | + | 
 | 61 | +    o = static_cast<std::uint16_t>(f.u >> 13); // take the bits!  | 
 | 62 | +  }  | 
 | 63 | + | 
 | 64 | +  o |= sign >> 16;  | 
 | 65 | +  return o;  | 
 | 66 | +}  | 
 | 67 | + | 
 | 68 | +inline float float16ToFloat32(const std::uint16_t h) {  | 
 | 69 | +  Fp32 o;  | 
 | 70 | +  o.u = (h & 0x7fff) << 13; // exponent/mantissa bits  | 
 | 71 | +  o.f *= magic2.f;          // exponent adjust  | 
 | 72 | +  if (o.f >= wasInfNan.f)   // make sure Inf/NaN survive  | 
 | 73 | +    o.u |= 255 << 23;  | 
 | 74 | +  o.u |= (h & 0x8000) << 16; // sign bit  | 
 | 75 | + | 
 | 76 | +  return o.f;  | 
 | 77 | +}  | 
 | 78 | +} // namespace  | 
 | 79 | + | 
 | 80 | +bool decompress(const std::uint8_t *inputData, std::size_t inLen,  | 
 | 81 | +                std::vector<std::uint8_t> &decompressedData) {  | 
 | 82 | +  if (!inLen) {  | 
 | 83 | +    return false;  | 
 | 84 | +  }  | 
 | 85 | +  decompressedData.clear();  | 
 | 86 | + | 
 | 87 | +  z_stream strm = {};  | 
 | 88 | +  strm.next_in =  | 
 | 89 | +      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(inputData));  | 
 | 90 | +  strm.avail_in = static_cast<uInt>(inLen);  | 
 | 91 | + | 
 | 92 | +  // Initialize the zlib decompression stream.  | 
 | 93 | +  if (inflateInit2(&strm, 16 + MAX_WBITS) != Z_OK) {  | 
 | 94 | +    return false;  | 
 | 95 | +  }  | 
 | 96 | + | 
 | 97 | +  int ret;  | 
 | 98 | +  const std::size_t bufferSize = 4096; // Temporary buffer size  | 
 | 99 | +  std::vector<std::uint8_t> buffer(bufferSize);  | 
 | 100 | + | 
 | 101 | +  // Decompress the data.  | 
 | 102 | +  do {  | 
 | 103 | +    strm.avail_out = bufferSize;  | 
 | 104 | +    strm.next_out = buffer.data();  | 
 | 105 | + | 
 | 106 | +    ret = inflate(&strm, Z_NO_FLUSH);  | 
 | 107 | + | 
 | 108 | +    switch (ret) {  | 
 | 109 | +    case Z_NEED_DICT:  | 
 | 110 | +    case Z_DATA_ERROR:  | 
 | 111 | +    case Z_MEM_ERROR:  | 
 | 112 | +    case Z_STREAM_ERROR:  | 
 | 113 | +      inflateEnd(&strm);  | 
 | 114 | +      return false;  | 
 | 115 | +    }  | 
 | 116 | + | 
 | 117 | +    std::size_t have = bufferSize - strm.avail_out;  | 
 | 118 | +    decompressedData.insert(decompressedData.end(), buffer.begin(),  | 
 | 119 | +                            buffer.begin() + have);  | 
 | 120 | +  } while (ret != Z_STREAM_END);  | 
 | 121 | + | 
 | 122 | +  // Clean up and return.  | 
 | 123 | +  inflateEnd(&strm);  | 
 | 124 | +  return true;  | 
 | 125 | +}  | 
 | 126 | + | 
 | 127 | +bool compress(const std::uint8_t *inputData, std::size_t inLen,  | 
 | 128 | +              std::vector<std::uint8_t> &outputData) {  | 
 | 129 | +  if (!inLen) {  | 
 | 130 | +    return false;  | 
 | 131 | +  }  | 
 | 132 | +  outputData.clear();  | 
 | 133 | + | 
 | 134 | +  z_stream strm = {};  | 
 | 135 | +  strm.zalloc = Z_NULL;  | 
 | 136 | +  strm.zfree = Z_NULL;  | 
 | 137 | +  strm.opaque = Z_NULL;  | 
 | 138 | +  strm.next_in =  | 
 | 139 | +      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(inputData));  | 
 | 140 | +  strm.avail_in = static_cast<uInt>(inLen);  | 
 | 141 | + | 
 | 142 | +  // Initialize zlib compression stream.  | 
 | 143 | +  if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 16 + MAX_WBITS, 8,  | 
 | 144 | +                   Z_DEFAULT_STRATEGY) != Z_OK) {  | 
 | 145 | +    return false;  | 
 | 146 | +  }  | 
 | 147 | + | 
 | 148 | +  const std::size_t bufferSize = 4096;  | 
 | 149 | +  std::vector<std::uint8_t> buffer(bufferSize);  | 
 | 150 | + | 
 | 151 | +  int ret;  | 
 | 152 | +  do {  | 
 | 153 | +    strm.avail_out = bufferSize;  | 
 | 154 | +    strm.next_out = buffer.data();  | 
 | 155 | + | 
 | 156 | +    ret = deflate(&strm, Z_FINISH);  | 
 | 157 | + | 
 | 158 | +    switch (ret) {  | 
 | 159 | +    case Z_NEED_DICT:  | 
 | 160 | +    case Z_DATA_ERROR:  | 
 | 161 | +    case Z_MEM_ERROR:  | 
 | 162 | +    case Z_STREAM_ERROR:  | 
 | 163 | +      deflateEnd(&strm);  | 
 | 164 | +      return false;  | 
 | 165 | +    }  | 
 | 166 | + | 
 | 167 | +    std::size_t have = bufferSize - strm.avail_out;  | 
 | 168 | +    outputData.insert(outputData.end(), buffer.begin(), buffer.begin() + have);  | 
 | 169 | +  } while (strm.avail_out == 0);  | 
 | 170 | + | 
 | 171 | +  // Clean up and return.  | 
 | 172 | +  deflateEnd(&strm);  | 
 | 173 | + | 
 | 174 | +  return true;  | 
 | 175 | +}  | 
 | 176 | + | 
 | 177 | +void float16ToFloat32(const std::uint16_t *inputData, float *outputData,  | 
 | 178 | +                      std::size_t numElements) {  | 
 | 179 | +  for (std::size_t i = 0; i < numElements; ++i)  | 
 | 180 | +    outputData[i] = float16ToFloat32(inputData[i]);  | 
 | 181 | +}  | 
 | 182 | + | 
 | 183 | +void float32ToFloat16(const float *inputData, std::uint16_t *outputData,  | 
 | 184 | +                      std::size_t numElements) {  | 
 | 185 | +  for (std::size_t i = 0; i < numElements; ++i)  | 
 | 186 | +    outputData[i] = float32ToFloat16(inputData[i]);  | 
 | 187 | +}  | 
 | 188 | + | 
 | 189 | +template <typename T>  | 
 | 190 | +T maxOfFloatArray(const T *inputData, std::size_t numElements) {  | 
 | 191 | +  T fMax = -std::numeric_limits<T>::max();  | 
 | 192 | +  for (std::size_t i = 0; i < numElements; ++i)  | 
 | 193 | +    fMax = std::max(fMax, inputData[i]);  | 
 | 194 | +  return fMax;  | 
 | 195 | +}  | 
 | 196 | + | 
 | 197 | +template <typename T>  | 
 | 198 | +T infNormOfFloatArray(const T *inputData, std::size_t numElements) {  | 
 | 199 | +  T fMax = static_cast<T>(0.0);  | 
 | 200 | +  for (std::size_t i = 0; i < numElements; ++i)  | 
 | 201 | +    fMax = std::max(fMax, std::abs(inputData[i]));  | 
 | 202 | +  return fMax;  | 
 | 203 | +}  | 
 | 204 | + | 
 | 205 | +// Unpack the 4x4 matrix on NGP's weights  | 
 | 206 | +void unpackMLPWeight(const float *in, float *out, const std::size_t d1,  | 
 | 207 | +                     const std::size_t d2) {  | 
 | 208 | +  std::size_t numColMat = d1 / 4;  | 
 | 209 | +  std::size_t numRowMat = d2 / 4;  | 
 | 210 | +  for (std::size_t i = 0; i < numColMat; i++) {  | 
 | 211 | +    for (std::size_t j = 0; j < numRowMat; j++) {  | 
 | 212 | +      for (std::size_t k = 0; k < 4; k++) {  | 
 | 213 | +        for (std::size_t l = 0; l < 4; l++) {  | 
 | 214 | +          const std::size_t in_idx = (((i * numRowMat + j) * 4) + k) * 4 + l;  | 
 | 215 | +          const std::size_t out_idx = ((i * 4 + k) * numRowMat + j) * 4 + l;  | 
 | 216 | + | 
 | 217 | +          out[out_idx] = in[in_idx];  | 
 | 218 | +        }  | 
 | 219 | +      }  | 
 | 220 | +    }  | 
 | 221 | +  }  | 
 | 222 | +}  | 
 | 223 | + | 
 | 224 | +// Pack the 4x4 matrix on NGP's weights  | 
 | 225 | +void packMLPWeight(const float *in, float *out, const std::size_t d1,  | 
 | 226 | +                   const std::size_t d2) {  | 
 | 227 | +  std::size_t numColMat = d1 / 4;  | 
 | 228 | +  std::size_t numRowMat = d2 / 4;  | 
 | 229 | +  for (std::size_t i = 0; i < numColMat; i++) {  | 
 | 230 | +    for (std::size_t k = 0; k < 4; k++) {  | 
 | 231 | +      for (std::size_t j = 0; j < numRowMat; j++) {  | 
 | 232 | +        for (std::size_t l = 0; l < 4; l++) {  | 
 | 233 | +          const std::size_t in_idx = ((i * 4 + k) * numRowMat + j) * 4 + l;  | 
 | 234 | +          const std::size_t out_idx = (((i * numRowMat + j) * 4) + k) * 4 + l;  | 
 | 235 | + | 
 | 236 | +          out[out_idx] = in[in_idx];  | 
 | 237 | +        }  | 
 | 238 | +      }  | 
 | 239 | +    }  | 
 | 240 | +  }  | 
 | 241 | +}  | 
 | 242 | + | 
 | 243 | +const char *getNerfExtString() { return "ADOBE_nerf_asset"; }  | 
 | 244 | + | 
 | 245 | +template USDFFUTILS_API float maxOfFloatArray<float>(const float *inputData,  | 
 | 246 | +                                                     std::size_t numElements);  | 
 | 247 | +template USDFFUTILS_API double maxOfFloatArray<double>(const double *inputData,  | 
 | 248 | +                                                       std::size_t numElements);  | 
 | 249 | +template USDFFUTILS_API float  | 
 | 250 | +infNormOfFloatArray<float>(const float *inputData, std::size_t numElements);  | 
 | 251 | +template USDFFUTILS_API double  | 
 | 252 | +infNormOfFloatArray<double>(const double *inputData, std::size_t numElements);  | 
 | 253 | +} // namespace adobe::usd  | 
0 commit comments