|
1 | 1 | #include "caffe2/operators/fused_rowwise_random_quantization_ops.h"
|
2 | 2 | #include "caffe2/core/registry.h"
|
| 3 | +#include "caffe2/utils/math.h" |
3 | 4 |
|
4 | 5 | namespace caffe2 {
|
| 6 | + |
// Evaluates to true when the first (lowest-addressed) byte of the 32-bit
// integer 1 is itself 1, i.e. when the host is little-endian. Written as an
// immediately-invoked lambda so it can appear anywhere an expression is
// expected. The expansion is fully parenthesized so that it composes safely
// with whatever operators surround the macro at the use site.
#define IS_LITTLE_ENDIAN                                      \
  ([] {                                                       \
    const int32_t kValue = 1;                                 \
    return reinterpret_cast<const uint8_t*>(&kValue)[0] == 1; \
  }())
| 12 | + |
| 13 | +template <class Context> |
| 14 | +bool FloatToFusedRandRowwiseQuantizedOp<Context>::RunOnDevice() { |
| 15 | + CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness"); |
| 16 | + |
| 17 | + const auto& input = Input(DATA_FLOAT); |
| 18 | + auto* output = Output(DATA_FUSED_QUANTIZED); |
| 19 | + |
| 20 | + CAFFE_ENFORCE_EQ( |
| 21 | + input.ndim(), |
| 22 | + 2, |
| 23 | + "Expect input to be a matrix. Reshape the input tensor to a matrix for usage."); |
| 24 | + |
| 25 | + const auto input_rows = input.dim(0); |
| 26 | + const auto input_columns = input.dim(1); |
| 27 | + |
| 28 | + // The "fused" representation stores the [bitwidth][tail][min][max] |
| 29 | + // with the row-wise quantized data in one tensor. Since we store 8/bitwidth |
| 30 | + // quantized data in one byte, the last buckets of some bytes may have |
| 31 | + // unused bits. There are totally tail buckets are unused. |
| 32 | + // We encode *bitwidth* and *tail* at the beginning of |
| 33 | + // each row, following by 32-bit floating data respresenting min and max. |
| 34 | + // | bitwidth | tail | min | max | ... int8 data ... | |
| 35 | + // | 1B | 1B | 4B | 4B | ...output_data....| |
| 36 | + // In output_data: the b-th bucket of the i-th byte stores |
| 37 | + // the i-th data of the b-th segment of input row |
| 38 | + size_t data_per_byte = 8 / bitwidth_; |
| 39 | + // How many bytes in the output |
| 40 | + size_t segment_size = (input_columns + data_per_byte - 1) / data_per_byte; |
| 41 | + const std::vector<TIndex> output_dimensions = { |
| 42 | + input_rows, 10 + static_cast<TIndex>(segment_size)}; |
| 43 | + output->Resize(output_dimensions); |
| 44 | + |
| 45 | + const auto* input_data = input.template data<float>(); |
| 46 | + auto* output_data = output->template mutable_data<uint8_t>(); |
| 47 | + const size_t output_columns = static_cast<size_t>(output->dim(1)); |
| 48 | + memset(output_data, 0, output->size()); |
| 49 | + |
| 50 | + if (random_) { |
| 51 | +#ifdef FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL |
| 52 | + random_buffer_.resize(input_columns); |
| 53 | +#endif |
| 54 | + } |
| 55 | + |
| 56 | + for (size_t row = 0; row < input_rows; ++row) { |
| 57 | + math::quantize_and_compress( |
| 58 | + input_data + row * input_columns, |
| 59 | + output_data + row * output_columns, |
| 60 | + input_columns, |
| 61 | + bitwidth_, |
| 62 | + random_, |
| 63 | +#ifdef FUSED_ROWWISE_RANDOM_QUANTIZATION_USE_MKL |
| 64 | + vslStream_, |
| 65 | + random_buffer_ |
| 66 | +#else |
| 67 | + dis_, |
| 68 | + gen_ |
| 69 | +#endif |
| 70 | + ); |
| 71 | + } |
| 72 | + |
| 73 | + return true; |
| 74 | +} |
| 75 | + |
| 76 | +template <class Context> |
| 77 | +bool FusedRandRowwiseQuantizedToFloatOp<Context>::RunOnDevice() { |
| 78 | + CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness"); |
| 79 | + |
| 80 | + const auto& input = Input(DATA_FUSED_QUANTIZED); |
| 81 | + auto* output = Output(DATA_FLOAT); |
| 82 | + CAFFE_ENFORCE_EQ(input.ndim(), 2, "Expect input to be a matrix."); |
| 83 | + CAFFE_ENFORCE_GE( |
| 84 | + input.size(), 4, "Expect input to have size greater than or equal to 4."); |
| 85 | + |
| 86 | + const auto input_rows = input.dim(0); |
| 87 | + const auto input_columns = input.dim(1); |
| 88 | + const auto* input_data = input.template data<uint8_t>(); |
| 89 | + const size_t bitwidth = input_data[0]; |
| 90 | + CAFFE_ENFORCE( |
| 91 | + bitwidth == 1 || bitwidth == 2 || bitwidth == 4 || bitwidth == 8, |
| 92 | + "Unsupported bitwidth"); |
| 93 | + const size_t tail = input_data[1]; |
| 94 | + const size_t output_columns = (input_columns - 10) * (8 / bitwidth) - tail; |
| 95 | + const std::vector<TIndex> output_dimensions = { |
| 96 | + input_rows, static_cast<TIndex>(output_columns)}; |
| 97 | + output->Resize(output_dimensions); |
| 98 | + auto* output_data = output->template mutable_data<float>(); |
| 99 | + for (size_t row = 0; row < input_rows; ++row) { |
| 100 | + math::decompress_and_dequantize( |
| 101 | + input_data + row * input_columns, |
| 102 | + output_data + row * output_columns, |
| 103 | + input_columns); |
| 104 | + } |
| 105 | + |
| 106 | + return true; |
| 107 | +} |
| 108 | + |
| 109 | +#undef IS_LITTLE_ENDIAN |
| 110 | + |
5 | 111 | REGISTER_CPU_OPERATOR(
|
6 | 112 | FloatToFusedRandRowwiseQuantized,
|
7 | 113 | FloatToFusedRandRowwiseQuantizedOp<CPUContext>);
|
|
0 commit comments