Skip to content

More code clean-up #4506

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion bench/RowwiseAdagradFusedBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

#include "./BenchUtils.h"
#include "fbgemm/Fbgemm.h"
#include "src/RefImplementations.h" // @manual

using namespace std;
using namespace fbgemm;
Expand Down
1 change: 0 additions & 1 deletion include/fbgemm/FbgemmFPCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#pragma once

#include <fbgemm/FbgemmPackMatrixB.h>
#include <fbgemm/SimdUtils.h>
#include <fbgemm/Types.h>
#include <fbgemm/Utils.h>
#include <array>
Expand Down
2 changes: 1 addition & 1 deletion include/fbgemm/OutputProcessing-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ inline int ReQuantizeForFloat<FUSE_RELU, Q_GRAN, outT, inT, nextOPType>::f(
assert(
block.col_size <= ncol_per_group &&
"ReQuantizeOutput should be called at most 1 group at a time.");
int g = block.col_start / ncol_per_group;
if constexpr (
instSet == inst_set_t::anyarch || !std::is_same_v<outT, float>) {
for (int i = block.row_start; i < block.row_start + block.row_size; ++i) {
Expand All @@ -224,6 +223,7 @@ inline int ReQuantizeForFloat<FUSE_RELU, Q_GRAN, outT, inT, nextOPType>::f(
if constexpr (Q_GRAN == QuantizationGranularity::TENSOR) {
Bq_zero_point_idx = 0;
} else if constexpr (Q_GRAN == QuantizationGranularity::GROUP) {
int g = block.col_start / ncol_per_group;
Bq_zero_point_idx = g;
} else if constexpr (Q_GRAN == QuantizationGranularity::OUT_CHANNEL) {
Bq_zero_point_idx = j;
Expand Down
30 changes: 27 additions & 3 deletions include/fbgemm/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <array>
#include <cassert>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <string>
#include <type_traits>

Expand Down Expand Up @@ -96,16 +98,38 @@ FBGEMM_API int compare_buffers(
float atol = 1e-3);

/**
* @brief Debugging helper.
* @brief Print the matrix.
* @param op Transpose type of the matrix.
* @param R The height of the matrix.
* @param C The width of the matrix.
* @param ld The leading dimension of the matrix.
* @param name The prefix string before printing the matrix.
*/
template <typename T>
void printMatrix(
matrix_op_t trans,
matrix_op_t op,
const T* inp,
size_t R,
size_t C,
size_t ld,
std::string name);
const std::string& name) {
// R: number of rows in op(inp)
// C: number of cols in op(inp)
// ld: leading dimension in inp
std::cout << name << ":" << "[" << R << ", " << C << "]" << '\n';
bool tr = (op == matrix_op_t::Transpose);
for (size_t r = 0; r < R; ++r) {
for (size_t c = 0; c < C; ++c) {
T res = tr ? inp[c * ld + r] : inp[r * ld + c];
if constexpr (std::is_integral_v<T>) {
std::cout << std::setw(5) << static_cast<int64_t>(res) << " ";
} else {
std::cout << std::setw(5) << res << " ";
}
}
std::cout << '\n';
}
}

/**
* @brief Transpose a matrix.
Expand Down
3 changes: 2 additions & 1 deletion src/ExecuteKernelU8S8.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
*/

#pragma once
#include "./ExecuteKernel.h" // @manual
#include <cstdint>
#include "./ExecuteKernelGeneric.h" // @manual

namespace fbgemm {

Expand Down
3 changes: 1 addition & 2 deletions src/Fbgemm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
#define FBGEMM_EXPORTS
#include "fbgemm/Fbgemm.h"
#include <cpuinfo.h>
#include <functional>
#include <stdexcept>
#include "./ExecuteKernel.h" // @manual
#include "./ExecuteKernelU8S8.h" // @manual

#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
double packing_time = 0.0;
Expand Down
2 changes: 1 addition & 1 deletion src/FbgemmBfloat16Convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmConvert.h"

#include "./RefImplementations.h" // @manual
#include <stdexcept>

#ifdef USE_MKL
#include <mkl.h>
Expand Down
2 changes: 2 additions & 0 deletions src/FbgemmFPCommon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ partition_array_t partition_sve128 = {
}
};

#ifdef FBGEMM_ENABLE_KLEIDIAI
partition_array_t partition_neon = {
// NOTE: clang-format wants to use a different formatting but the current
// formatting should be easier to read.
Expand Down Expand Up @@ -427,6 +428,7 @@ partition_array_t partition_neon = {
{{ { 8, 15 }, { 0, 0 } } }, // 120
}
};
#endif


partition_array_t partition_avx512 = {
Expand Down
2 changes: 0 additions & 2 deletions src/FbgemmFloat16Convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmConvert.h"

#include "./RefImplementations.h" // @manual

#ifdef USE_MKL
#include <mkl.h>
#endif
Expand Down
1 change: 0 additions & 1 deletion src/GenerateKernelU8S8S32ACC16Avx512.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
*/

#include <iostream>
#include "./CodeGenHelpers.h" // @manual
#include "./GenerateKernel.h" // @manual

namespace fbgemm {
Expand Down
2 changes: 0 additions & 2 deletions src/GroupwiseConv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
#include <tuple>
#include <type_traits>
#include "./CodeGenHelpers.h" // @manual
#include "./RefImplementations.h" // @manual
#include "./TransposeUtils.h" // @manual
#include "fbgemm/Fbgemm.h"
#include "fbgemm/QuantUtilsAvx512.h"
#include "fbgemm/SimdUtils.h"
Expand Down
1 change: 0 additions & 1 deletion src/GroupwiseConvAcc32Avx2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#define FBGEMM_EXPORTS
#include "./CodeGenHelpers.h" // @manual
#include "./GroupwiseConv.h" // @manual
#include "fbgemm/Fbgemm.h"

namespace fbgemm {

Expand Down
5 changes: 0 additions & 5 deletions src/PackWeightsForDirectConv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,12 @@
#include <cassert>

#include "./DirectConv.h" // @manual
#include "./ExecuteKernel.h" // @manual
#include "./MaskAvx2.h" // @manual
#include "fbgemm/ConvUtils.h"
#include "fbgemm/Fbgemm.h"
#include "fbgemm/FbgemmBuild.h"
#include "fbgemm/UtilsAvx2.h"

#include "./CodeGenHelpers.h" // @manual
#include "./OptimizedKernelsAvx2.h" // @manual
#include "./RefImplementations.h" // @manual
#include "./TransposeUtils.h" // @manual
namespace fbgemm {

PackedDirectConvMatrix::PackedDirectConvMatrix(
Expand Down
63 changes: 0 additions & 63 deletions src/Utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
#include <new>
Expand Down Expand Up @@ -84,39 +83,6 @@ int compare_buffers(
return 0;
}

/**
 * @brief Dump a matrix to standard output, one matrix row per text line.
 *
 * A header line of the form `name:[R, C]` is written first. Every element is
 * then written right-aligned in a field of width 5 followed by a single
 * space. Integral element types are widened to int64_t before streaming so
 * that 8-bit types show up as numbers instead of characters.
 *
 * @param op   Transpose type of the matrix; with matrix_op_t::Transpose the
 *             element at (r, c) is read from inp[c * ld + r], otherwise from
 *             inp[r * ld + c].
 * @param inp  Pointer to the first element of the stored matrix.
 * @param R    The height of the matrix, i.e. number of rows in op(inp).
 * @param C    The width of the matrix, i.e. number of cols in op(inp).
 * @param ld   The leading dimension of the matrix as stored in inp.
 * @param name The prefix string before printing the matrix.
 */
template <typename T>
void printMatrix(
    matrix_op_t op,
    const T* inp,
    size_t R,
    size_t C,
    size_t ld,
    const std::string& name) {
  std::cout << name << ":" << "[" << R << ", " << C << "]" << '\n';

  const bool transposed = (op == matrix_op_t::Transpose);
  for (size_t row = 0; row < R; ++row) {
    for (size_t col = 0; col < C; ++col) {
      // Pick the storage offset that corresponds to logical element
      // (row, col) of op(inp).
      const size_t offset = transposed ? col * ld + row : row * ld + col;
      const T value = inp[offset];

      // setw only affects the next formatted output, i.e. the element itself.
      std::cout << std::setw(5);
      if constexpr (std::is_integral_v<T>) {
        std::cout << static_cast<int64_t>(value);
      } else {
        std::cout << value;
      }
      std::cout << " ";
    }
    std::cout << '\n';
  }
}

template int compare_buffers<float>(
const float* ref,
Expand Down Expand Up @@ -154,35 +120,6 @@ template int compare_buffers<int64_t>(
size_t max_mismatches_to_report,
float atol);

// Explicit instantiations of printMatrix for the element types float, int8_t,
// uint8_t and int32_t. They ask the compiler to emit these four
// specializations in this translation unit.
// NOTE(review): presumably needed because the template body is not visible to
// callers that only see the declaration -- confirm against the header.
template void printMatrix<float>(
matrix_op_t op,
const float* inp,
size_t R,
size_t C,
size_t ld,
const std::string& name);
template void printMatrix<int8_t>(
matrix_op_t op,
const int8_t* inp,
size_t R,
size_t C,
size_t ld,
const std::string& name);
template void printMatrix<uint8_t>(
matrix_op_t op,
const uint8_t* inp,
size_t R,
size_t C,
size_t ld,
const std::string& name);
template void printMatrix<int32_t>(
matrix_op_t op,
const int32_t* inp,
size_t R,
size_t C,
size_t ld,
const std::string& name);

namespace {
inst_set_t g_forced_isa = inst_set_t::anyarch;
std::optional<bool> g_Avx512_Ymm_enabled{std::nullopt};
Expand Down
Loading
Loading