Skip to content

Commit

Permalink
fat-seer-6 (#196)
Browse files Browse the repository at this point in the history
Elo   | 14.98 +- 6.61 (95%)
SPRT  | 40.0+0.40s Threads=1 Hash=64MB
LLR   | 2.97 (-2.94, 2.94) [0.00, 5.00]
Games | N: 2600 W: 705 L: 593 D: 1302
Penta | [5, 242, 698, 346, 9]
bench: 4173348
  • Loading branch information
connormcmonigle authored Nov 23, 2024
1 parent bb4ebcf commit b04754b
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 356 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ Seer is an original, strong UCI chess engine. Seer relies on a neural network es
The latest network can be found [here](https://github.com/connormcmonigle/seer-training/releases)
```
cd build
wget -O eval.bin https://github.com/connormcmonigle/seer-training/releases/download/0x35ddef41/q0x35ddef41.bin
wget -O eval.bin https://github.com/connormcmonigle/seer-training/releases/download/0x2291e0ff/q0x2291e0ff.bin
make pgo EVALFILE=eval.bin
```
2 changes: 1 addition & 1 deletion build/makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ EXE = seer
CXX = g++

CXXSTANDARD = 17
EVALFILE = weights/q0x35ddef41.bin
EVALFILE = weights/q0x2291e0ff.bin
OPSLIMIT = 1000000000

CXXSRC += $(wildcard ../src/*.cc )
Expand Down
62 changes: 43 additions & 19 deletions include/nnue/dense_relu_affine_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,63 +22,87 @@
#include <nnue/dot_type.h>
#include <nnue/simd.h>

#include <algorithm>
#include <cstddef>

namespace nnue {

template <typename T, std::size_t dim0, std::size_t dim1>
template <std::size_t dim0, std::size_t dim1, typename T, typename I = T, typename O = dot_type<I>>
struct dense_relu_affine_layer {
static constexpr std::size_t W_numel = dim0 * dim1;
static constexpr std::size_t b_numel = dim1;

alignas(simd::alignment) T W[W_numel];
alignas(simd::alignment) dot_type<T> b[b_numel];
alignas(simd::alignment) O b[b_numel];

[[nodiscard]] constexpr std::size_t num_parameters() const noexcept { return W_numel + b_numel; }

[[nodiscard]] inline aligned_vector<dot_type<T>, dim1> forward(const aligned_vector<T, dim0>& x) const noexcept {
auto result = aligned_vector<dot_type<T>, dim1>::from(b);
[[nodiscard]] inline aligned_vector<O, dim1> forward_relu(const aligned_vector<I, dim0>& x) const noexcept {
auto result = aligned_vector<O, dim1>::from(b);
simd::relu_matrix_vector_product<dim0, dim1>(W, x.data, result.data);
return result;
}

[[nodiscard]] inline aligned_vector<dot_type<T>, dim1> forward(const aligned_slice<T, dim0>& x) const noexcept {
auto result = aligned_vector<dot_type<T>, dim1>::from(b);
[[nodiscard]] inline aligned_vector<O, dim1> forward_relu(const aligned_slice<I, dim0>& x) const noexcept {
auto result = aligned_vector<O, dim1>::from(b);
simd::relu_matrix_vector_product<dim0, dim1>(W, x.data, result.data);
return result;
}

[[nodiscard]] inline aligned_vector<O, dim1> forward_crelu255(const aligned_vector<I, dim0>& x) const noexcept {
auto result = aligned_vector<O, dim1>::from(b);
simd::crelu255_matrix_vector_product<dim0, dim1>(W, x.data, result.data);
return result;
}

[[nodiscard]] inline aligned_vector<O, dim1> forward_crelu255(const aligned_slice<I, dim0>& x) const noexcept {
auto result = aligned_vector<O, dim1>::from(b);
simd::crelu255_matrix_vector_product<dim0, dim1>(W, x.data, result.data);
return result;
}

template <typename streamer_type>
[[maybe_unused]] dense_relu_affine_layer<T, dim0, dim1>& load_(streamer_type& streamer) noexcept {
streamer.template stream<T>(W, W_numel).template stream<dot_type<T>>(b, b_numel);
[[maybe_unused]] dense_relu_affine_layer<dim0, dim1, T, I, O>& load_(streamer_type& streamer) noexcept {
streamer.template stream<T>(W, W_numel).template stream<O>(b, b_numel);
return *this;
}

template <typename exporter_type>
[[maybe_unused]] const dense_relu_affine_layer<T, dim0, dim1>& write_(exporter_type& exporter) const noexcept {
exporter.template write<T>(W, W_numel).template write<dot_type<T>>(b, b_numel);
[[maybe_unused]] const dense_relu_affine_layer<dim0, dim1, T, I, O>& write_(exporter_type& exporter) const noexcept {
exporter.template write<T>(W, W_numel).template write<O>(b, b_numel);
return *this;
}

[[nodiscard]] dense_relu_affine_layer<T, dim0, dim1> half_input_flipped() const noexcept {
[[nodiscard]] dense_relu_affine_layer<dim0, dim1, T, I, O> half_input_flipped() const noexcept {
static_assert(dim0 % 2 == 0);
constexpr std::size_t half_dim0 = dim0 / 2;

dense_relu_affine_layer<T, dim0, dim1> result = *this;
dense_relu_affine_layer<dim0, dim1, T, I, O> result = *this;

for (std::size_t i(0); i < W_numel; i += dim0) {
for (std::size_t j(0); j < half_dim0; ++j) { std::iter_swap(result.W + i + j, result.W + half_dim0 + i + j); }
}

return result;
}

template <typename U>
[[nodiscard]] dense_relu_affine_layer<U, dim0, dim1> quantized(const T& weight_scale, const T& bias_scale) const noexcept {
static_assert(std::is_floating_point_v<T> && std::is_integral_v<U>);
dense_relu_affine_layer<U, dim0, dim1> result{};
#pragma omp simd
for (std::size_t i = 0; i < W_numel; ++i) { result.W[i] = static_cast<U>(std::round(weight_scale * W[i])); }
for (std::size_t i = 0; i < b_numel; ++i) { result.b[i] = static_cast<dot_type<U>>(std::round(bias_scale * b[i])); }
template <typename Q, typename QI = Q, typename QO = dot_type<QI>>
[[nodiscard]] dense_relu_affine_layer<dim0, dim1, Q, QI, QO> quantized(const T& weight_scale, const T& bias_scale) const noexcept {
static_assert(std::is_floating_point_v<T> && std::is_integral_v<Q> && std::is_integral_v<QI> && std::is_integral_v<QO>);
dense_relu_affine_layer<dim0, dim1, Q, QI, QO> result{};

for (std::size_t i = 0; i < W_numel; ++i) {
const float lower_limit = static_cast<float>(std::numeric_limits<Q>::min());
const float upper_limit = static_cast<float>(std::numeric_limits<Q>::max());
result.W[i] = static_cast<Q>(std::clamp(std::round(weight_scale * W[i]), lower_limit, upper_limit));
}

for (std::size_t i = 0; i < b_numel; ++i) {
const float lower_limit = static_cast<float>(std::numeric_limits<QO>::min());
const float upper_limit = static_cast<float>(std::numeric_limits<QO>::max());
result.b[i] = static_cast<QO>(std::clamp(std::round(bias_scale * b[i]), lower_limit, upper_limit));
}

return result;
}
};
Expand Down
8 changes: 4 additions & 4 deletions include/nnue/eval.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ struct eval : public chess::sided<eval, feature_transformer<weights::quantized_p

// Propagate the accumulated feature vector through the fully-connected tower
// for the side-to-move perspective (pov selects white/black fc0 weights).
// fc0 uses the quantized crelu255 path and is dequantized back to
// parameter_type; the remaining layers use the float relu path with skip
// (concat) connections. Returns the encoder's view of the last hidden layer
// together with the scalar network output.
// NOTE(review): scraped diff interleaved pre-/post-commit lines; this is the
// reconstructed post-commit method.
template <typename F>
[[nodiscard]] inline propagate_data<std::invoke_result_t<F, final_output_type>> propagate(const bool pov, F&& final_output_encoder) const noexcept {
  const auto x1 = (pov ? weights_->white_fc0 : weights_->black_fc0).forward_crelu255(base_).dequantized<parameter_type>(weights::dequantization_scale);
  const auto x2 = concat(x1, weights_->fc1.forward_relu(x1));
  const auto x3 = concat(x2, weights_->fc2.forward_relu(x2));
  return propagate_data(final_output_encoder(x3), weights_->fc3.forward_relu(x3).item());
}

template <typename F = void_final_output_encoder>
Expand Down
Loading

0 comments on commit b04754b

Please sign in to comment.