Skip to content

Commit caa7faa

Browse files
committed
Replace skills with a new scheme to limit strength
Based on Sopel's initial implementation discussed in official-stockfish#3635. In this new scheme, the strength of the engine is limited by replacing a (varying) part of the evaluation with a random perturbation. This scheme is easier to implement than our current skill level implementation, and has the advantage of a wider Elo range, being both weaker than skill level 1 and stronger than skill level 19. The skill level option is removed; instead, UCI_Elo and UCI_LimitStrength are the only options available. UCI_Elo is calibrated such that 1500 Elo is equivalent in strength to the engine maia1 (https://lichess.org/@/maia1), which has a blitz rating on lichess of 1500 (based on nearly 600k human games). The full Elo range (750 - 5200) is obtained by playing games between engines roughly 100-200 Elo apart with the perturbation going from 0 to 1000, and fitting the Ordo results. With this fit, a conversion from UCI_Elo to the magnitude of the random perturbation is possible. All games are played at lichess blitz TC (5m+3s), and playing strength is different at different TCs. Indeed, maia1 is a fixed 1-node Leela 'search', independent of TC, whereas this scheme searches normally and improves with TC. There are a few caveats: it is unclear what the playing style of the engine is like; the old skill level was not really satisfactory, and it remains to be seen whether this approach fixes that. Furthermore, while in engine-vs-engine matches maia1 and SF@1500Elo are equivalent in strength (at blitz TC), it is not certain that its rating against humans will be the same (engine Elo and human Elo can be very different). No functional change
1 parent b7b6b4b commit caa7faa

File tree

4 files changed

+38
-74
lines changed

4 files changed

+38
-74
lines changed

src/evaluate.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <fstream>
2424
#include <iomanip>
2525
#include <sstream>
26+
#include <random>
2627
#include <iostream>
2728
#include <streambuf>
2829
#include <vector>
@@ -61,6 +62,8 @@ namespace Stockfish {
6162
namespace Eval {
6263

6364
bool useNNUE;
65+
// Set from the UCI_LimitStrength option; when true, evaluate() perturbs its result
bool limitStrength;
66+
// Weight of the random term in per-mille, set from the UCI_Elo option:
// 0 keeps the real evaluation, 1000 replaces it entirely by the random value
int randomEvalPerturb;
6467
string eval_file_loaded = "None";
6568

6669
/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
@@ -1075,6 +1078,20 @@ namespace {
10751078
: -Value(correction);
10761079
}
10771080

1081+
// Randomly perturb the evaluation in a calibrated way to yield a weaker engine
1082+
Value randomly_perturbed_eval(Value v)
1083+
{
1084+
static thread_local std::mt19937_64 tls_rng = [](){
1085+
return std::mt19937_64(std::time(nullptr));
1086+
}();
1087+
1088+
std::normal_distribution<float> d(0.0, QueenValueEg);
1089+
float r = d(tls_rng);
1090+
1091+
// linearly combine the random term with the real evaluation
1092+
return (Eval::randomEvalPerturb * Value(r) + (1000 - Eval::randomEvalPerturb) * v) / 1000;
1093+
}
1094+
10781095
} // namespace Eval
10791096

10801097

@@ -1117,6 +1134,10 @@ Value Eval::evaluate(const Position& pos) {
11171134
// Damp down the evaluation linearly when shuffling
11181135
v = v * (100 - pos.rule50_count()) / 100;
11191136

1137+
// Optionally, limit the playing strength by perturbing the evaluation
1138+
if (Eval::limitStrength)
1139+
v = randomly_perturbed_eval(v);
1140+
11201141
// Guarantee evaluation does not hit the tablebase range
11211142
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
11221143

src/evaluate.h

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ namespace Eval {
3434
Value evaluate(const Position& pos);
3535

3636
extern bool useNNUE;
37+
// True when the evaluation has to be randomly perturbed to limit the strength
extern bool limitStrength;
38+
// Per-mille weight of the random term mixed into the evaluation
extern int randomEvalPerturb;
3739
extern std::string eval_file_loaded;
3840

3941
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue

src/search.cpp

+1-72
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,6 @@ namespace {
9191
return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1);
9292
}
9393

94-
// Skill structure is used to implement strength limit
95-
struct Skill {
96-
explicit Skill(int l) : level(l) {}
97-
// The handicap is active for every level below 20
bool enabled() const { return level < 20; }
98-
// True only at the iteration whose depth equals level + 1
bool time_to_pick(Depth depth) const { return depth == 1 + level; }
99-
// Selects a move among the first multiPV root moves, stores it in 'best' and returns it
Move pick_best(size_t multiPV);
100-
101-
int level;
102-
// Move chosen by pick_best(); stays MOVE_NONE until pick_best() has been called
Move best = MOVE_NONE;
103-
};
104-
10594
template <NodeType nodeType>
10695
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
10796

@@ -225,7 +214,6 @@ void MainThread::search() {
225214

226215
if ( int(Options["MultiPV"]) == 1
227216
&& !Limits.depth
228-
&& !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
229217
&& rootMoves[0].pv[0] != MOVE_NONE)
230218
bestThread = Threads.get_best_thread();
231219

@@ -290,26 +278,8 @@ void Thread::search() {
290278
std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);
291279

292280
size_t multiPV = size_t(Options["MultiPV"]);
293-
294-
// Pick integer skill levels, but non-deterministically round up or down
295-
// such that the average integer skill corresponds to the input floating point one.
296-
// UCI_Elo is converted to a suitable fractional skill level, using anchoring
297-
// to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo
298-
// for match (TC 60+0.6) results spanning a wide range of k values.
299-
PRNG rng(now());
300-
double floatLevel = Options["UCI_LimitStrength"] ?
301-
std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
302-
double(Options["Skill Level"]);
303-
int intLevel = int(floatLevel) +
304-
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
305-
Skill skill(intLevel);
306-
307-
// When playing with strength handicap enable MultiPV search that we will
308-
// use behind the scenes to retrieve a set of possible moves.
309-
if (skill.enabled())
310-
multiPV = std::max(multiPV, (size_t)4);
311-
312281
multiPV = std::min(multiPV, rootMoves.size());
282+
313283
ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
314284

315285
trend = SCORE_ZERO;
@@ -445,10 +415,6 @@ void Thread::search() {
445415
if (!mainThread)
446416
continue;
447417

448-
// If skill level is enabled and time is up, pick a sub-optimal best move
449-
if (skill.enabled() && skill.time_to_pick(rootDepth))
450-
skill.pick_best(multiPV);
451-
452418
// Do we have time for the next iteration? Can we stop searching now?
453419
if ( Limits.use_time_management()
454420
&& !Threads.stop
@@ -504,10 +470,6 @@ void Thread::search() {
504470

505471
mainThread->previousTimeReduction = timeReduction;
506472

507-
// If skill level is enabled, swap best PV line with the sub-optimal one
508-
if (skill.enabled())
509-
std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(),
510-
skill.best ? skill.best : skill.pick_best(multiPV)));
511473
}
512474

513475

@@ -1729,39 +1691,6 @@ namespace {
17291691
thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
17301692
}
17311693

1732-
// When playing with strength handicap, choose best move among a set of RootMoves
1733-
// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
// Only the first multiPV entries of the main thread's root moves are considered.
// The chosen move is stored in the member 'best' and also returned.
1734-
1735-
Move Skill::pick_best(size_t multiPV) {
1736-
1737-
const RootMoves& rootMoves = Threads.main()->rootMoves;
1738-
static PRNG rng(now()); // PRNG sequence should be non-deterministic
1739-
1740-
// RootMoves are already sorted by score in descending order
1741-
Value topScore = rootMoves[0].score;
1742-
// Score gap between the first and the last considered move, capped at PawnValueMg
int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg);
1743-
// Grows as the level decreases
int weakness = 120 - 2 * level;
1744-
int maxScore = -VALUE_INFINITE;
1745-
1746-
// Choose best move. For each move score we add two terms, both dependent on
1747-
// weakness. One is deterministic and bigger for weaker levels, and one is
1748-
// random. Then we choose the move with the resulting highest score.
1749-
for (size_t i = 0; i < multiPV; ++i)
1750-
{
1751-
// This is our magic formula
1752-
int push = ( weakness * int(topScore - rootMoves[i].score)
1753-
+ delta * (rng.rand<unsigned>() % weakness)) / 128;
1754-
1755-
// '>=' lets a later move replace an earlier one with the same adjusted score
if (rootMoves[i].score + push >= maxScore)
1756-
{
1757-
maxScore = rootMoves[i].score + push;
1758-
best = rootMoves[i].pv[0];
1759-
}
1760-
}
1761-
1762-
return best;
1763-
}
1764-
17651694
} // namespace
17661695

17671696

src/ucioption.cpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include <algorithm>
2020
#include <cassert>
21+
#include <cmath>
2122
#include <ostream>
2223
#include <sstream>
2324

@@ -37,12 +38,23 @@ UCI::OptionsMap Options; // Global object
3738

3839
namespace UCI {
3940

41+
42+
// Exponent of the power law used by on_uci_elo() to convert UCI_Elo into the
// weight of the random evaluation perturbation
constexpr float exponent = 0.66;
43+
// Upper bound of the UCI_Elo option; on_uci_elo() maps it to a perturbation weight of 0
constexpr int Elo_max = 5200;
44+
// Lower bound of the UCI_Elo option; on_uci_elo() maps it to a perturbation weight of 1000
constexpr int Elo_min = 750;
45+
4046
/// 'On change' actions, triggered by an option's value change
4147
void on_clear_hash(const Option&) { Search::clear(); }
4248
void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
4349
void on_logger(const Option& o) { start_logger(o); }
4450
void on_threads(const Option& o) { Threads.set(size_t(o)); }
4551
void on_tb_path(const Option& o) { Tablebases::init(o); }
52+
// Copy the new value of the UCI_LimitStrength option into the evaluation switch
void on_limit_strength(const Option& o) {
  Eval::limitStrength = o;
}
53+
// Convert the new UCI_Elo value into the per-mille weight of the random
// evaluation perturbation: a power law of the distance to Elo_max, normalised
// by the same power of the full Elo range.
void on_uci_elo(const Option& o) {
  const auto distanceToMax = std::pow(Elo_max - o, exponent);
  const auto fullRange     = std::pow(Elo_max - Elo_min, exponent);

  Eval::randomEvalPerturb = int(1000 * distanceToMax / fullRange);
}
57+
4658
void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
4759
void on_eval_file(const Option& ) { Eval::NNUE::init(); }
4860

@@ -72,8 +84,8 @@ void init(OptionsMap& o) {
7284
o["nodestime"] << Option(0, 0, 10000);
7385
o["UCI_Chess960"] << Option(false);
7486
o["UCI_AnalyseMode"] << Option(false);
75-
o["UCI_LimitStrength"] << Option(false);
76-
o["UCI_Elo"] << Option(1350, 1350, 2850);
87+
o["UCI_LimitStrength"] << Option(false, on_limit_strength);
88+
o["UCI_Elo"] << Option(1000, Elo_min , Elo_max, on_uci_elo);
7789
o["UCI_ShowWDL"] << Option(false);
7890
o["SyzygyPath"] << Option("<empty>", on_tb_path);
7991
o["SyzygyProbeDepth"] << Option(1, 1, 100);

0 commit comments

Comments
 (0)