Replace skills with a new scheme to limit strength

vondele · vondele · commit caa7faa59f1d · 2021-09-11T08:33:37.000+02:00
Based on Sopel's initial implementation discussed in official-stockfish#3635. In this new scheme, the strength of the engine is limited by replacing a (varying) part of the evaluation with a random perturbation. This scheme is easier to implement than our current skill level implementation, and has the advantage that it has a wider Elo range, being both weaker than skill level 1 and stronger than skill level 19. The skill level option is removed, and instead UCI_Elo and UCI_LimitStrength are the only options available. UCI_Elo is calibrated such that 1500 Elo is equivalent in strength to the engine maia1 (https://lichess.org/@/maia1), which has a blitz rating on lichess of 1500 (based on nearly 600k human games). The full Elo range (750 - 5200) is obtained by playing games between engines roughly 100-200 Elo apart with the perturbation going from 0 to 1000, and fitting the ordo results. With this fit, a conversion from UCI_Elo to the magnitude of the random perturbation is possible. All games are played at lichess blitz TC (5m+3s), and playing strength is different at different TC. Indeed, maia1 is a fixed 1 node leela 'search', independent of TC, whereas this scheme searches normally, and improves with TC. There are a few caveats: it is unclear what the playing style of the engine is like; the old skill level was not really satisfactory, and it needs to be seen if this is fixed with this approach. Furthermore, while in the engine - engine matches maia1 and SF@1500Elo are equivalent in strength (at blitz TC), it is not certain that its rating against humans will be the same (engine Elo and human Elo can be very different). No functional change
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
@@ -23,6 +23,7 @@
 #include <fstream>
 #include <iomanip>
 #include <sstream>
+#include <random>
 #include <iostream>
 #include <streambuf>
 #include <vector>
@@ -61,6 +62,8 @@ namespace Stockfish {
 namespace Eval {
 
   bool useNNUE;
+  bool limitStrength;
+  int randomEvalPerturb;
   string eval_file_loaded = "None";
 
   /// NNUE::init() tries to load a NNUE network at startup time, or when the engine
@@ -1075,6 +1078,20 @@ namespace {
                                        : -Value(correction);
   }
 
+  // Weaken play by blending v with Gaussian noise (mean 0, std. dev. QueenValueEg):
+  // result = (randomEvalPerturb * noise + (1000 - randomEvalPerturb) * v) / 1000.
+  Value randomly_perturbed_eval(Value v)
+  {
+      // Seed per thread from random_device as well as the clock, so threads created
+      // within the same second do not all draw the identical noise sequence.
+      static thread_local std::mt19937_64 tls_rng = [](){
+        std::seed_seq seed{std::random_device{}(), unsigned(std::time(nullptr))};
+        return std::mt19937_64(seed);
+      }();
+      static thread_local std::normal_distribution<float> d(0.0, QueenValueEg); // built once per thread, not per call
+      return (Eval::randomEvalPerturb * Value(d(tls_rng)) + (1000 - Eval::randomEvalPerturb) * v) / 1000;
+  }
+
 } // namespace Eval
 
 
@@ -1117,6 +1134,10 @@ Value Eval::evaluate(const Position& pos) {
   // Damp down the evaluation linearly when shuffling
   v = v * (100 - pos.rule50_count()) / 100;
 
+  // Optionally, limit the playing strength by perturbing the evaluation
+  if (Eval::limitStrength)
+      v = randomly_perturbed_eval(v);
+
   // Guarantee evaluation does not hit the tablebase range
   v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
 
diff --git a/src/evaluate.h b/src/evaluate.h
@@ -34,6 +34,8 @@ namespace Eval {
   Value evaluate(const Position& pos);
 
   extern bool useNNUE;
+  extern bool limitStrength;
+  extern int randomEvalPerturb;
   extern std::string eval_file_loaded;
 
   // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
diff --git a/src/search.cpp b/src/search.cpp
@@ -91,17 +91,6 @@ namespace {
     return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1);
   }
 
-  // Skill structure is used to implement strength limit
-  struct Skill {
-    explicit Skill(int l) : level(l) {}
-    bool enabled() const { return level < 20; }
-    bool time_to_pick(Depth depth) const { return depth == 1 + level; }
-    Move pick_best(size_t multiPV);
-
-    int level;
-    Move best = MOVE_NONE;
-  };
-
   template <NodeType nodeType>
   Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
 
@@ -225,7 +214,6 @@ void MainThread::search() {
 
   if (   int(Options["MultiPV"]) == 1
       && !Limits.depth
-      && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
       && rootMoves[0].pv[0] != MOVE_NONE)
       bestThread = Threads.get_best_thread();
 
@@ -290,26 +278,8 @@ void Thread::search() {
   std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);
 
   size_t multiPV = size_t(Options["MultiPV"]);
-
-  // Pick integer skill levels, but non-deterministically round up or down
-  // such that the average integer skill corresponds to the input floating point one.
-  // UCI_Elo is converted to a suitable fractional skill level, using anchoring
-  // to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo
-  // for match (TC 60+0.6) results spanning a wide range of k values.
-  PRNG rng(now());
-  double floatLevel = Options["UCI_LimitStrength"] ?
-                      std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
-                        double(Options["Skill Level"]);
-  int intLevel = int(floatLevel) +
-                 ((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024  ? 1 : 0);
-  Skill skill(intLevel);
-
-  // When playing with strength handicap enable MultiPV search that we will
-  // use behind the scenes to retrieve a set of possible moves.
-  if (skill.enabled())
-      multiPV = std::max(multiPV, (size_t)4);
-
   multiPV = std::min(multiPV, rootMoves.size());
+
   ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
 
   trend = SCORE_ZERO;
@@ -445,10 +415,6 @@ void Thread::search() {
       if (!mainThread)
           continue;
 
-      // If skill level is enabled and time is up, pick a sub-optimal best move
-      if (skill.enabled() && skill.time_to_pick(rootDepth))
-          skill.pick_best(multiPV);
-
       // Do we have time for the next iteration? Can we stop searching now?
       if (    Limits.use_time_management()
           && !Threads.stop
@@ -504,10 +470,6 @@ void Thread::search() {
 
   mainThread->previousTimeReduction = timeReduction;
 
-  // If skill level is enabled, swap best PV line with the sub-optimal one
-  if (skill.enabled())
-      std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(),
-                skill.best ? skill.best : skill.pick_best(multiPV)));
 }
 
 
@@ -1729,39 +1691,6 @@ namespace {
         thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
   }
 
-  // When playing with strength handicap, choose best move among a set of RootMoves
-  // using a statistical rule dependent on 'level'. Idea by Heinz van Saanen.
-
-  Move Skill::pick_best(size_t multiPV) {
-
-    const RootMoves& rootMoves = Threads.main()->rootMoves;
-    static PRNG rng(now()); // PRNG sequence should be non-deterministic
-
-    // RootMoves are already sorted by score in descending order
-    Value topScore = rootMoves[0].score;
-    int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValueMg);
-    int weakness = 120 - 2 * level;
-    int maxScore = -VALUE_INFINITE;
-
-    // Choose best move. For each move score we add two terms, both dependent on
-    // weakness. One is deterministic and bigger for weaker levels, and one is
-    // random. Then we choose the move with the resulting highest score.
-    for (size_t i = 0; i < multiPV; ++i)
-    {
-        // This is our magic formula
-        int push = (  weakness * int(topScore - rootMoves[i].score)
-                    + delta * (rng.rand<unsigned>() % weakness)) / 128;
-
-        if (rootMoves[i].score + push >= maxScore)
-        {
-            maxScore = rootMoves[i].score + push;
-            best = rootMoves[i].pv[0];
-        }
-    }
-
-    return best;
-  }
-
 } // namespace
 
 
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
@@ -18,6 +18,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cmath>
 #include <ostream>
 #include <sstream>
 
@@ -37,12 +38,23 @@ UCI::OptionsMap Options; // Global object
 
 namespace UCI {
 
+
+constexpr float exponent = 0.66; // power applied to (Elo_max - Elo) when deriving the perturbation weight
+constexpr int Elo_max = 5200;    // upper bound of UCI_Elo; maps to a perturbation weight of 0
+constexpr int Elo_min = 750;     // lower bound of UCI_Elo; maps to the full perturbation weight (1000)
+
 /// 'On change' actions, triggered by an option's value change
 void on_clear_hash(const Option&) { Search::clear(); }
 void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 void on_logger(const Option& o) { start_logger(o); }
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 void on_tb_path(const Option& o) { Tablebases::init(o); }
+void on_limit_strength(const Option& o) { Eval::limitStrength = o; } // flag checked in Eval::evaluate() before perturbing
+void on_uci_elo(const Option& o) { // UCI_Elo -> noise weight: 1000 at Elo_min, 0 at Elo_max
+  const double fullRange = std::pow(Elo_max - Elo_min, exponent); // normalising denominator
+  Eval::randomEvalPerturb = int(1000 * std::pow(Elo_max - o, exponent) / fullRange);
+}
+
 void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
 void on_eval_file(const Option& ) { Eval::NNUE::init(); }
 
@@ -72,8 +84,8 @@ void init(OptionsMap& o) {
   o["nodestime"]             << Option(0, 0, 10000);
   o["UCI_Chess960"]          << Option(false);
   o["UCI_AnalyseMode"]       << Option(false);
-  o["UCI_LimitStrength"]     << Option(false);
-  o["UCI_Elo"]               << Option(1350, 1350, 2850);
+  o["UCI_LimitStrength"]     << Option(false, on_limit_strength);
+  o["UCI_Elo"]               << Option(1000, Elo_min , Elo_max, on_uci_elo);
   o["UCI_ShowWDL"]           << Option(false);
   o["SyzygyPath"]            << Option("<empty>", on_tb_path);
   o["SyzygyProbeDepth"]      << Option(1, 1, 100);