AlphaQuoridor/evaluate_network.py at main · dorakingx/AlphaQuoridor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# ====================
# New Parameter Evaluation Section
# ====================

# Import packages
from game import State
from pv_mcts import pv_mcts_action
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K
from pathlib import Path
from shutil import copy
import numpy as np

# Prepare parameters
EN_GAME_COUNT = 10 # Number of games per evaluation (originally 400)
EN_TEMPERATURE = 1.0 # Temperature of the Boltzmann distribution

# Points for the first player
def first_player_point(ended_state):
    # 1: first player wins, 0: first player loses, 0.5: draw
    if ended_state.is_lose():
        return 0 if ended_state.is_first_player() else 1
    return 0.5

# Execute one game
def play(next_actions):
    # Generate state
    state = State()

    # Loop until the game ends
    while True:
        # When the game ends
        if state.is_done():
            break

        # Get action
        next_action = next_actions[0] if state.is_first_player() else next_actions[1]
        action = next_action(state)

        # Get the next state
        state = state.next(action)

    # Return points for the first player
    return first_player_point(state)

# Replace the best player
def update_best_player():
    copy('./model/latest.keras', './model/best.keras')
    print('Change BestPlayer')

# Network evaluation
def evaluate_network():
    # Load the model of the latest player
    model0 = load_model('./model/latest.keras')

    # Load the model of the best player
    model1 = load_model('./model/best.keras')

    # Generate a function to select actions using PV MCTS
    next_action0 = pv_mcts_action(model0, EN_TEMPERATURE)
    next_action1 = pv_mcts_action(model1, EN_TEMPERATURE)
    next_actions = (next_action0, next_action1)

    # Repeat multiple matches
    total_point = 0
    for i in range(EN_GAME_COUNT):
        # Execute one game
        if i % 2 == 0:
            total_point += play(next_actions)
        else:
            total_point += 1 - play(list(reversed(next_actions)))

        # Output
        print('\rEvaluate {}/{}'.format(i + 1, EN_GAME_COUNT), end='')
    print('')

    # Calculate average points
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Clear models
    K.clear_session()
    del model0
    del model1

    # Replace the best player
    if average_point > 0.5:
        update_best_player()
        return True
    else:
        return False

# Operation check
if __name__ == '__main__':
    evaluate_network()