-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdump.js
More file actions
118 lines (114 loc) · 4.58 KB
/
dump.js
File metadata and controls
118 lines (114 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import MonteCarloNode from "./MonteCarloNode.js";
import Player from "./MonteCarloPlayer.js";
export default class MonteCarlo {
constructor(game, UCB1ExploreParam = 2) {
this.game = game
this.UCB1ExploreParam = UCB1ExploreParam
this.nodes = new Map() // map: State.hash() => MonteCarloNode
} /** If given state does not exist, create dangling node. */
makeNode(state) {
if (!this.nodes.has(state.hash())) {
let unexpandedPlays = this.game.legalPlays(state).slice()
let node = new MonteCarloNode(null, null, state, unexpandedPlays)
this.nodes.set(state.hash(), node)
}
} /** From given state, repeatedly run MCTS to build statistics. */
runSearch(state, timeout = 3) {
this.makeNode(state)
let end = Date.now() + timeout * 1000
while (Date.now() < end) {
let node = this.select(state)
let winner = this.game.winner(node.state)
if (node.isLeaf() === false && winner === false) {
node = this.expand(node)
winner = this.simulate(node)
}
this.backpropagate(node, winner)
console.log(node.state.hash())
}
} /** Get the best move from available statistics. */
bestPlay(state) {
this.makeNode(state) // If not all children are expanded, not enough information
if (this.nodes.get(state.hash()).isFullyExpanded() === false)
throw new Error("Not enough information!")
let node = this.nodes.get(state.hash())
let allPlays = node.allPlays()
let bestPlay
let max = -Infinity
for (let play of allPlays) {
let childNode = node.childNode(play)
if (childNode.n_plays > max) {
bestPlay = play
max = childNode.n_plays
}
}
return bestPlay
} /** Phase 1, Selection: Select until not fully expanded OR leaf */
select(state) {
let node = this.nodes.get(state.hash())
while(node.isFullyExpanded() && !node.isLeaf()) {
let plays = node.allPlays()
let bestPlay
let bestUCB1 = -Infinity
for (let play of plays) {
let childUCB1 = node.childNode(play).getUCB1(this.UCB1ExploreParam)
if (childUCB1 > bestUCB1) {
bestPlay = play
bestUCB1 = childUCB1
}
}
node = node.childNode(bestPlay)
}
return node
} /** Phase 2, Expansion: Expand a random unexpanded child node */
expand(node) {
let plays = node.unexpandedPlays()
let index = Math.floor(Math.random() * plays.length)
let play = plays[index]
let childState = this.game.nextState(node.state, play)
let childUnexpandedPlays = this.game.legalPlays(childState)
let childNode = node.expand(play, childState, childUnexpandedPlays)
this.nodes.set(childState.hash(), childNode)
return childNode
} /** Phase 3, Simulation: Play game to terminal state, return winner */
simulate(node) {
let state = node.state
let winner = this.game.winner(state)
while (winner === false) {
let plays = this.game.legalPlays(state)
let play = plays[Math.floor(Math.random() * plays.length)]
console.log("IM HERE")
state = this.game.nextState(state, play)
winner = this.game.winner(state)
}
return winner
} /** Phase 4, Backpropagation: Update ancestor statistics */
backpropagate(node, winner) {
while (node !== null) {
node.n_plays += 1
// Parent's choice
if (node.state.isPlayer(winner)) {
node.n_wins += 1
}
node = node.parent
}
}
// Utility Methods /** Return MCTS statistics for this node and children nodes */
getStats(state) {
let node = this.nodes.get(state.hash())
let stats = { n_plays: node.n_plays,
n_wins: node.n_wins,
children: [] }
for (let child of node.children.values()) {
if (child.node === null)
stats.children.push({ play: child.play,
n_plays: null,
n_wins: null})
else
stats.children.push({ play: child.play,
n_plays: child.node.n_plays,
n_wins: child.node.n_wins})
}
return stats
}
}