-
Notifications
You must be signed in to change notification settings - Fork 185
/
Copy pathPolicy.py
71 lines (54 loc) · 2.29 KB
/
Policy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
'''
The code for the lookahead policy we use in our
Static Model
'''
import numpy as np
# the lookahead policy
class LookaheadPolicy():
    """Deterministic lookahead policy for the static shortest-path model.

    The policy solves a finite-horizon backward recursion over the graph
    stored in ``model.G`` and returns the neighbor to move to from the
    node currently held in ``model.state.node``.
    """

    def __init__(self, model):
        """Store the model whose graph, cost estimates and state are read."""
        self.model = model

    def get_decision(self, METRIC):
        """Return the next node to visit from the model's current node.

        Args:
            METRIC: When equal to ``"PERCENTILE"``, each link cost is the
                ``model.theta`` percentile computed by
                ``use_percentile_val`` from the link's spread and estimated
                mean. Any other value uses the estimated mean cost
                ``model.estimated_costs[k][l]`` directly.

        Returns:
            The entry of the time-0 decision table for
            ``model.state.node``: the minimizing neighbor, or ``-1`` when
            that node has no neighbors.
        """
        graph = self.model.G
        horizon = graph.Horizon
        node_count = graph.vertexCount

        # decisions[t][k] is the neighbor chosen at node k at time t.
        decisions = [[0] * node_count for _ in range(horizon + 1)]

        # V[t][k] is the cost-to-go from node k at time t: infinity
        # everywhere except at the destination, which starts at 0.
        V = np.full((horizon + 1, node_count), np.inf)
        V[:, graph.end_node] = 0

        # Step backwards in time, solving Bellman's equation (referred to
        # as 5.22 and 5.23 in the original comments) at every node. Every
        # node, the destination included, is recomputed for t < Horizon.
        for t in range(horizon - 1, -1, -1):
            for k in range(node_count):
                argMin = -1
                minVal = np.inf
                for l in graph.neighbors[k]:
                    candidate = V[t + 1][l] + self._link_cost(METRIC, k, l)
                    # ">=" keeps the original tie-breaking: the last
                    # neighbor attaining the minimum wins.
                    if minVal >= candidate:
                        argMin = l
                        minVal = candidate
                V[t][k] = minVal
                decisions[t][k] = argMin

        return decisions[0][self.model.state.node]

    def _link_cost(self, METRIC, k, l):
        """Return the cost used for link (k, l) under the given METRIC."""
        mean = self.model.estimated_costs[k][l]
        if METRIC == "PERCENTILE":
            spread = self.model.G.spreads[k][l]
            return self.use_percentile_val(self.model.theta, spread, mean)
        # NOTE(review): this branch previously read an undefined name
        # `dist`, raising NameError. The estimated mean cost is assumed to
        # be the intended value -- confirm against the model.
        return mean

    def use_percentile_val(self, theta, spread, mean):
        """Return the theta-percentile cost of a link.

        Args:
            theta: Percentile to use, expressed as a value in [0, 1].
            spread: Spread of the link.
            mean: Mean cost of the link.

        Returns:
            The point at fraction ``theta`` of the interval
            ``[(1 - spread) * mean, (1 + spread) * mean]``.
        """
        point_val = 1 - spread + (2 * spread) * theta
        used_cost = mean * point_val
        return used_cost