-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathqlearningmotor.py
More file actions
166 lines (152 loc) · 6.99 KB
/
qlearningmotor.py
File metadata and controls
166 lines (152 loc) · 6.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import numpy as np
import matplotlib.pyplot as plt
import time
import random
# import Rpi.GPIO as GPIO
# Parameter Init
pinPWM = 17 # Pin Number you were using in Raspberry Pi for PWM
pinRotary = 0
episodeNum = 4_000
rotatePenalty = 1
destinationReward = 10
epsilon = 0.95
decayValue = epsilon / (episodeNum - 1)
showPlot = 2500 # Will show plot every 2500 episode
learningRate = 0.3
discountRate = 0.9
Q_table_start = np.zeros([128, 2]) # init the q table space, in this plant we have to action either to drive or brake
initReading = 0 # The first time your sensor read the speed
setPoint = 32 # Choose your DC Motor Speed setpoint (in RPM)
controlalgorithm = 'qlearn'
initError = np.array([setPoint - initReading, 0, 0]) # array include three types of error [currenterror, derivativerror, sum of error]
timeSampling = 0.001 # Systems sampling time
samplingFreq = 1 / timeSampling # Systems sampling frequency
pwmResolution = 4096 #Because of raspberry pi 3 has 12bit resolution
derivativeGain = 121
proportionalGain = 1000
integralGain = 0.02
period = 0.5
frequency = 1 / period
amplitude = 32 # The range is from -maximum motor speed to maximum motor speed
simulationTime = 360 # your simulation time
# Main Program
class env():
def __init__(self, pinPWM, lastError, timeSampling):
self.pinPWM = pinPWM
self.pinRotary = pinRotary
self.pwmResolution = pwmResolution
self.setPoint = setPoint
self.controlalgorithm = controlalgorithm
self.timeSampling = timeSampling
self.proportionalGain = proportionalGain
self.derivativeGain = derivativeGain
self.integralGain = integralGain
self.learningRate = learningRate
self.discountRate = discountRate
self.epsilon = epsilon
self.episodeDecay = decayValue
self.episode = episodeNum
self.frequency = frequency
self.amplitude = amplitude
self.simulationTime = simulationTime
self.lastError = lastError
def errorUpdate(self):
sensorRead = sensorRead()
new_state = ceil(sensorRead - self.setPoint) # this is the error at instance t+1
derror = self.lastError[1]
sumerror += self.lastError[2] * self.timeSampling
lastError = np.array([new_state, derror, sumerror])
self.lastError = lastError
return lastError
def sensorRead(self):
newval = random.randint(1, 100)
return newval
def refSignal(simulationTime, reference, period, amplitude):
frequency = 1 / period
if (frequency > 0.5 * samplingFreq) & (reference != 'unit step'):
raise Exception("Please use frequency at least less than half times of your sampling frequency to avoid aliasing")
t = np.linspace(0, simulationTime, simulationTime * 2)
# Signal Reference List
if reference == 'sinusoidal': # Sinusoidal
y = amplitude * np.sin(2 * np.pi * frequency * t)
elif reference == 'unit step': # Unit Step Signal
y = amplitude * np.heaviside(t, 0)
elif reference == 'square': # Square Wave Signal
y = amplitude * np.sign(np.sin(2 * np.pi * frequency * t))
elif reference == 'random':
# Pseudo Random Binary Sequence (PRBS)
extendTime = self.simulationTime * 10 # we multiple the simulation time by 10 to extend the PRBS signal time
initSpace = np.zeros(extendTime)
freqRange = np.random.randint(frequency, samplingFreq, extendTime)
idxAmp = 0
while idxAmp < extendTime:
initSpace[idxAmp] = amplitude
initSpace[idxAmp + 1] = -amplitude
idxAmp += 2
idxTime = 0
y = np.zeros(extendTime)
while freqRange[idxTime] < extendTime:
idx = freqRange[idxTime]
y[idx:] = initSpace[idxTime]
idxTime += 1
else: # Manual control
y = None
return y, t
def PID(self, error, sampling):
controlout = self.proportionalGain * self.lastError[0] + self.derivativeGain * self.lastError[1] / sampling + self.integralGain * self.lastError[2]
controlout = controlout / self.pwmResolution * 100
if controlout > self.pwmResolution:
controlout = 100
return controlout
def antiwindup(self, error, sampling):
# Anti windup logic
if self.lastError[0] <= 0.001: # when the error near to zero turn off the integral gain
controlout = self.proportionalGain * self.lastError[0] + self.derivativeGain * self.lastError[1] / sampling
controlout = controlout / self.pwmResolution * 100
else: # else act as normal PID
controlout = self.PID(error, sampling)
return controlout
def qlearn(self, error):
# The Q-Learning in this case will be act as a bang-bang controller (on-off control)
Q_table = Q_table_start
rowterm, colterm = 63, 1
init_state = ceil(error[0])
for x in range(self.episode):
stop = False
while not stop:
if random.uniform(0, 1) < self.epsilon:
if random.uniform(0,1) > 0.5:
colstate = 1
else:
colstate = 0
controlout = Q_table[init_state, colstate]
else:
colstate = np.argmax(Q_table[init_state, :])
controlout = Q_table[init_state, colstate]
new_state = self.errorUpdate(self.sensorRead()) # this is the new state
new_colstate = np.argmax(Q_table[new_state, :])
if (init_state == rowterm and colstate == colterm):
reward = destinationReward
stop = True
else:
reward = -rotatePenalty
now_qvalue = Q_table[init_state, :]
maxqnext = Q_table[new_state, new_colstate]
new_qvalue = now_qvalue * (1 - learningRate) + learningRate * (reward + self.discountRate * maxqnext)
Q_table[init_state, colstate] = new_qvalue
init_state, colstate = new_state, new_colstate
if self.episodeNum >= episode >= 1:
self.epsilon -= decayValue
class control(env):
def __init__(self):
super().__init__(pinPWM, initError, timeSampling)
def applications(self, controlalgorithm):
if controlalgorithm == 'PID':
controlout = super().PID(error = self.lastError, sampling = self.timeSampling)
elif controlalgorithm == 'antiwindup':
controlout = super().antiwindup(error = self.lastError, sampling = self.timeSampling)
else:
controlout = super().qlearn(error = self.lastError)
return controlout
if __name__ == "__main__":
env(pinPWM, initError, timeSampling)