-
Notifications
You must be signed in to change notification settings - Fork 648
/
Copy pathoptimizer.py
65 lines (56 loc) · 2.19 KB
/
optimizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import math
class SimpleGrda:
    """Generalized Regularized Dual Averaging (GRDA) optimizer.

    Keeps one dual accumulator per parameter and applies a
    soft-thresholding (L1 shrinkage) step at every update, which drives
    small weights exactly to zero and yields sparse models.

    Args:
        params: iterable of paddle parameters to optimize.
        lr (float): learning rate.
        c (float): sparsity-control coefficient; ``c=0`` disables the
            L1 shrinkage entirely.
        mu (float): exponent of the shrinkage growth schedule
            (threshold grows like ``c * lr**(0.5+mu) * t**mu``).
    """

    def __init__(self, params, lr=0.01, c=0.0, mu=0.7):
        self.params = list(params)
        self.lr = lr
        self.c = c
        self.mu = mu
        # Number of completed step() calls.
        self.iterations = 0
        # Dual accumulators must start at ZERO: the `first_iter` term in
        # step() adds w_0 on the very first update, so the accumulator
        # effectively begins at w_0 as GRDA requires. The previous
        # Uniform(-0.1, 0.1) initializer injected random noise into the
        # optimizer state (the commented-out original copied the params,
        # confirming the intent).
        self.accumulators = [
            paddle.create_parameter(
                x.shape,
                x.dtype,
                default_initializer=paddle.nn.initializer.Constant(0.0))
            for x in self.params
        ]
        # Accumulators are optimizer state, not trainable parameters.
        for a in self.accumulators:
            a.stop_gradient = True
        # Running L1 threshold, incremented in closed form each step.
        self.l1_accumulation = 0

    def step(self):
        """Apply one GRDA update to every parameter that has a gradient."""
        c = self.c
        mu = self.mu
        lr = self.lr
        # Closed-form increment of the threshold between step t and t+1:
        #   c * lr^(0.5+mu) * ((t+1)^mu - t^mu)
        t = float(self.iterations)
        scale = c * math.pow(lr, 0.5 + mu)
        l1_diff = scale * math.pow(t + 1.0, mu) - scale * math.pow(t, mu)
        self.l1_accumulation += l1_diff
        # 1 only on the very first step: folds w_0 into the accumulator.
        first_iter = max(1 - self.iterations, 0)
        updates = []
        for p, a in zip(self.params, self.accumulators):
            g = p.grad
            if g is None:
                # Parameter received no gradient this step (e.g. frozen
                # or unused in the graph); leave it and its state alone.
                continue
            new_a = a + first_iter * p - self.lr * g
            updates.append((a, new_a))
            # Soft-thresholding: w = sign(v) * max(|v| - threshold, 0).
            new_a_l1 = paddle.abs(new_a) - self.l1_accumulation
            new_p = paddle.sign(new_a) * paddle.clip(new_a_l1, min=0)
            updates.append((p, new_p))
        # Write back after computing every update so all new values are
        # derived from the pre-step state.
        for raw_value, new_value in updates:
            raw_value.set_value(new_value)
        self.iterations += 1

    def clear_grad(self):
        """Zero the gradients of all trainable tracked parameters."""
        for p in self.params:
            if not p.stop_gradient:
                p.clear_gradient()