FusedBatchNorm_grad.ts
/**
 * @license
 * Copyright 2020 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */

import {FusedBatchNorm, FusedBatchNormAttrs} from '../kernel_names';
import {GradConfig, NamedAttrMap} from '../kernel_registry';
import {add} from '../ops/add';
import {getReductionAxes} from '../ops/broadcast_util';
import {mul} from '../ops/mul';
import {reshape} from '../ops/reshape';
import {rsqrt} from '../ops/rsqrt';
import {scalar} from '../ops/scalar';
import {sub} from '../ops/sub';
import {sum} from '../ops/sum';
import {tile} from '../ops/tile';
import {Tensor} from '../tensor';
import {Rank, ShapeMap} from '../types';

export const fusedBatchNormGradConfig: GradConfig = {
  kernelName: FusedBatchNorm,
  inputsToSave: ['x', 'mean', 'variance', 'scale'],
  gradFunc: <R extends Rank>(
      dy: Tensor, saved: Tensor[], attrs: NamedAttrMap) => {
    const {varianceEpsilon} = attrs as unknown as FusedBatchNormAttrs;
    const [x, mean, variance, scale] = saved;

    // If no scale was provided, the forward pass behaves as if scale === 1.
    const scaleValue = scale == null ? scalar(1) : scale;

    const reductionAxes = getReductionAxes(mean.shape, x.shape);
    const tileShape: number[] = [];
    if (mean.rank === 1) {
      for (let i = 0; i < x.shape.length - 1; ++i) {
        tileShape.push(x.shape[i]);
      }
      tileShape.push(1);
    }

    const xMinusMean = sub(x, mean);
    const dyTimesScaleValue = mul(dy, scaleValue);
    // 1 / sqrt(variance + epsilon)
    const oneOverSqrtVariance = rsqrt(add(variance, scalar(varianceEpsilon)));
    // -0.5 * (variance + epsilon)^(-3/2)
    const minusHalfRCube = mul(
        mul(mul(oneOverSqrtVariance, oneOverSqrtVariance), oneOverSqrtVariance),
        scalar(-0.5));

    // dx = dy * scale / sqrt(variance + epsilon)
    const derX = () => {
      if (mean.rank === 1) {
        return reshape(
            mul(mul(dy,
                    tile(
                        reshape(oneOverSqrtVariance,
                                [1, 1, 1, 1, mean.shape[0]]),
                        tileShape)),
                scaleValue),
            x.shape);
      } else {
        return reshape(mul(mul(dy, oneOverSqrtVariance), scaleValue), x.shape);
      }
    };
    // dMean = -sum(dy * scale / sqrt(variance + epsilon))
    const derMean = () => {
      let meanDer =
          mul(mul(oneOverSqrtVariance, scalar(-1)), dyTimesScaleValue);
      if (mean.rank === 1) {
        meanDer = sum(meanDer, reductionAxes);
      }
      return reshape(meanDer, mean.shape as ShapeMap[R]);
    };
    // dVariance = -0.5 * sum(dy * scale * (x - mean) / (variance + epsilon)^1.5)
    const derVariance = () => {
      let varianceDer =
          mul(mul(minusHalfRCube, xMinusMean), dyTimesScaleValue);
      if (mean.rank === 1) {
        varianceDer = sum(varianceDer, reductionAxes);
      }
      return reshape(varianceDer, mean.shape as ShapeMap[R]);
    };
    // dScale = sum(dy * (x - mean) / sqrt(variance + epsilon))
    const derScale = () => {
      const xMinusMean2TimesRsqrt = mul(xMinusMean, oneOverSqrtVariance);
      let scaleDer = mul(dy, xMinusMean2TimesRsqrt);
      if (mean.rank === 1) {
        scaleDer = sum(scaleDer, reductionAxes);
      }
      return reshape(scaleDer, mean.shape as ShapeMap[R]);
    };
    // dOffset = sum(dy)
    const derOffset = () => {
      let offsetDer = dy;
      if (mean.rank === 1) {
        offsetDer = sum(offsetDer, reductionAxes);
      }
      return reshape(offsetDer, mean.shape as ShapeMap[R]);
    };

    return {
      x: derX,
      mean: derMean,
      variance: derVariance,
      scale: derScale,
      offset: derOffset
    };
  }
};
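
// Usage sketch (not part of the original file). The autodiff engine consults
// this config once it has been registered, which tfjs-core does via
// registerGradient(fusedBatchNormGradConfig) in register_all_gradients.ts.
// A minimal, hedged example assuming the public @tensorflow/tfjs API (which
// bundles core, the registered gradients and a backend); shapes and values
// are illustrative only:
//
//   import * as tf from '@tensorflow/tfjs';
//
//   const x = tf.randomNormal([2, 3, 3, 4]);  // NHWC input
//   const mean = tf.zeros([4]);               // per-channel statistics
//   const variance = tf.ones([4]);
//   const offset = tf.zeros([4]);
//   const scale = tf.ones([4]);
//
//   // dx = dy * scale / sqrt(variance + varianceEpsilon), as computed by derX.
//   const dx = tf.grad(
//       (t: tf.Tensor) =>
//           tf.batchNorm(t, mean, variance, offset, scale, 1e-3))(x);
//   dx.print();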