Skip to content

Commit

Permalink
Simplify function selection (klauspost#206)
Browse files (browse the repository at this point in the history)
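Eliminate the double branching of first building a bit mask from the `== modulus` comparisons and then switching on that mask; the specialized AVX2/AVX512 function is now selected directly by nested comparisons.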
  • Loading branch information
klauspost authored Sep 5, 2022
1 parent c4b4a70 commit 6939176
Showing 1 changed file with 112 additions and 86 deletions.
198 changes: 112 additions & 86 deletions galois_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,58 +144,71 @@ func sliceXor(in, out []byte, o *options) {

// 4-way butterfly
func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
if o.useAVX2 || o.useAVX512 {
if len(work[0]) > 0 {
var mask uint8
if log_m01 == modulus {
mask |= 1 << 0
}
if len(work[0]) == 0 {
return
}

t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
if log_m01 == modulus {
if log_m23 == modulus {
mask |= 1 << 1
}
if log_m02 == modulus {
mask |= 1 << 2
}
t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
switch mask {
case 0:
ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
case 1:
ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
case 2:
ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
case 3:
if log_m02 == modulus {
ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_3(work, dist*24, t01, t23, t02)
case 4:
ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
case 5:
}
} else {
if log_m02 == modulus {
ifftDIT4_avx512_5(work, dist*24, t01, t23, t02)
case 6:
} else {
ifftDIT4_avx512_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx512_6(work, dist*24, t01, t23, t02)
case 7:
ifftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_2(work, dist*24, t01, t23, t02)
}
} else {
switch mask {
case 0:
ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
case 1:
ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
case 2:
ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
case 3:
if log_m02 == modulus {
ifftDIT4_avx512_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx512_0(work, dist*24, t01, t23, t02)
}
}
}
return
} else if o.useAVX2 {
if log_m01 == modulus {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_3(work, dist*24, t01, t23, t02)
case 4:
ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
case 5:
}
} else {
if log_m02 == modulus {
ifftDIT4_avx2_5(work, dist*24, t01, t23, t02)
case 6:
} else {
ifftDIT4_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m23 == modulus {
if log_m02 == modulus {
ifftDIT4_avx2_6(work, dist*24, t01, t23, t02)
case 7:
ifftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m02 == modulus {
ifftDIT4_avx2_4(work, dist*24, t01, t23, t02)
} else {
ifftDIT4_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
Expand All @@ -205,58 +218,71 @@ func ifftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options
}

func fftDIT4(work [][]byte, dist int, log_m01, log_m23, log_m02 ffe, o *options) {
if o.useAVX2 || o.useAVX512 {
if len(work[0]) > 0 {
var mask uint8
if log_m02 == modulus {
mask |= 1 << 0
}
if len(work[0]) == 0 {
return
}

t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
if log_m02 == modulus {
if log_m01 == modulus {
mask |= 1 << 1
}
if log_m23 == modulus {
mask |= 1 << 2
}
t01 := &multiply256LUT[log_m01]
t23 := &multiply256LUT[log_m23]
t02 := &multiply256LUT[log_m02]
if o.useAVX512 {
switch mask {
case 0:
fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
case 1:
fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
case 2:
fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
case 3:
if log_m23 == modulus {
fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_3(work, dist*24, t01, t23, t02)
case 4:
fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
case 5:
}
} else {
if log_m23 == modulus {
fftDIT4_avx512_5(work, dist*24, t01, t23, t02)
case 6:
} else {
fftDIT4_avx512_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx512_6(work, dist*24, t01, t23, t02)
case 7:
fftDIT4_avx512_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_2(work, dist*24, t01, t23, t02)
}
} else {
switch mask {
case 0:
fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
case 1:
fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
case 2:
fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
case 3:
if log_m23 == modulus {
fftDIT4_avx512_4(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx512_0(work, dist*24, t01, t23, t02)
}
}
}
return
} else if o.useAVX2 {
if log_m02 == modulus {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_3(work, dist*24, t01, t23, t02)
case 4:
fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
case 5:
}
} else {
if log_m23 == modulus {
fftDIT4_avx2_5(work, dist*24, t01, t23, t02)
case 6:
} else {
fftDIT4_avx2_1(work, dist*24, t01, t23, t02)
}
}
} else {
if log_m01 == modulus {
if log_m23 == modulus {
fftDIT4_avx2_6(work, dist*24, t01, t23, t02)
case 7:
fftDIT4_avx2_7(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_2(work, dist*24, t01, t23, t02)
}
} else {
if log_m23 == modulus {
fftDIT4_avx2_4(work, dist*24, t01, t23, t02)
} else {
fftDIT4_avx2_0(work, dist*24, t01, t23, t02)
}
}
}
Expand Down

0 comments on commit 6939176

Please sign in to comment.