forked from miking-lang/miking
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatmul.mc
78 lines (71 loc) · 2.77 KB
/
matmul.mc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
-- Runs a naive implementation of matrix multiplication using both the default
-- OCaml generation and the accelerated generation, to compare their
-- performance and also to ensure they get the same results.
--
-- We use matrices of integers because the addition of floating-point values is
-- technically not an associative operation, so the compiler cannot parallelize
-- folds over such operations. This results in significant improvements over
-- the default version.
include "common.mc"
include "string.mc"
let transposeSq : [[Int]] -> [[Int]] = lam m.
-- We use this utest to express that m is a square matrix.
utest length m with length (head m) in
let n = length m in
let g : Int -> Int -> Int = lam i. lam j.
let row : [Int] = get m j in
let cell : Int = get row i in
cell in
let f : Int -> [Int] = lam i. create n (g i) in
let t : [[Int]] = create n f in
t
let addProd : [Int] -> [Int] -> Int = lam row. lam col.
recursive let work : [Int] -> [Int] -> [Int] = lam row. lam col.
if null row then (let t : [Int] = [] in t)
else if null col then (let t : [Int] = [] in t)
else
let rh : Int = head row in
let ch : Int = head col in
let rt : [Int] = tail row in
let ct : [Int] = tail col in
cons (muli rh ch) (work rt ct)
in
foldl addi 0 (work row col)
let matMulSq : [[Int]] -> [[Int]] -> [[Int]] = lam a. lam b.
-- We use the below utests to express size equality constraints on the
-- dimensions of a and b.
utest length a with length b in
utest length a with length (head a) in
utest length a with length (head b) in
let b = transposeSq b in
map
(lam aRow : [Int].
let row : [Int] = map (addProd aRow) b in
row)
a
let matSumSq : [[Int]] -> Int = lam m.
-- We use an explicit type annotation here to eliminate the type variable
-- result of map (needed because type annot does not eliminate these).
let t : [Int] = map (foldl addi 0) m in
foldl addi 0 t
mexpr
-- If we increase this value, the accelerated code will become faster relative
-- to the pure OCaml code.
let n = 128 in
let a : [[Int]] = create n (lam. create n (lam. randIntU 0 10)) in
let b : [[Int]] = create n (lam. create n (lam. randIntU 0 10)) in
let t0 = wallTimeMs () in
let c1 : [[Int]] = matMulSq a b in
let cpu : Int = matSumSq c1 in
let t1 = wallTimeMs () in
let c2 : [[Int]] = accelerate (matMulSq a b) in
let gpu : Int = accelerate (matSumSq c2) in
let t2 = wallTimeMs () in
printLn (join ["OCaml time: ", float2string (divf (subf t1 t0) 1000.0)]);
printLn (join ["Accelerated time: ", float2string (divf (subf t2 t1) 1000.0)]);
if eqi cpu gpu then
()
else
printLn "OCaml and accelerated code got different results";
printLn (join ["OCaml result: ", int2string cpu]);
printLn (join ["Accelerated result: ", int2string gpu])