nn.GPU unit test
nicholas-leonard committed Jul 2, 2016
1 parent 0e7f438 commit d642e59
Showing 2 changed files with 219 additions and 1 deletion.
3 changes: 2 additions & 1 deletion doc/cunnmodules.md
@@ -2,7 +2,8 @@
 # Additional Modules #

 The following nn modules are also made available by the cunn package:
-* [DataParallelTable](#nn.DataParallelTable) : A module to parallelize FPROP and BPROP across multiple-GPUs.
+* [DataParallelTable](#nn.DataParallelTable) : parallelize calls to `forward` and `backward` across multiple GPUs.
+* [GPU](https://github.com/torch/nn/blob/master/doc/simple.md#nn.GPU) : decorates a module so that it can be executed on a specific GPU device.

 <a name="nn.DataParallelTable"/>
 ## DataParallelTable ##
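For reference, a minimal sketch of the usage pattern the new unit test exercises, assuming cunn is installed and at least two CUDA devices are visible (the layer sizes and device indices below are illustrative):

```lua
require 'cunn'

cutorch.setDevice(1)

-- nn.GPU(module, device, outdevice) evaluates the decorated module on
-- `device` and returns its output on `outdevice`.
local net = nn.GPU(nn.Linear(3, 4), 2, 1)
net:cuda()  -- the decorated Linear's parameters now live on device 2

local input  = torch.CudaTensor(2, 3):uniform(0, 1)  -- lives on device 1
local output = net:forward(input)                     -- computed on device 2, returned on device 1
```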
217 changes: 217 additions & 0 deletions test.lua
@@ -5388,6 +5388,223 @@ function cunntest.VolumetricReplicationPadding_backward()
precision_backward, 'error on state (backward) ')
end

function cunntest.GPU()
local ndevice = cutorch.getDeviceCount()
if ndevice < 2 then
return
end
assert(nn.GPU, "Please update nn to latest version")

local originaldevice = cutorch.getDevice()

cutorch.setDevice(1)
local linear = nn.Linear(3,4)
local linear2 = linear:clone():float()
linear.mybuffer = {torch.CudaTensor(3)}

local gpu = nn.GPU(linear, 2, 1)
gpu:cuda()

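-- cuda() moves the decorated module, including its buffers, to device 2
-- without changing the current device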
mytester:assert(linear.mybuffer[1]:getDevice() == 2)
mytester:assert(linear.weight:getDevice() == 2)
mytester:assert(cutorch.getDevice() == originaldevice)

local input = torch.CudaTensor(2,3):uniform(0,1)
local output = gpu:forward(input)

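-- forward executes on device 2; the output is returned on the outdevice (1)
-- and the input is cached on device 2 as gpu._input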
mytester:assert(linear.output:getDevice() == 2)
mytester:assert(output:getDevice() == 1)
mytester:assert(gpu._input:getDevice() == 2)

local gradOutput = torch.CudaTensor(2,4):uniform(0,1)
gpu:zeroGradParameters()
mytester:assert(cutorch.getDevice() == 1)
local gradInput = gpu:backward(input, gradOutput)

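-- backward also executes on device 2; gradInput comes back on device 1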
mytester:assert(cutorch.getDevice() == 1)
mytester:assert(gpu._gradOutput:getDevice() == 2)
mytester:assert(linear.gradInput:getDevice() == 2)
mytester:assert(gradInput:getDevice() == 1)

mytester:assert(cutorch.getDevice() == 1)
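-- reference computation with an equivalent float module on the CPU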
local input2, gradOutput2 = input:float(), gradOutput:float()
local output2 = linear2:forward(input2)
linear2:zeroGradParameters()
local gradInput2 = linear2:backward(input2, gradOutput2)


mytester:assertTensorEq(input2, input:float(), 0.000001)
mytester:assertTensorEq(gradInput2, gradInput:float(), 0.000001)

local params, gradParams = gpu:parameters()
local params2, gradParams2 = linear2:parameters()

for i=1,#params do
mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001)
mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001)
end

-- test serialize/deserialize

local gpustr = torch.serialize(gpu)
mytester:assert(cutorch.getDevice() == 1)
local gpu2 = torch.deserialize(gpustr)
mytester:assert(cutorch.getDevice() == 1)

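-- the deserialized copy should keep its internals on device 2 and
-- reproduce the behaviour of the original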
local output2 = gpu2:forward(input)

mytester:assert(gpu2.modules[1].output:getDevice() == 2)
mytester:assert(output2:getDevice() == 1)
mytester:assert(gpu2._input:getDevice() == 2)

gpu2:zeroGradParameters()
mytester:assert(cutorch.getDevice() == 1)
local gradInput2 = gpu2:backward(input, gradOutput)

mytester:assert(cutorch.getDevice() == 1)
mytester:assert(gpu2._gradOutput:getDevice() == 2)
mytester:assert(gpu2.modules[1].gradInput:getDevice() == 2)
mytester:assert(gradInput2:getDevice() == 1)

-- the deserialized module must reproduce the original module's results
mytester:assertTensorEq(output2:float(), output:float(), 0.000001)
mytester:assertTensorEq(gradInput2:float(), gradInput:float(), 0.000001)

local params, gradParams = gpu:parameters()
local params2, gradParams2 = gpu2:parameters()

for i=1,#params do
mytester:assert(params2[i]:getDevice() == params[i]:getDevice())
mytester:assert(gradParams2[i]:getDevice() == gradParams[i]:getDevice())
mytester:assertTensorEq(params2[i]:float(), params[i]:float(), 0.000001)
mytester:assertTensorEq(gradParams2[i]:float(), gradParams[i]:float(), 0.000001)
end


-- test table input/output
local lin1, lin2 = nn.Linear(3,4), nn.Linear(3,4)
local para = nn.ParallelTable():add(lin1):add(lin2)
local para2 = para:clone():float()
local gpu = nn.GPU(para, 2, 1)

gpu:cuda()
mytester:assert(lin1.weight:getDevice() == 2)
mytester:assert(lin2.weight:getDevice() == 2)
mytester:assert(cutorch.getDevice() == 1)

local device3 = cutorch.getDeviceCount()
local input = {
torch.CudaTensor(2,3):uniform(0,1),
cutorch.withDevice(device3, function() return torch.CudaTensor(2,3):uniform(0,1) end) -- tests input from multiple devices
}
local output = gpu:forward(input)

mytester:assert(para.output[1]:getDevice() == 2)
mytester:assert(para.output[2]:getDevice() == 2)
mytester:assert(output[1]:getDevice() == 1)
mytester:assert(output[2]:getDevice() == 1)
mytester:assert(gpu._input[1]:getDevice() == 2)
mytester:assert(gpu._input[2]:getDevice() == 2)

local gradOutput = {
torch.CudaTensor(2,4):uniform(0,1),
cutorch.withDevice(device3, function() return torch.CudaTensor(2,4):uniform(0,1) end) -- tests gradOutput from multiple devices
}

gpu:zeroGradParameters()
mytester:assert(cutorch.getDevice() == 1)
local gradInput = gpu:backward(input, gradOutput)

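-- inside the decorator everything stays on device 2, while each gradInput
-- entry is returned on the device its corresponding input came from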
mytester:assert(cutorch.getDevice() == 1)
mytester:assert(gpu._gradOutput[1]:getDevice() == 2)
mytester:assert(gpu._gradOutput[2]:getDevice() == 2)
mytester:assert(para.gradInput[1]:getDevice() == 2)
mytester:assert(para.gradInput[2]:getDevice() == 2)
mytester:assert(gradInput[1]:getDevice() == 1)
mytester:assert(gradInput[2]:getDevice() == device3)

local input2, gradOutput2 = {input[1]:float(), input[2]:float()}, {gradOutput[1]:float(), gradOutput[2]:float()}
local output2 = para2:forward(input2)
para2:zeroGradParameters()
local gradInput2 = para2:backward(input2, gradOutput2)

mytester:assertTensorEq(input2[1], input[1]:float(), 0.000001)
mytester:assertTensorEq(input2[2], input[2]:float(), 0.000001)
mytester:assertTensorEq(gradInput2[1], gradInput[1]:float(), 0.000001)
mytester:assertTensorEq(gradInput2[2], gradInput[2]:float(), 0.000001)

local params, gradParams = gpu:parameters()
local params2, gradParams2 = para2:parameters()

for i=1,#params do
mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001)
mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001)
end

-- test that it handles reduction in input/output size

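-- drop the second branch; the decorator's internal tables should shrink to match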
input[2], gradOutput[2] = nil, nil
para.modules[2] = nil
para.output[2] = nil
para.gradInput[2] = nil

local output = gpu:forward(input)

mytester:assert(#gpu._input == 1)
mytester:assert(#output == 1)

local gradInput = gpu:backward(input, gradOutput)

mytester:assert(#gpu._gradOutput == 1)
mytester:assert(#gradInput == 1)

-- test sequential multi-GPUs

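-- chain one nn.GPU-wrapped Linear per device; each module outputs on its own
-- device except the last, which returns the final output on device 1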
local mlp = nn.Sequential()
for device=1,ndevice do
local outdevice = device == ndevice and 1 or device
mlp:add(nn.GPU(nn.Linear(3,3), device, outdevice))
mytester:assert(cutorch.getDevice() == 1)
end
mlp:cuda()
mytester:assert(cutorch.getDevice() == 1)

local input = torch.CudaTensor(2,3):uniform(0,1)
local gradOutput = torch.CudaTensor(2,3):uniform(0,1)

local output = mlp:forward(input)
mlp:zeroGradParameters()
local gradInput = mlp:backward(input, gradOutput)

-- test CPU only

local params, gradParams = mlp:parameters()

mlp:float()

local input2, gradOutput2 = input:float(), gradOutput:float()

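-- temporarily hide cutorch so the CPU-only code path cannot call into it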
local _cutorch = cutorch
cutorch = nil

local output2 = mlp:forward(input2)
mlp:zeroGradParameters()
local gradInput2 = mlp:backward(input2, gradOutput2)

cutorch = _cutorch

mytester:assertTensorEq(output:float(), output2, 0.000001)
mytester:assertTensorEq(gradInput:float(), gradInput2, 0.000001)

local params2, gradParams2 = mlp:parameters()

for i=1,#params do
mytester:assertTensorEq(params[i]:float(), params2[i], 0.000001)
mytester:assertTensorEq(gradParams[i]:float(), gradParams2[i], 0.000001)
end

cutorch.setDevice(originaldevice)
end

local function setUp()
cutorch.setDevice(1)
end
