From d642e595b5257cb06de5792a221701e149b5f619 Mon Sep 17 00:00:00 2001
From: nicholas-leonard
Date: Sat, 2 Jul 2016 16:30:58 -0400
Subject: [PATCH] nn.GPU unit test

---
 doc/cunnmodules.md |   3 +-
 test.lua           | 217 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 219 insertions(+), 1 deletion(-)

diff --git a/doc/cunnmodules.md b/doc/cunnmodules.md
index 05ec1b82..de8cbbe4 100644
--- a/doc/cunnmodules.md
+++ b/doc/cunnmodules.md
@@ -2,7 +2,8 @@
 # Additional Modules #
 
 The following nn modules are also made available by the cunn package:
- * [DataParallelTable](#nn.DataParallelTable) : A module to parallelize FPROP and BPROP across multiple-GPUs.
+ * [DataParallelTable](#nn.DataParallelTable) : parallelize calls to `forward` and `backward` across multiple GPUs.
+ * [GPU](https://github.com/torch/nn/blob/master/doc/simple.md#nn.GPU) : decorates a module so that it can be executed on a specific GPU device.
 
 ## DataParallelTable ##
 
diff --git a/test.lua b/test.lua
index 49c9ce3a..94a70073 100644
--- a/test.lua
+++ b/test.lua
@@ -5388,6 +5388,223 @@ function cunntest.VolumetricReplicationPadding_backward()
       precision_backward, 'error on state (backward) ')
 end
 
+function cunntest.GPU()
+   local ndevice = cutorch.getDeviceCount()
+   if ndevice < 2 then
+      return
+   end
+   assert(nn.GPU, "Please update nn to latest version")
+
+   local originaldevice = cutorch.getDevice()
+
+   cutorch.setDevice(1)
+   local linear = nn.Linear(3,4)
+   local linear2 = linear:clone():float()
+   linear.mybuffer = {torch.CudaTensor(3)}
+
+   local gpu = nn.GPU(linear, 2, 1)
+   gpu:cuda()
+
+   mytester:assert(linear.mybuffer[1]:getDevice() == 2)
+   mytester:assert(linear.weight:getDevice() == 2)
+   mytester:assert(cutorch.getDevice() == originaldevice)
+
+   local input = torch.CudaTensor(2,3):uniform(0,1)
+   local output = gpu:forward(input)
+
+   mytester:assert(linear.output:getDevice() == 2)
+   mytester:assert(output:getDevice() == 1)
+   mytester:assert(gpu._input:getDevice() == 2)
+
+   local gradOutput = torch.CudaTensor(2,4):uniform(0,1)
+   gpu:zeroGradParameters()
+   mytester:assert(cutorch.getDevice() == 1)
+   local gradInput = gpu:backward(input, gradOutput)
+
+   mytester:assert(cutorch.getDevice() == 1)
+   mytester:assert(gpu._gradOutput:getDevice() == 2)
+   mytester:assert(linear.gradInput:getDevice() == 2)
+   mytester:assert(gradInput:getDevice() == 1)
+
+   mytester:assert(cutorch.getDevice() == 1)
+   local input2, gradOutput2 = input:float(), gradOutput:float()
+   local output2 = linear2:forward(input2)
+   linear2:zeroGradParameters()
+   local gradInput2 = linear2:backward(input2, gradOutput2)
+
+
+   mytester:assertTensorEq(input2, input:float(), 0.000001)
+   mytester:assertTensorEq(gradInput2, gradInput:float(), 0.000001)
+
+   local params, gradParams = gpu:parameters()
+   local params2, gradParams2 = linear2:parameters()
+
+   for i=1,#params do
+      mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001)
+      mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001)
+   end
+
+   -- test serialize/deserialize
+
+   local gpustr = torch.serialize(gpu)
+   mytester:assert(cutorch.getDevice() == 1)
+   local gpu2 = torch.deserialize(gpustr)
+   mytester:assert(cutorch.getDevice() == 1)
+
+   local output2 = gpu2:forward(input)
+
+   mytester:assert(gpu2.modules[1].output:getDevice() == 2)
+   mytester:assert(output2:getDevice() == 1)
+   mytester:assert(gpu2._input:getDevice() == 2)
+
+   gpu2:zeroGradParameters()
+   mytester:assert(cutorch.getDevice() == 1)
+   local gradInput2 = gpu2:backward(input, gradOutput)
+
+   mytester:assert(cutorch.getDevice() == 1)
+   mytester:assert(gpu2._gradOutput:getDevice() == 2)
+   mytester:assert(gpu2.modules[1].gradInput:getDevice() == 2)
+   mytester:assert(gradInput2:getDevice() == 1)
+
+   mytester:assertTensorEq(output2:float(), output:float(), 0.000001)
+   mytester:assertTensorEq(gradInput2:float(), gradInput:float(), 0.000001)
+
+   local params, gradParams = gpu:parameters()
+   local params2, gradParams2 = gpu2:parameters()
+
+   for i=1,#params do
+      mytester:assert(params2[i]:getDevice() == params[i]:getDevice())
+      mytester:assert(gradParams2[i]:getDevice() == gradParams[i]:getDevice())
+      mytester:assertTensorEq(params2[i]:float(), params[i]:float(), 0.000001)
+      mytester:assertTensorEq(gradParams2[i]:float(), gradParams[i]:float(), 0.000001)
+   end
+
+
+   -- test table input/output
+   local lin1, lin2 = nn.Linear(3,4), nn.Linear(3,4)
+   local para = nn.ParallelTable():add(lin1):add(lin2)
+   local para2 = para:clone():float()
+   local gpu = nn.GPU(para, 2, 1)
+
+   gpu:cuda()
+   mytester:assert(lin1.weight:getDevice() == 2)
+   mytester:assert(lin2.weight:getDevice() == 2)
+   mytester:assert(cutorch.getDevice() == 1)
+
+   local device3 = cutorch.getDeviceCount()
+   local input = {
+      torch.CudaTensor(2,3):uniform(0,1),
+      cutorch.withDevice(device3, function() return torch.CudaTensor(2,3):uniform(0,1) end) -- tests input from multiple devices
+   }
+   local output = gpu:forward(input)
+
+   mytester:assert(para.output[1]:getDevice() == 2)
+   mytester:assert(para.output[2]:getDevice() == 2)
+   mytester:assert(output[1]:getDevice() == 1)
+   mytester:assert(output[2]:getDevice() == 1)
+   mytester:assert(gpu._input[1]:getDevice() == 2)
+   mytester:assert(gpu._input[2]:getDevice() == 2)
+
+   local gradOutput = {
+      torch.CudaTensor(2,4):uniform(0,1),
+      cutorch.withDevice(device3, function() return torch.CudaTensor(2,4):uniform(0,1) end) -- tests gradOutput from multiple devices
+   }
+
+   gpu:zeroGradParameters()
+   mytester:assert(cutorch.getDevice() == 1)
+   local gradInput = gpu:backward(input, gradOutput)
+
+   mytester:assert(cutorch.getDevice() == 1)
+   mytester:assert(gpu._gradOutput[1]:getDevice() == 2)
+   mytester:assert(gpu._gradOutput[2]:getDevice() == 2)
+   mytester:assert(para.gradInput[1]:getDevice() == 2)
+   mytester:assert(para.gradInput[2]:getDevice() == 2)
+   mytester:assert(gradInput[1]:getDevice() == 1)
+   mytester:assert(gradInput[2]:getDevice() == device3)
+
+   local input2, gradOutput2 = {input[1]:float(), input[2]:float()}, {gradOutput[1]:float(), gradOutput[2]:float()}
+   local output2 = para2:forward(input2)
+   para2:zeroGradParameters()
+   local gradInput2 = para2:backward(input2, gradOutput2)
+
+   mytester:assertTensorEq(input2[1], input[1]:float(), 0.000001)
+   mytester:assertTensorEq(input2[2], input[2]:float(), 0.000001)
+   mytester:assertTensorEq(gradInput2[1], gradInput[1]:float(), 0.000001)
+   mytester:assertTensorEq(gradInput2[2], gradInput[2]:float(), 0.000001)
+
+   local params, gradParams = gpu:parameters()
+   local params2, gradParams2 = para2:parameters()
+
+   for i=1,#params do
+      mytester:assertTensorEq(params2[i], params[i]:float(), 0.000001)
+      mytester:assertTensorEq(gradParams2[i], gradParams[i]:float(), 0.000001)
+   end
+
+   -- test that it handles reduction in input/output size
+
+   input[2], gradOutput[2] = nil, nil
+   para.modules[2] = nil
+   para.output[2] = nil
+   para.gradInput[2] = nil
+
+   local output = gpu:forward(input)
+
+   mytester:assert(#gpu._input == 1)
+   mytester:assert(#output == 1)
+
+   local gradInput = gpu:backward(input, gradOutput)
+
+   mytester:assert(#gpu._gradOutput == 1)
+   mytester:assert(#gradInput == 1)
+
+   -- test sequential multi-GPUs
+
+   local mlp = nn.Sequential()
+   for device=1,ndevice do
+      local outdevice = device == ndevice and 1 or device
+      mlp:add(nn.GPU(nn.Linear(3,3), device, outdevice))
+      mytester:assert(cutorch.getDevice() == 1)
+   end
+   mlp:cuda()
+   mytester:assert(cutorch.getDevice() == 1)
+
+   local input = torch.CudaTensor(2,3):uniform(0,1)
+   local gradOutput = torch.CudaTensor(2,3):uniform(0,1)
+
+   local output = mlp:forward(input)
+   mlp:zeroGradParameters()
+   local gradInput = mlp:backward(input, gradOutput)
+
+   -- test CPU only
+
+   local params, gradParams = mlp:parameters()
+
+   mlp:float()
+
+   local input2, gradOutput2 = input:float(), gradOutput:float()
+
+   local _cutorch = cutorch
+   cutorch = nil
+
+   local output2 = mlp:forward(input2)
+   mlp:zeroGradParameters()
+   local gradInput2 = mlp:backward(input2, gradOutput2)
+
+   cutorch = _cutorch
+
+   mytester:assertTensorEq(output:float(), output2, 0.000001)
+   mytester:assertTensorEq(gradInput:float(), gradInput2, 0.000001)
+
+   local params2, gradParams2 = mlp:parameters()
+
+   for i=1,#params do
+      mytester:assertTensorEq(params[i]:float(), params2[i], 0.000001)
+      mytester:assertTensorEq(gradParams[i]:float(), gradParams2[i], 0.000001)
+   end
+
+   cutorch.setDevice(originaldevice)
+end
+
 local function setUp()
    cutorch.setDevice(1)
 end
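
For reference, here is a minimal sketch of the basic decoration pattern the test exercises. It assumes cunn is installed and the machine exposes at least two CUDA devices; the layer and tensor sizes are purely illustrative.

require 'cunn'

-- Wrap a Linear layer so it computes on device 2 but hands its output
-- back on device 1, using the nn.GPU(module, device, outdevice)
-- signature seen throughout the test above.
local linear = nn.Linear(3, 4)
local gpu = nn.GPU(linear, 2, 1)
gpu:cuda() -- moves the wrapped module's parameters to device 2

local input = torch.CudaTensor(2, 3):uniform(0, 1) -- allocated on the current device (1)
local output = gpu:forward(input) -- computed on device 2
print(output:getDevice()) -- prints 1, because outdevice is 1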
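
The table input/output portion of the test boils down to the following sketch, under the same two-device assumption: a decorated ParallelTable can receive table entries that live on different devices, and nn.GPU copies each one onto its own device before running forward.

require 'cunn'

local para = nn.ParallelTable():add(nn.Linear(3, 4)):add(nn.Linear(3, 4))
local gpu = nn.GPU(para, 2, 1)
gpu:cuda()

-- One input tensor on device 1, one on the last available device.
local lastdevice = cutorch.getDeviceCount()
local input = {
   torch.CudaTensor(2, 3):uniform(0, 1),
   cutorch.withDevice(lastdevice, function() return torch.CudaTensor(2, 3):uniform(0, 1) end)
}
local output = gpu:forward(input) -- both entries are returned on device 1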
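
Finally, the "sequential multi-GPUs" block corresponds to this model-parallel idiom, sketched here assuming every device can hold one layer: each decorated layer leaves its output on its own device, except the last, which routes it back to device 1.

require 'cunn'

local ndevice = cutorch.getDeviceCount()
local mlp = nn.Sequential()
for device = 1, ndevice do
   -- the last layer returns its output to device 1
   local outdevice = (device == ndevice) and 1 or device
   mlp:add(nn.GPU(nn.Linear(3, 3), device, outdevice))
end
mlp:cuda()

local input = torch.CudaTensor(2, 3):uniform(0, 1)
local output = mlp:forward(input) -- ends up on device 1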