diff --git a/residual-layers.lua b/residual-layers.lua index 92f0ac1..716d013 100644 --- a/residual-layers.lua +++ b/residual-layers.lua @@ -84,9 +84,9 @@ function addResidualLayer2(input, nChannels, nOutChannels, stride) end -- Add them together - net = cudnn.SpatialBatchNormalization(nOutChannels)(net) net = nn.CAddTable(){net, skip} - net = cudnn.ReLU(true)(net) + net = cudnn.SpatialBatchNormalization(nOutChannels)(net) + --net = cudnn.ReLU(true)(net) -- ^ don't put a ReLU here! see http://gitxiv.com/comments/7rffyqcPLirEEsmpX return net diff --git a/train-cifar.lua b/train-cifar.lua index e114829..d3b026e 100644 --- a/train-cifar.lua +++ b/train-cifar.lua @@ -178,7 +178,9 @@ function forwardBackwardBatch(batch) --]] -- From https://github.com/bgshih/cifar.torch/blob/master/train.lua#L119-L128 - if sgdState.epochCounter < 80 then + if sgdState.nEvalCounter < 400 then + sgdState.learningRate = 0.01 + elseif sgdState.epochCounter < 80 then sgdState.learningRate = 0.1 elseif sgdState.epochCounter < 120 then sgdState.learningRate = 0.01